In [20]:
import pandas as pd
import numpy as np
import openai
import os

In [None]:
myapikey = open('my_gpt_apikey.txt').readline().rstrip()

client = openai.OpenAI(api_key = myapikey)

# Use GPT to gather data about the nyt authors

In [64]:
nyt_single = pd.read_csv("nyt_single_authors.csv")
nyt_single.head()

Unnamed: 0,year,week,rank,title_id,title,author,multiple,problem
0,1931,1931-10-12,1,6477,THE TEN COMMANDMENTS,Warwick Deeping,False,False
1,1931,1931-10-12,2,1808,FINCHE'S FORTUNE,Mazo de la Roche,False,False
2,1931,1931-10-12,3,5304,THE GOOD EARTH,Pearl S. Buck,False,False
3,1931,1931-10-12,4,4038,SHADOWS ON THE ROCK,Willa Cather,False,False
4,1931,1931-10-12,5,3946,SCARMOUCHE THE KING MAKER,Rafael Sabatini,False,False


In [86]:
#make a list of all the authors
nyt_single_authors = nyt_single.author.unique()
nyt_single_authors_five = nyt_single_authors[:5]

In [92]:
def get_basic_demographics(my_author_list):
    df = pd.DataFrame(columns=['birth','death','race_eth','education', 'institution'], index= my_author_list)
    for author in my_author_list:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """You are a literary historian. 
                You reply with only the requested information, not a complete sentence, and no headers. 
                If you do not know information, fill it in with None.
                If an author is still living, fill in the death year with N/A.
                For example, if the author is Chimamanda Ngozi Adichie, you would return: 1977, N/A, Nigerian, Master's Degree, Johns Hopkins University"""},
                {
                    "role": "user",
                    "content": """What is the birth year, death year, race/ethnicity, highest level of education, and 
                    college or university of """+author
                }
            ]
        )
        my_response = completion.choices[0].message.content
        #print(my_response)
        response_list = my_response.split(',')
        df.loc[author] = pd.Series({'birth':response_list[0],'death':response_list[1],'race_eth':response_list[2],'education':response_list[3], 'institution':response_list[4]})
        
        df = df.reset_index()
        df.rename(columns = {"index":"author"}, inplace = True)
    
    return(df)



                 birth  death    race_eth           education  \
Warwick Deeping   1877   1950     British                None   
Mazo de la Roche  1879   1961    Canadian                None   
Pearl S. Buck     1892   1973   Caucasian           Doctorate   
Willa Cather      1873   1947       White   Bachelor's Degree   
Rafael Sabatini   1875   1950     Italian                None   

                                      institution  
Warwick Deeping                              None  
Mazo de la Roche                             None  
Pearl S. Buck        Wisconsin-Madison University  
Willa Cather       University of Nebraska-Lincoln  
Rafael Sabatini                              None  


0

In [None]:
df = get_basic_demographics(nyt_single_authors)
print(df.head())

df.to_csv("nyt_single_author_demographics.csv")
os.system('say "your program has finished"')

In [139]:
def get_birthplace(my_author_list):
    df = pd.DataFrame(columns=['birthplace'], index= my_author_list)
    
    for author in my_author_list:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": """You are a literary historian. 
                You reply with only the requested information, not a complete sentence, and no headers. 
                Give the most detailed information you can find, including city, state, country
                If you do not know information, fill it in with None.
                For example, if the author is Pearl S. Buck, you would return: Hillsboro, WV, USA"""},
                {
                    "role": "user",
                    "content": """What is the birthplace of """+author
                }
            ]
        )
        my_response = completion.choices[0].message.content
        #print(my_response)
        df.loc[author] = pd.Series({'birthplace':my_response})
        df = df.reset_index()
        df.rename(columns = {"index":"author"}, inplace = True)
        
    return df



In [105]:
df1 = get_birthplace(nyt_single_authors)
df1.to_csv("nyt_single_author_birthplace.csv")
print(df1.head())

os.system('say "your program has finished"')

                                        birthplace
Warwick Deeping     Hungerford, Berkshire, England
Mazo de la Roche        Newmarket, Ontario, Canada
Pearl S. Buck                   Hillsboro, WV, USA
Willa Cather      Back Creek Valley, Virginia, USA
Rafael Sabatini                  Jaskowice, Poland


## Get demographics for the fixed authors

In [111]:
nyt_reviewed = pd.read_csv("nyt_reviewed_authors.csv")
nyt_reviewed.head()

Unnamed: 0,author_x,birth,death,race_eth,education,institution,year,week,rank,title_id,title,author_y,problem,author_alt,author_1,author_2,_merge
0,,,,,,,1995.0,1995-12-31,14.0,6336.0,THE SMALL ONE,Alex Walsh. Illustrated by Jesse Clay,True,Alex Walsh,Alex Walsh,,right_only
1,,,,,,,2008.0,2008-03-02,15.0,974.0,CELEBUTANTES,Amanda Goldberg and Ruthanna Khalighi Hopper,True,Amanda Goldberg and Ruthanna Khalighi Hopper,Amanda Goldberg,Ruthanna Khalighi Hopper,right_only
2,,,,,,,2011.0,2011-01-23,9.0,6836.0,THREE SECONDS,Anders Roslund and Borge Hellstrom,True,Anders Roslund and Borge Hellstrom,Anders Roslund,Borge Hellstrom,right_only
3,,,,,,,2011.0,2011-01-30,8.0,6836.0,THREE SECONDS,Anders Roslund and Borge Hellstrom,True,Anders Roslund and Borge Hellstrom,Anders Roslund,Borge Hellstrom,right_only
4,,,,,,,2011.0,2011-02-06,15.0,6836.0,THREE SECONDS,Anders Roslund and Borge Hellstrom,True,Anders Roslund and Borge Hellstrom,Anders Roslund,Borge Hellstrom,right_only


In [112]:
nyt_reviewed_authors = nyt_reviewed.author_1.unique()

In [113]:
df_review = get_basic_demographics(nyt_reviewed_authors)
df_review.head()

Unnamed: 0,birth,death,race_eth,education,institution
Alex Walsh,,,,,
Amanda Goldberg,1970.0,,American,Bachelor's Degree,Harvard University
Anders Roslund,1965.0,,Swedish,Bachelor's Degree,University of Gothenburg
Armin L. Robinson,1953.0,,African American,Bachelor's Degree,University of California
Arnaud de Borchgrave,1926.0,2019.0,Belgian,Bachelor's Degree,Université catholique de Louvain


In [132]:
df_review.head()

Unnamed: 0,author,birth,death,race_eth,education,institution
0,Alex Walsh,,,,,
1,Amanda Goldberg,1970.0,,American,Bachelor's Degree,Harvard University
2,Anders Roslund,1965.0,,Swedish,Bachelor's Degree,University of Gothenburg
3,Armin L. Robinson,1953.0,,African American,Bachelor's Degree,University of California
4,Arnaud de Borchgrave,1926.0,2019.0,Belgian,Bachelor's Degree,Université catholique de Louvain


In [134]:
df_review.to_csv("nyt_reviewed_author_demographics.csv")

In [None]:
df1_review = get_birthplace(nyt_single_authors)
df1_review.to_csv("nyt_reviewed_author_birthplace.csv")
os.system('say "your program has finished"')