In [22]:
import os
import numpy as np
from tqdm import tqdm
from openai import OpenAI

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune, gathered in one place instead of
# being inlined in the loop body.
SEED = 18102022
N_LETTERS = 200          # one chat-completion request is issued per letter
MODEL = "gpt-4o-mini"
AGE_RANGE = (21, 70)     # sampled uniformly, then truncated to an int

SENTIMENTS = ['highly positive', 'positive', 'neutral', 'negative', 'extremely negative']
SENTIMENT_PROBS = [0.1, 0.2, 0.4, 0.2, 0.1]
SECTORS = ['health', 'finance', 'academia', 'software engineering', 'ecommerce']
EDUCATION_LEVELS = ['phd', 'college', 'trainee', 'none']
EDUCATION_PROBS = [0.05, 0.4, 0.5, 0.05]

# Seeds numpy's global generator, which drives all attribute sampling below.
# The generated letter text itself comes from the API and is not seeded here.
np.random.seed(SEED)

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def sample_profile():
    """Draw one random (age, sentiment, sector, education) tuple.

    The draws use numpy's global generator and are made in a fixed order
    (age, sentiment, sector, education); keep that order so a given seed
    keeps producing the same sequence of profiles.
    """
    age = int(np.random.uniform(*AGE_RANGE))
    sentiment = np.random.choice(SENTIMENTS, p=SENTIMENT_PROBS)
    sector = np.random.choice(SECTORS)  # no `p` given: sectors are equally likely
    education = np.random.choice(EDUCATION_LEVELS, p=EDUCATION_PROBS)
    return age, sentiment, sector, education


def build_prompt(age, sentiment, sector, education):
    """Return the instruction text asking for one recommendation letter.

    The leading newline, the four-space indents and the trailing spaces are
    spelled out explicitly so the prompt text stays exactly as it was when
    written as an indented triple-quoted string.
    """
    return (
        "\n"
        f"    Generate me a five sentence recommendation letter with a {sentiment} sentiment for a person of age {age}, with education level {education}, \n"
        f"    who worked in the {sector} sector. Make sure that the letter refers to specific tasks from this sector.  Refer to the person as [person]. \n"
        "    Only respond with the letter text."
    )


def generate_letter(prompt, model=MODEL):
    """Send `prompt` as a single user message and return the first choice's text."""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=model,
    )
    # NOTE(review): the returned content is stored as-is; if the API ever
    # returns no text for a choice, that row ends up with a missing letter --
    # confirm whether such rows should be retried or dropped downstream.
    return chat_completion.choices[0].message.content


# One tuple per generated letter: (age, sentiment, sector, education, letter).
records = []

for _ in tqdm(range(N_LETTERS)):
    age, sentiment, sector, education = sample_profile()
    prompt = build_prompt(age, sentiment, sector, education)
    letter = generate_letter(prompt)
    records.append((age, sentiment, sector, education, letter))
    

100%|█████████████████████████████████████████████████████████████████████████████████████| 200/200 [10:55<00:00,  3.28s/it]


In [27]:
import pandas as pd
# Gather the sampled attributes and the generated letters into one table,
# one row per record, and show it as the cell's output.
df = pd.DataFrame(
    records,
    columns=['age', 'sentiment', 'sector', 'education', 'letter'],
)
df

Unnamed: 0,age,sentiment,sector,education,letter
0,39,extremely negative,academia,none,"To Whom It May Concern,\n\nI regret to write t..."
1,36,extremely negative,academia,trainee,"To Whom It May Concern,\n\nI regret to inform ..."
2,64,positive,finance,college,"To Whom It May Concern,\n\nI am pleased to rec..."
3,46,neutral,health,trainee,"To whom it may concern,\n\nI am writing to rec..."
4,53,negative,finance,trainee,"To Whom It May Concern,\n\nI regret to inform ..."
...,...,...,...,...,...
195,53,negative,finance,trainee,"To whom it may concern,\n\nI regret to inform ..."
196,36,extremely negative,finance,college,"To Whom It May Concern,\n\nI regret to inform ..."
197,23,negative,finance,college,"Dear [Recipient's Name],\n\nI regret to inform..."
198,22,positive,academia,trainee,"To whom it may concern,\n\nI am pleased to rec..."


In [28]:
# Persist the dataset to disk; index=False leaves the row index out of the file.
df.to_csv(path_or_buf='synthetic_letters.csv', index=False)