# Set Env Vars

In [None]:
import os

def parse_and_set_env_vars(command):
    """Parse shell-style ``VAR=value`` assignments and store them in ``os.environ``.

    Args:
        command: String holding one or more whitespace-separated assignments,
            optionally preceded by the ``export`` keyword, for example
            ``"export FOO=bar BAZ='hello world'"``. Values may be quoted
            shell-style so they can contain whitespace.

    Raises:
        ValueError: If a token is not of the form ``VAR=value``, if the
            variable name is empty, or if the quoting in ``command`` is
            unbalanced.
    """
    import shlex  # local import so the function does not depend on another cell

    # shlex honours quotes, so a quoted value containing spaces stays one token.
    for position, token in enumerate(shlex.split(command)):
        if token == "export":
            continue
        # Split on the first '=' only: the value itself may contain '='.
        var, separator, val = token.partition('=')
        if not separator or not var:
            # The offending text is deliberately not echoed: it may be part of a secret.
            raise ValueError(
                f"Malformed assignment at token {position}; expected VAR=value"
            )
        os.environ[var] = val
        # Report the variable name only, so secrets never land in notebook output.
        print(f"Set {var}")

# Placeholder assignment: substitute the real key locally and never commit it.
command = '''OPENAI_API_KEY=<API_KEY>'''
parse_and_set_env_vars(command)

# Confirm the variable exists without writing the secret into the saved cell output.
print("OPENAI_API_KEY is set:", "OPENAI_API_KEY" in os.environ)

# Get Questions

## Parsing the typical tasks

In [40]:
# Read the raw task definitions into one string; `data` is parsed in the next cell.
with open('typical_tasks.txt', 'r') as tasks_file:
    data = tasks_file.read()

In [42]:
import json

# Each task is a blank-line-separated paragraph of "Key: value" lines.
rows = []
for task in data.split('\n\n'):
    task_dict = {}
    for line in task.split('\n'):
        # Tolerate a trailing newline or extra blank separator lines in the file.
        if not line.strip():
            continue
        key, separator, value = line.partition(': ')
        if not separator:
            raise ValueError(f"Expected 'Key: value' but got line: {line!r}")
        # Strip stray whitespace so the same field does not differ between tasks.
        task_dict[key.strip()] = value.strip()
    # Skip paragraphs that contained only blank lines.
    if task_dict:
        rows.append(task_dict)
print(rows)

with open('typical_tasks.json', 'w') as file:
    json.dump(rows, file, indent=4)

[{'Task': 'Curating a Career Plan ', 'Description': 'Given a current career path and user preferences for an end-goal occupation, can we automatically curate a career plan that arrives at the end-goal occupation, with intermediate steps?', 'Exemplar questions': "I'm a first-year master's student at Stanford CS. I did previous research internships at Microsoft and Amazon on Large Language Models. I have previous publications at NLP conferences including EMNLP, NAACL, and ACL. If I want to eventually work for OpenAI as a research scientist, how should I plan my career path to maximize the probability of landing the job?"}, {'Task': 'Suggesting Potential Occupations and Wage Projections', 'Description': "Given a user's interests, can we suggest potential occupations for the user and cast e.g. the wage for those occupations?", 'Exemplar Question': "I'm passionate about environmental sustainability and have a background in environmental science. What are some emerging green jobs that align 

## Asking GPT for more questions

In [48]:
from openai import OpenAI
from jinja2 import Environment, FileSystemLoader, select_autoescape

# Template environment rooted at the current directory; '#' starts a line comment
# inside templates.
jinja_environment = Environment(
    loader=FileSystemLoader('./'),
    autoescape=select_autoescape(),
    trim_blocks=True,
    lstrip_blocks=True,
    line_comment_prefix='#',
)

# Prompt template used to ask the model for additional example questions.
template_file = 'templates/get_simulated_queries.jinja2'
template = jinja_environment.get_template(template_file)

# Built with no arguments; presumably reads OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

# Reload the task list written by the parsing cell.
with open('typical_tasks.json', 'r') as tasks_file:
    data = json.load(tasks_file)

def remove_json_prefix(json_str):
    """Strip Markdown code-fence markers from a model reply.

    Removes every ```` ```json ```` and ```` ``` ```` marker and trims the
    surrounding whitespace, so the result can be handed to ``json.loads``
    regardless of the newline style that follows the opening fence.
    """
    return json_str.replace('```json', '').replace('```', '').strip()


# Ask the model for extra example questions for the first three tasks.
# Each row must carry exactly the keys 'Description' and 'Exemplar Question'.
for idx, row in enumerate(data[:3]):
    print(row)
    prompt_parameter_values = {
        "task_description": row['Description'],
        "example_question": row['Exemplar Question']
    }
    filled_prompt = template.render(**prompt_parameter_values)
    print(filled_prompt)

    kwargs = {
        'messages': [{"role": "system", "content": filled_prompt}],
        'model': 'gpt-4-0125-preview',
        'max_tokens': 500,
        'temperature': 0
    }
    ret = client.chat.completions.create(**kwargs)

    # The reply is expected to be a JSON list of question strings.
    queries = json.loads(remove_json_prefix(ret.choices[0].message.content))
    print('\n'.join(queries))

    data[idx]['simulated_queries'] = queries

with open('typical_tasks_example_questions.json', 'w') as file:
    json.dump(data, file, indent=4)

# Asking GPT for variables and graph

In [4]:
from openai import OpenAI
from jinja2 import Environment, FileSystemLoader, select_autoescape
import json
from tqdm import tqdm

# Template environment rooted at the current directory; '#' starts a line comment
# inside templates.
jinja_environment = Environment(
    loader=FileSystemLoader('./'),
    autoescape=select_autoescape(),
    trim_blocks=True,
    lstrip_blocks=True,
    line_comment_prefix='#',
)

# Prompt template used to ask the model for variables and a graph per question.
template_file = 'templates/get_variables_and_graph.jinja2'
template = jinja_environment.get_template(template_file)

# Built with no arguments; presumably reads OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

def extract_json(json_str):
    """Return the text inside the first ```` ```json ```` fenced block, or ``None``.

    ``None`` is returned when the opening ```` ```json ```` marker is missing or
    when no closing ```` ``` ```` follows it before the next ```` ```json ````
    marker. The returned text is not parsed or stripped.
    """
    segments = json_str.split('```json')
    if len(segments) < 2:
        # No opening fence anywhere in the reply.
        return None
    # segments[1] is the text between the first and second opening markers.
    payload, closing_fence, _ = segments[1].partition('```')
    return payload if closing_fence else None


def get_cost(ret):
    """Estimate the cost of one completion response from its token usage.

    Completion tokens are charged at 30 per million and prompt tokens at 10 per
    million (presumably USD at the pricing of the model used here -- confirm).

    Args:
        ret: Response object exposing ``usage.completion_tokens`` and
            ``usage.prompt_tokens``.

    Returns:
        The combined cost as a float.
    """
    usage = ret.usage
    completion_cost = usage.completion_tokens / 1e6 * 30
    prompt_cost = usage.prompt_tokens / 1e6 * 10
    return completion_cost + prompt_cost


def get_graph(max_attempts=5):
    """Generate a variable graph for each simulated query of the first three tasks.

    Reads ``typical_tasks_example_questions.json``, renders the module-level
    ``template`` for every entry in a task's ``simulated_queries``, sends the
    prompt through the module-level ``client`` and stores the parsed graphs
    (each with the model's reasoning appended) under ``res_graphs``. The result
    is rewritten to ``typical_tasks_example_questions_graphs.json`` after each
    task, and the accumulated cost is printed.

    Args:
        max_attempts: Maximum number of API calls per question before giving
            up. Bounds the spend when a reply can never be parsed.

    Raises:
        RuntimeError: If no usable reply is obtained for a question within
            ``max_attempts`` attempts. The last underlying error is chained.
    """
    with open('typical_tasks_example_questions.json', 'r') as file:
        data = json.load(file)

    cost = 0
    for idx, row in enumerate(data[:3]):
        print(row)
        res_graphs = []
        for question in tqdm(row['simulated_queries']):

            prompt_parameter_values = {
                "task_description": row['Description'],
                "question": question
            }
            filled_prompt = template.render(**prompt_parameter_values)

            kwargs = {
                'messages': [{"role": "system", "content": filled_prompt}],
                'model': 'gpt-4-0125-preview',
                'max_tokens': 1000,
                'temperature': 0
            }

            last_error = None
            for attempt in range(1, max_attempts + 1):
                try:
                    print('attempting')
                    ret = client.chat.completions.create(**kwargs)
                    # Failed attempts are billed too, so count the cost before parsing.
                    cost += get_cost(ret)

                    raw_string = ret.choices[0].message.content
                    graph = json.loads(extract_json(raw_string))
                    reasoning = raw_string.split('REASONING:')[1]

                    graph.append(reasoning)
                except Exception as error:
                    # Catch Exception rather than everything, so KeyboardInterrupt
                    # still stops the cell; report the failure instead of hiding it.
                    last_error = error
                    print(f'attempt {attempt}/{max_attempts} failed: {error!r}')
                else:
                    res_graphs.append(graph)
                    break
            else:
                # Every attempt failed: stop instead of retrying (and paying) forever.
                raise RuntimeError(
                    f'No usable graph after {max_attempts} attempts '
                    f'(cost so far: {cost}) for question: {question!r}'
                ) from last_error

        data[idx]['res_graphs'] = res_graphs

        # Persist after every task so finished work survives a later failure.
        with open('typical_tasks_example_questions_graphs.json', 'w') as file:
            json.dump(data, file, indent=4)

        print('cost:', cost)

In [5]:
# Run graph generation; this issues one API request per simulated query and prints the running cost.
get_graph()

{'Task': 'Curating a Career Plan', 'Description': 'Given a current career path and user preferences for an end-goal occupation, can we automatically curate a career plan that arrives at the end-goal occupation, with intermediate steps?', 'Exemplar Question': "I'm a first-year master's student at Stanford CS. I did previous research internships at Microsoft and Amazon on Large Language Models. I have previous publications at NLP conferences including EMNLP, NAACL, and ACL. If I want to eventually work for OpenAI as a research scientist, how should I plan my career path to maximize the probability of landing the job?", 'simulated_queries': ["I'm a software engineer with 5 years of experience in Java and Python, currently working in a mid-sized tech company. I'm interested in transitioning to a data science role in a leading tech company like Google or Facebook. What steps should I take to make this transition successful?", "I have a bachelor's degree in marketing and have been working in

  0%|          | 0/5 [00:00<?, ?it/s]

attempting


 20%|██        | 1/5 [00:23<01:32, 23.08s/it]

attempting


 40%|████      | 2/5 [00:58<01:31, 30.47s/it]

attempting


 60%|██████    | 3/5 [01:23<00:55, 27.85s/it]

attempting


 80%|████████  | 4/5 [01:47<00:26, 26.33s/it]

attempting


100%|██████████| 5/5 [02:11<00:00, 26.28s/it]


cost: 0.12432
{'Task': 'Suggesting Potential Occupations and Wage Projections', 'Description': "Given a user's interests, can we suggest potential occupations for the user and cast e.g. the wage for those occupations?", 'Exemplar Question': "I'm passionate about environmental sustainability and have a background in environmental science. What are some emerging green jobs that align with my interests, and what can I expect in terms of salary and job growth in these areas?", 'simulated_queries': ['I have a degree in computer science and love coding. What are the most in-demand programming languages, and what kind of salary can I expect in jobs requiring these skills?', 'As someone with a strong background in finance and a keen interest in technology, what fintech roles are available that could leverage my skills, and what is the typical career progression in this field?', 'I am passionate about education and have experience teaching. What are the latest trends in educational technology, 

  0%|          | 0/5 [00:00<?, ?it/s]

attempting


 20%|██        | 1/5 [00:19<01:19, 19.84s/it]

attempting


 40%|████      | 2/5 [00:41<01:02, 20.68s/it]

attempting


 60%|██████    | 3/5 [01:02<00:41, 20.87s/it]

attempting


 80%|████████  | 4/5 [01:24<00:21, 21.44s/it]

attempting


100%|██████████| 5/5 [01:48<00:00, 21.77s/it]


cost: 0.23399999999999999
{'Task': 'Making Optimal Choices Among Alternatives', 'Description': 'Given a specific situation and a set of decision alternatives, how do we make optimal choices when given a set of alternatives in a decision?', 'Exemplar Question': "I've received job offers from three different companies: Amazon AWS L4 SDE in Seattle, Meta Recommendation System E3 SDE in San Diego, and TikTok 1-2 MLE in New York. Considering my career goals of advancing into a leadership position within the next 5 years and my personal preference for a balanced work-life integration, how can I evaluate which job offer is the best overall choice?", 'simulated_queries': ["After working for 5 years as a software engineer at a mid-sized tech company, I'm considering either pursuing an MBA or joining a startup as a lead engineer. Given my long-term goal of becoming a CTO, how should I weigh these options?", 'I have the opportunity to either stay in my current role as a project manager with a foc

  0%|          | 0/5 [00:00<?, ?it/s]

attempting


 20%|██        | 1/5 [00:29<01:57, 29.42s/it]

attempting


 40%|████      | 2/5 [00:51<01:15, 25.23s/it]

attempting


 60%|██████    | 3/5 [01:20<00:53, 26.71s/it]

attempting


 80%|████████  | 4/5 [01:42<00:24, 24.81s/it]

attempting


100%|██████████| 5/5 [02:03<00:00, 24.75s/it]

cost: 0.35636





# Merge Graphs

In [8]:
from openai import OpenAI
from jinja2 import Environment, FileSystemLoader, select_autoescape
import json

# Template environment rooted at the current directory; '#' starts a line comment
# inside templates.
jinja_environment = Environment(
    loader=FileSystemLoader('./'),
    autoescape=select_autoescape(),
    trim_blocks=True,
    lstrip_blocks=True,
    line_comment_prefix='#',
)

# Prompt template used to ask the model to merge the per-question graphs.
template_file = 'templates/merge_graphs.jinja2'
template = jinja_environment.get_template(template_file)

# Built with no arguments; presumably reads OPENAI_API_KEY from the environment -- confirm.
client = OpenAI()

def extract_json(json_str):
    """Return the text inside the first ```` ```json ```` fenced block, or ``None``.

    ``None`` is returned when the opening ```` ```json ```` marker is missing or
    when no closing ```` ``` ```` follows it before the next ```` ```json ````
    marker. The returned text is not parsed or stripped.
    """
    segments = json_str.split('```json')
    if len(segments) < 2:
        # No opening fence anywhere in the reply.
        return None
    # segments[1] is the text between the first and second opening markers.
    payload, closing_fence, _ = segments[1].partition('```')
    return payload if closing_fence else None


def get_cost(ret):
    """Estimate the cost of one completion response from its token usage.

    Completion tokens are charged at 30 per million and prompt tokens at 10 per
    million (presumably USD at the pricing of the model used here -- confirm).

    Args:
        ret: Response object exposing ``usage.completion_tokens`` and
            ``usage.prompt_tokens``.

    Returns:
        The combined cost as a float.
    """
    usage = ret.usage
    completion_cost = usage.completion_tokens / 1e6 * 30
    prompt_cost = usage.prompt_tokens / 1e6 * 10
    return completion_cost + prompt_cost


def merge_graph(max_attempts=5):
    """Merge the per-question graphs of each of the first three tasks into one.

    Reads ``typical_tasks_example_questions_graphs.json``, renders the
    module-level ``template`` with a task's ``Description`` and ``res_graphs``,
    sends the prompt through the module-level ``client`` and stores the parsed
    graph (with the model's reasoning appended) under ``merged_graphs``. The
    result is rewritten to ``typical_tasks_example_questions_graphs_merged.json``
    after each task, and the accumulated cost is printed.

    Args:
        max_attempts: Maximum number of API calls per task before giving up.
            Bounds the spend when a reply can never be parsed.

    Raises:
        RuntimeError: If no usable reply is obtained for a task within
            ``max_attempts`` attempts. The last underlying error is chained.
    """
    with open('typical_tasks_example_questions_graphs.json', 'r') as file:
        data = json.load(file)

    cost = 0
    for idx, row in enumerate(data[:3]):
        print(row)
        prompt_parameter_values = {
            "task_description": row['Description'],
            "graphs": row['res_graphs']
        }
        filled_prompt = template.render(**prompt_parameter_values)

        print('filled_prompt:', filled_prompt)

        kwargs = {
            'messages': [{"role": "system", "content": filled_prompt}],
            'model': 'gpt-4-0125-preview',
            'max_tokens': 4096,
            'temperature': 0
        }

        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                print('attempting')

                ret = client.chat.completions.create(**kwargs)
                # Failed attempts are billed too, so count the cost before parsing.
                cost += get_cost(ret)

                raw_string = ret.choices[0].message.content
                graph = json.loads(extract_json(raw_string))
                reasoning = raw_string.split('REASONING:')[1]

                graph.append(reasoning)
            except Exception as error:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # still stops the cell; report the failure instead of hiding it.
                last_error = error
                print(f'attempt {attempt}/{max_attempts} failed: {error!r}')
            else:
                break
        else:
            # Every attempt failed: stop instead of retrying (and paying) forever.
            raise RuntimeError(
                f'No usable merged graph after {max_attempts} attempts '
                f'(cost so far: {cost}) for task index {idx}'
            ) from last_error

        data[idx]['merged_graphs'] = graph

        # Persist after every task so finished work survives a later failure.
        with open('typical_tasks_example_questions_graphs_merged.json', 'w') as file:
            json.dump(data, file, indent=4)

        print('cost:', cost)

# Run the merge step; this issues one API request per task and prints the running cost.
merge_graph()

{'Task': 'Curating a Career Plan', 'Description': 'Given a current career path and user preferences for an end-goal occupation, can we automatically curate a career plan that arrives at the end-goal occupation, with intermediate steps?', 'Exemplar Question': "I'm a first-year master's student at Stanford CS. I did previous research internships at Microsoft and Amazon on Large Language Models. I have previous publications at NLP conferences including EMNLP, NAACL, and ACL. If I want to eventually work for OpenAI as a research scientist, how should I plan my career path to maximize the probability of landing the job?", 'simulated_queries': ["I'm a software engineer with 5 years of experience in Java and Python, currently working in a mid-sized tech company. I'm interested in transitioning to a data science role in a leading tech company like Google or Facebook. What steps should I take to make this transition successful?", "I have a bachelor's degree in marketing and have been working in