In [5]:
# this parses and stores the output as a dataframe. The correct prompt is:
# Generate a multiple-choice question for the following passage. First, write 'Question:' and write the question. Then write the answer options. Then write 'Correct Answer:' and give the correct answer. Finally, write explanations for the correct answer and each of the distractors on a new line starting with 'Explanation for A'.

In [1]:
import pandas as pd
import os
import re
import openai
import os
from getpass import getpass
import json
from dotenv import load_dotenv, find_dotenv
from pathlib import Path

Create a .env file in `/src` with the follow line:
`API_TOKEN={your api key}`

I added `**/.env` to `.gitignore` already. In the future, you'll have to double check that git is ignoring the secrets file.

You may need to `pip install python-dotenv`

In [2]:
%load_ext dotenv
%dotenv
import os
load_dotenv(Path(".env"))
openai_api_key = os.getenv('API_TOKEN').strip('{}')

In [3]:
DATA = '../data/'
subsections = pd.read_csv(DATA + 'subsections.csv')


In [4]:
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
model = OpenAI(temperature=0.9, openai_api_key = openai_api_key)

## Short Answer Questions

In [52]:
# The types of questions we are interested in
kinds = {'recall': 'requiring the reader to recall information from the text',
         'summarization': 'requiring the reader to summarize information from the text',
         'inference': 'requiring the reader to make inferences from the text'}

In [58]:
# Create a response schema to organize the output from the model
response_schemas = [
    ResponseSchema(name="question", description="a question based on the passage. The answer should be one sentence long."),
    ResponseSchema(name="correct_answer", description="a correct short answer to the question. The answer should be one sentence long."),
    ResponseSchema(name="incorrect_answer", description="an incorrect short answer to the question. The answer should be one sentence long.")
]
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()

In [59]:
# Create a prompt template, the input to feed to the model
prompt = PromptTemplate(
    template="generate a question based on the passage {kind}. \n{format_instructions}\n{passage}",
    input_variables=["kind", "passage"],
    partial_variables={"format_instructions": format_instructions}
)

In [76]:
'''
This is a function which takes a passage and a desired type of question, as well as the chapter, section, and subsection numbers
It returns a dataframe with one row
'''

def generate_sa_question(example, kind):
    # Input to the model
    _input = prompt.format_prompt(passage=example['clean_text'], kind=kind)
    output = model(_input.to_string())
    
    # Clean out the output string and convert it to a dataframe
    output_string = output.replace('`','').replace('json','').strip()
    # Try to decode the json
    try:
        output_dict = json.loads(output_string)
    # If it doesn't work, return an empty row
    except:
        output_dict = {'question':'', 'correct_answer':'', 'incorrect_answer':''}
        print('Error Parsing Output')

    output_dict['type'] = kind
    return output_dict

In [77]:
import datasets

ds = datasets.Dataset.from_pandas(subsections)
ds_dict = {}

for k in kinds:
    ds_dict[k] = ds.map(lambda x: generate_sa_question(x, k))

ds_dict

Map:   3%|▎         | 16/523 [01:48<1:06:18,  7.85s/ examples]

Error Parsing Output


Map:   7%|▋         | 36/523 [04:17<1:03:43,  7.85s/ examples]

Error Parsing Output


Map:  11%|█         | 57/523 [06:41<55:24,  7.13s/ examples]  

Error Parsing Output


Map:  12%|█▏        | 64/523 [07:25<49:11,  6.43s/ examples]

Error Parsing Output


Map:  12%|█▏        | 65/523 [07:35<57:10,  7.49s/ examples]

Error Parsing Output


Map:  15%|█▍        | 77/523 [09:12<1:01:17,  8.25s/ examples]

Error Parsing Output


Map:  16%|█▌        | 83/523 [09:50<53:23,  7.28s/ examples]  

Error Parsing Output


Map:  27%|██▋       | 140/523 [16:09<40:07,  6.29s/ examples]

Error Parsing Output


Map:  30%|██▉       | 156/523 [18:20<44:56,  7.35s/ examples]

Error Parsing Output


Map:  32%|███▏      | 168/523 [19:38<34:12,  5.78s/ examples]

Error Parsing Output


Map:  36%|███▌      | 186/523 [21:45<43:31,  7.75s/ examples]

Error Parsing Output


Map:  37%|███▋      | 193/523 [22:41<39:36,  7.20s/ examples]

Error Parsing Output


Map:  37%|███▋      | 194/523 [22:48<38:50,  7.08s/ examples]

Error Parsing Output


Map:  45%|████▍     | 235/523 [28:10<38:44,  8.07s/ examples]

Error Parsing Output


Map:  49%|████▉     | 256/523 [31:02<55:46, 12.53s/ examples]

Error Parsing Output


Map:  53%|█████▎    | 275/523 [33:39<29:25,  7.12s/ examples]

Error Parsing Output


Map:  54%|█████▎    | 281/523 [34:18<21:14,  5.27s/ examples]

Error Parsing Output


Map:  57%|█████▋    | 299/523 [36:39<34:00,  9.11s/ examples]

Error Parsing Output


Map:  60%|█████▉    | 312/523 [37:51<20:39,  5.87s/ examples]

Error Parsing Output


Map:  60%|██████    | 314/523 [38:03<20:50,  5.98s/ examples]

Error Parsing Output


Map:  61%|██████    | 318/523 [38:33<23:48,  6.97s/ examples]

Error Parsing Output


Map:  64%|██████▍   | 334/523 [40:14<21:02,  6.68s/ examples]

Error Parsing Output


Map:  74%|███████▍  | 389/523 [46:33<15:20,  6.87s/ examples]

Error Parsing Output


Map:  75%|███████▍  | 391/523 [46:45<13:46,  6.26s/ examples]

Error Parsing Output


Map:  78%|███████▊  | 407/523 [48:52<17:34,  9.09s/ examples]

Error Parsing Output


Map:  80%|████████  | 419/523 [50:13<12:47,  7.38s/ examples]

Error Parsing Output


Map:  82%|████████▏ | 430/523 [51:41<13:52,  8.96s/ examples]

Error Parsing Output


Map:  83%|████████▎ | 434/523 [52:14<11:54,  8.03s/ examples]Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised APIError: Request failed due to server shutdown {
  "error": {
    "message": "Request failed due to server shutdown",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'Request failed due to server shutdown', 'type': 'server_error', 'param': None, 'code': None}} {'Date': 'Wed, 21 Jun 2023 16:39:14 GMT', 'Content-Type': 'application/json', 'Content-Length': '141', 'Connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'text-davinci-003', 'openai-organization': 'ai-aloe-georgia-institute-of-technology', 'openai-processing-ms': '9797', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3000', 'x-ratelimit-limit-tokens': '250000', 'x-ratelimit-remaining-requests': '299

Error Parsing Output


Map:  90%|█████████ | 473/523 [57:55<05:25,  6.50s/ examples]

Error Parsing Output


Map:  91%|█████████ | 476/523 [58:22<06:24,  8.17s/ examples]

Error Parsing Output


Map:  93%|█████████▎| 484/523 [59:22<05:15,  8.10s/ examples]

Error Parsing Output


Map:   1%|          | 3/523 [00:21<1:06:50,  7.71s/ examples]  

Error Parsing Output


Map:   7%|▋         | 36/523 [03:58<1:05:31,  8.07s/ examples]

Error Parsing Output


Map:  11%|█         | 55/523 [06:04<48:45,  6.25s/ examples]  

Error Parsing Output


Map:  11%|█         | 57/523 [06:16<49:15,  6.34s/ examples]

Error Parsing Output


Map:  12%|█▏        | 64/523 [07:08<53:04,  6.94s/ examples]  

Error Parsing Output


Map:  12%|█▏        | 65/523 [07:16<55:07,  7.22s/ examples]

Error Parsing Output


Map:  15%|█▍        | 77/523 [08:37<50:58,  6.86s/ examples]  

Error Parsing Output


Map:  19%|█▉        | 99/523 [11:23<1:01:24,  8.69s/ examples]

Error Parsing Output


Map:  30%|██▉       | 156/523 [17:58<37:44,  6.17s/ examples]  

Error Parsing Output


Map:  37%|███▋      | 194/523 [22:41<40:28,  7.38s/ examples]

Error Parsing Output


Map:  40%|████      | 211/523 [24:31<36:09,  6.95s/ examples]

Error Parsing Output


Map:  49%|████▉     | 256/523 [29:43<38:58,  8.76s/ examples]

Error Parsing Output


Map:  53%|█████▎    | 275/523 [32:10<29:18,  7.09s/ examples]

Error Parsing Output


Map:  54%|█████▎    | 281/523 [32:44<22:47,  5.65s/ examples]

Error Parsing Output


Map:  55%|█████▍    | 287/523 [33:29<25:31,  6.49s/ examples]

Error Parsing Output


Map:  57%|█████▋    | 299/523 [34:57<29:59,  8.03s/ examples]

Error Parsing Output


Map:  60%|██████    | 315/523 [36:34<20:37,  5.95s/ examples]

Error Parsing Output


Map:  61%|██████    | 318/523 [36:54<23:43,  6.95s/ examples]

Error Parsing Output


Map:  61%|██████    | 319/523 [36:59<22:11,  6.53s/ examples]

Error Parsing Output


Map:  63%|██████▎   | 330/523 [38:03<23:52,  7.42s/ examples]

Error Parsing Output


Map:  64%|██████▍   | 334/523 [38:25<18:25,  5.85s/ examples]

Error Parsing Output


Map:  67%|██████▋   | 350/523 [39:55<16:54,  5.86s/ examples]

Error Parsing Output


Map:  70%|██████▉   | 366/523 [41:58<22:49,  8.72s/ examples]

Error Parsing Output


Map:  74%|███████▍  | 389/523 [44:51<16:12,  7.26s/ examples]

Error Parsing Output


Map:  78%|███████▊  | 407/523 [46:51<15:12,  7.86s/ examples]

Error Parsing Output


Map:  82%|████████▏ | 430/523 [49:42<12:38,  8.15s/ examples]

Error Parsing Output


Map:  84%|████████▎ | 438/523 [50:34<07:43,  5.45s/ examples]

Error Parsing Output


Map:  91%|█████████ | 476/523 [54:51<04:08,  5.29s/ examples]

Error Parsing Output


Map:  93%|█████████▎| 486/523 [55:55<03:37,  5.88s/ examples]

Error Parsing Output


Map:  94%|█████████▎| 490/523 [56:21<03:22,  6.15s/ examples]

Error Parsing Output


Map:   4%|▍         | 22/523 [02:21<44:34,  5.34s/ examples]   

Error Parsing Output


Map:  11%|█▏        | 59/523 [06:45<57:22,  7.42s/ examples]  

Error Parsing Output


Map:  12%|█▏        | 64/523 [07:19<51:09,  6.69s/ examples]  

Error Parsing Output


Map:  12%|█▏        | 65/523 [07:26<52:28,  6.87s/ examples]

Error Parsing Output


Map:  22%|██▏       | 116/523 [13:14<52:38,  7.76s/ examples] 

Error Parsing Output


Map:  30%|██▉       | 156/523 [19:00<1:00:18,  9.86s/ examples]

Error Parsing Output


Map:  32%|███▏      | 168/523 [20:36<51:14,  8.66s/ examples]  

Error Parsing Output


Map:  37%|███▋      | 194/523 [24:20<46:32,  8.49s/ examples]

Error Parsing Output


Map:  37%|███▋      | 196/523 [24:40<51:08,  9.39s/ examples]

Error Parsing Output


Map:  38%|███▊      | 201/523 [25:21<43:35,  8.12s/ examples]

Error Parsing Output


Map:  39%|███▉      | 205/523 [26:11<54:59, 10.38s/ examples]  

Error Parsing Output


Map:  43%|████▎     | 226/523 [29:47<57:21, 11.59s/ examples]  

Error Parsing Output


Map:  49%|████▉     | 256/523 [34:29<39:55,  8.97s/ examples]

Error Parsing Output


Map:  53%|█████▎    | 275/523 [37:47<41:13,  9.97s/ examples]

Error Parsing Output


Map:  54%|█████▎    | 281/523 [38:41<34:58,  8.67s/ examples]

Error Parsing Output


Map:  54%|█████▍    | 282/523 [38:48<32:14,  8.03s/ examples]

Error Parsing Output


Map:  57%|█████▋    | 299/523 [41:17<36:50,  9.87s/ examples]

Error Parsing Output


Map:  59%|█████▊    | 306/523 [42:13<29:38,  8.20s/ examples]

Error Parsing Output


Map:  60%|██████    | 314/523 [43:27<38:49, 11.14s/ examples]

Error Parsing Output


Map:  63%|██████▎   | 330/523 [45:54<28:21,  8.82s/ examples]

Error Parsing Output


Map:  64%|██████▍   | 334/523 [46:25<25:06,  7.97s/ examples]

Error Parsing Output


Map:  66%|██████▌   | 343/523 [47:38<21:30,  7.17s/ examples]

Error Parsing Output


Map:  67%|██████▋   | 350/523 [48:55<34:02, 11.80s/ examples]

Error Parsing Output


Map:  70%|██████▉   | 366/523 [51:33<26:47, 10.24s/ examples]

Error Parsing Output


Map:  76%|███████▌  | 395/523 [56:41<19:54,  9.33s/ examples]

Error Parsing Output


Map:  77%|███████▋  | 403/523 [58:06<22:59, 11.50s/ examples]

Error Parsing Output


Map:  78%|███████▊  | 407/523 [59:08<27:18, 14.12s/ examples]

Error Parsing Output


Map:  82%|████████▏ | 430/523 [1:03:34<24:01, 15.50s/ examples]

Error Parsing Output


Map:  83%|████████▎ | 432/523 [1:03:52<18:32, 12.22s/ examples]

Error Parsing Output


Map:  84%|████████▎ | 438/523 [1:04:57<14:37, 10.32s/ examples]

Error Parsing Output


Map:  86%|████████▌ | 450/523 [1:07:03<14:38, 12.04s/ examples]

Error Parsing Output


Map:  91%|█████████ | 476/523 [1:11:10<06:38,  8.48s/ examples]

Error Parsing Output


Map:  94%|█████████▎| 490/523 [1:13:15<05:21,  9.74s/ examples]

Error Parsing Output


Map:  94%|█████████▍| 493/523 [1:13:51<05:37, 11.23s/ examples]

Error Parsing Output


Map:  96%|█████████▌| 503/523 [1:15:28<03:25, 10.29s/ examples]

Error Parsing Output


Map:  98%|█████████▊| 512/523 [1:16:50<01:39,  9.08s/ examples]

Error Parsing Output


                                                               

{'recall': Dataset({
     features: ['module', 'chapter', 'section', 'subsection', 'heading', 'raw_text', 'clean_text', 'slug', 'question', 'correct_answer', 'incorrect_answer', 'type'],
     num_rows: 523
 }),
 'summarization': Dataset({
     features: ['module', 'chapter', 'section', 'subsection', 'heading', 'raw_text', 'clean_text', 'slug', 'question', 'correct_answer', 'incorrect_answer', 'type'],
     num_rows: 523
 }),
 'inference': Dataset({
     features: ['module', 'chapter', 'section', 'subsection', 'heading', 'raw_text', 'clean_text', 'slug', 'question', 'correct_answer', 'incorrect_answer', 'type'],
     num_rows: 523
 })}

In [92]:
df_list = [v.to_pandas() for v in ds_dict.values()]
big_df = pd.concat(df_list)
big_df.reset_index().to_csv('../data/aqag-chatcpt.csv')

In [91]:
big_df1 = big_df
big_df1[big_df1['index'] == 3]

Unnamed: 0,index,module,chapter,section,subsection,heading,raw_text,clean_text,slug,question,correct_answer,incorrect_answer,type
3,3,1,1,1,1,Introduction to FRED,Data is very important in economics because it...,Data is very important in economics because it...,introduction-to-fred,What is the name of the database where most of...,The St. Louis Federal Reserve Bank's FRED data...,The US Census Bureau database.,recall
526,3,1,1,1,1,Introduction to FRED,Data is very important in economics because it...,Data is very important in economics because it...,introduction-to-fred,What website does the course primarily use for...,The course primarily uses the St. Louis Federa...,The course primarily uses the St. Louis Federa...,summarization
1049,3,1,1,1,1,Introduction to FRED,Data is very important in economics because it...,Data is very important in economics because it...,introduction-to-fred,Which government agencies are sources of econo...,A variety of government agencies,FRED,inference


In [62]:
'''
This cell generates short answer questions and correct and incorrect answer for each row of the subsections dataframe
'''

df = pd.DataFrame({'clean_text':[], 'question':[], 'correct_answer':[], 'incorrect_answer':[], 'type':[], 'chapter':[], 'section':[], 'subsection':[]})
# Count how many rows chatGPT failed at
problem_rows = 0

for row in subsections.iterrows():
    print(row[0], end='\r')
    passage = row[1]['clean_text']
    for kind in kinds:
        try: 
            line = generate_sa_question(passage, kind, row[1]['chapter'], row[1]['section'], row[1]['subsection'])
            df = pd.concat([df, line], axis=0)
        except:
            problem_rows += 1
            continue

print(f'Trouble parsing {problem_rows} subsections')
        
        

Trouble parsing 1569 subsections


In [None]:
df

In [None]:
df.reset_index(drop=True).to_csv('../results/aqg.csv')

## Multiple Choice Questions

In [None]:
template = """
I want you to generate a multiple choice question for a passage with one correct answer and explanations for all answers using this as an example:

Question:

A:
B:
C:
D:

Correct Answer:

Explanation for A:
Explanation for B:
Explanation for C:
Explanation for D:

Here is the passage I want you to use to generate the question: {passage}
"""

prompt = PromptTemplate(
    input_variables=["passage"],
    template = template)

questions_dict = {'id':[], 'chapter':[], 'section':[], 'subsection':[], 'item':[]}
counter = 0
for row in subsections.iterrows():
    print(counter, end='\r')
    questions_dict['id'].append(row[0])
    questions_dict['chapter'].append(row[1]['chapter'])
    questions_dict['section'].append(row[1]['section'])
    questions_dict['subsection'].append(row[1]['subsection'])
    questions_dict['item'].append(llm(prompt.format(passage=row[1]['clean_text'])))
    counter +=1


116

In [None]:
questions_df = pd.DataFrame.from_dict(questions_dict)
questions_df.to_csv('../data/gpt_questions.csv')

In [6]:
def generate_question(summary):
    prompt = """Generate a multiple-choice question, give the correct answer, and give explanations for the distractors for the following passage. Please use the following format:
    Question: [Write the question here.] 
    A. [Write option A here]
    B. [Write option B here]
    C. [Write option C here]
    D. [Write option D here]

    Correct Answer: [Write the correct answer here] 

    Explanation for A: [Write the explanation for A here]
    Explanation for B: [Write the explanation for B here]
    Explanation for C: [Write the explanation for C here]
    Explanation for D: [Write the explanation for D here]
    
"""
    text_input = prompt + summary
    completions = openai.Completion.create(engine="text-davinci-003", prompt=text_input, max_tokens=2048, n=1,stop=None,temperature=0.7)
    message = completions.choices[0].text
    return message

In [7]:
def chatGPTparseMCQ(output):    
    output_dict['question'].append(re.search('(Question:)[\w\d\s]*/?', output).group(0)[10:])
    output = re.sub('(Question:)[\w\d\s]*/?', '', output).strip()

    output_dict['answer_a'].append(re.search('A.+', output).group(0)[3:])
    output = re.sub('A.+', '', output, count=1).strip()
    output_dict['answer_b'].append(re.search('B.+', output).group(0)[3:])
    output = re.sub('B.+', '', output, count=1).strip()
    output_dict['answer_c'].append(re.search('C.+', output).group(0)[3:])
    output = re.sub('C.+', '', output, count=1).strip()
    output_dict['answer_d'].append(re.search('D.+', output).group(0)[3:])
    output = re.sub('D.+', '', output, count=1).strip()

    output_dict['correct_answer'].append(re.search('[Cc]orrect [Aa]nswer: .', output).group(0)[-1])
    output = re.sub('[Cc]orrect [Aa]nswer: .+', '', output, count=1).strip()

    a = re.search('Explanation for A.+', output)
    if a == None:
        output_dict['explanation_a'].append('')
    else:
        output_dict['explanation_a'].append(a.group(0)[18:])

    b = re.search('Explanation for B.+', output)
    if b == None:
        output_dict['explanation_b'].append('')
    else:
        output_dict['explanation_b'].append(b.group(0)[18:])
    
    c = re.search('Explanation for C.+', output)
    if c == None:
        output_dict['explanation_c'].append('')
    else:
        output_dict['explanation_c'].append(c.group(0)[18:])
        
    d = re.search('Explanation for D.+', output)
    if d == None:
        output_dict['explanation_d'].append('')
    else:
        output_dict['explanation_d'].append(d.group(0)[18:])




In [None]:
output_dict = {'chapter': [], 'section': [], 'subsection_number':[], 'subsection_slug': [], 'question': [], 'answer_a':[], 'answer_b':[], 'answer_c':[], 'answer_d':[], 
                'explanation_a':[], 'explanation_b':[], 'explanation_c':[], 'explanation_d':[], 
                'correct_answer':[]}

for row in subsections.iterrows():
    subsection = row[1]
    output_dict['chapter'].append(subsection['chapter'])
    output_dict['section'].append(subsection['section'])
    output_dict['subsection_number'].append(subsection['subsection'])
    output_dict['subsection_slug'].append(subsection['slug'])
    output = generate_question(subsection['clean_text'])
    print(output)
    chatGPTparseMCQ(output)
    
pd.DataFrame.from_dict(output_dict).to_csv(DATA+'mcq_table.csv')

In [None]:
pd.DataFrame.from_dict(output_dict)