In [1]:
from vllm import LLM, SamplingParams
import os
from dotenv import load_dotenv
import pandas as pd

# Load variables from a local .env file (if present) into the process
# environment, then read the Supabase credentials from it.
# Either value is None when the corresponding variable is not set.
load_dotenv()
url = os.environ.get('SUPABASE_URL')
key = os.environ.get('SUPABASE_KEY')

from supabase import create_client, Client
# Connect with the credentials read above, then pull every column of the
# `subsections` table into a DataFrame (one row per returned record).
supabase = create_client(supabase_key=key, supabase_url=url)
subsections_df = pd.DataFrame(
    supabase.table('subsections')
    .select('*')
    .execute()
    .data
)

2023-11-29 16:57:36,843:INFO - HTTP Request: GET https://amvqfibhtaccpdzunrur.supabase.co/rest/v1/subsections?select=%2A "HTTP/1.1 200 OK"


In [2]:
# Maps each question-type name to the definition text that is interpolated
# verbatim into the generation prompt (see `type_def` in generate_qa_output).
# The keys double as the list of question types to generate.
#
# The strings are runtime prompt text: every character below, including the
# leading newline, the "1." / "2." / "3." prefixes and all spacing, reaches the model.
# NOTE(review): several lines contain runs of three spaces between words, and
# two lines carry trailing spaces -- this looks like copy/paste residue from
# the source document; confirm whether it should be normalised.
# NOTE(review): the last two sentences of the 'reflection' entry talk about
# "relevant elaborations" -- presumably they belong to the 'elaboration'
# entry; verify against the original definitions.
definition_dict = {'inference' : '''
1.Inference: Inferential, or implicit, questions are answered by interpreting clues from part of the
text to figure something out. Students need to be able to answer inferential questions to see if they
are   understanding   the   meaning   behind   certain   events/character's   feelings.   Need   to   speak   about
making   connections   between   segments   of   texts   to   fill   in   gaps   related   to   comprehension.
Inferences are implicit and are based on using background knowledge or text information to make
conclusions.
''',
                   'reflection' : '''
2.Reflection:   A   reflection   question   is   what   we   call   any   question   that   makes   a   student   look   back
over   what   or   how   they   have   learned.   Reflection   questions   often   assess   metacognitive   skills,
otherwise known as thinking about how we think and learn. Relevant elaborations expand given
information.   Relevant   elaborations   add   information   that   supports   or   clarifies   conclusions.
''',
                   'elaboration' : '''                   
3.Elaboration:   These   questions   help   to   extend   and   broaden   the   importance   of   the   meaning. 
Learners can elaborate on the question making it more personal to them. Make sure that these are
related to a readers' background knowledge and personal experience.
'''}

In [3]:
# Start the vLLM engine for the 13B OpenOrca/OpenChat checkpoint with all
# other engine options left at their defaults. This is the slow step of the
# notebook: the log below shows roughly half a minute from init to ready.
llm = LLM(model="Open-Orca/OpenOrcaxOpenChat-Preview2-13B")

INFO 11-29 16:57:37 llm_engine.py:72] Initializing an LLM engine with config: model='Open-Orca/OpenOrcaxOpenChat-Preview2-13B', tokenizer='Open-Orca/OpenOrcaxOpenChat-Preview2-13B', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=4096, download_dir=None, load_format=auto, tensor_parallel_size=1, quantization=None, seed=0)


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2023-11-29 16:57:38,749:INFO - Added key: store_based_barrier_key:1 to store for rank: 0
2023-11-29 16:57:38,751:INFO - Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 1 nodes.
2023-11-29 16:57:39,906:INFO - Added key: store_based_barrier_key:2 to store for rank: 0
2023-11-29 16:57:39,908:INFO - Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 1 nodes.
2023-11-29 16:57:39,910:INFO - Added key: store_based_barrier_key:3 to store for rank: 0
2023-11-29 16:57:39,911:INFO - Rank 0: Completed store-based barrier for key:store_based_barrier_key:3 with 1 nodes.


INFO 11-29 16:58:06 llm_engine.py:207] # GPU blocks: 1471, # CPU blocks: 327


In [4]:
# Keep only the row identifier and the cleaned passage text; .copy() gives an
# independent frame so the question columns added later do not touch a slice.
subsections_df = subsections_df.loc[:, ['subsection_id', 'clean_text']].copy()

In [5]:


# Subject named in every prompt; generate_qa_output reads this module-level value.
topic = 'computer programming in python'
# One question is generated per key of definition_dict, in insertion order.
question_types = list(definition_dict)
# Sample passage (row 23) for trying the generator by hand. generate_qa_output
# takes its own `excerpt` argument, so it does not read this variable.
excerpt = subsections_df['clean_text'].iloc[23]

def generate_qa_output(excerpt, question_type):
    """Generate one short-answer question, with its answer, about ``excerpt``.

    Builds a first-person prompt from the excerpt, the module-level ``topic``
    and the definition stored in ``definition_dict[question_type]``, then has
    the module-level vLLM engine ``llm`` continue it. Sampling uses
    temperature 0.8 / top_p 0.95, so repeated calls give different text.

    Args:
        excerpt: Passage text the question should be based on.
        question_type: Key of ``definition_dict`` naming the kind of question.

    Returns:
        The generated text before the first ``END OF QUESTION`` marker, with
        any ``[`` at either end and then surrounding whitespace removed. If
        the model never emits the marker, the whole completion (at most 248
        tokens) is returned after the same cleanup.

    Raises:
        KeyError: If ``question_type`` is not a key of ``definition_dict``.
    """
    stop_marker = 'END OF QUESTION'
    type_def = definition_dict[question_type]
    preface = f'''
    I will take the following quoted text about {topic} and create a short-answer question with only one correct answer.
    I will also include the correct answer. 
    The following is the text on the aforementioned topic:
    
    [START OF EXCERPT]
    {excerpt}
    [END OF EXCERPT]
    '''

    command = f'''
    Here is a short-answer question for the text above. This will be a {question_type} question. Here is a definition of that type of question: {type_def}
    First, I will write the type of question. Then, I will write the question. Finally, I will write the correct answer. At the end I will write [END OF QUESTION].
    '''

    # Named `prompt` rather than `input` so the builtin is not shadowed.
    prompt = preface + command

    # `stop` ends decoding as soon as the marker is produced instead of
    # generating (and then discarding) text up to max_tokens. Per vLLM's
    # SamplingParams documentation the stop string itself is not included in
    # the returned text, so the result is the same text the split below
    # would have kept.
    sampling_params = SamplingParams(
        temperature=0.8,
        top_p=0.95,
        max_tokens=248,
        stop=[stop_marker],
    )
    completion = llm.generate(prompt, sampling_params, use_tqdm=False)[0].outputs[0].text

    # The split is kept as a safety net in case the marker is still present;
    # strip('[') drops the opening bracket of "[END OF QUESTION]".
    return completion.split(stop_marker)[0].strip('[').strip()

In [6]:
from tqdm import tqdm
# Register tqdm's `progress_apply` on pandas objects.
tqdm.pandas()

# Add one column per question type. Every pass calls the model once per row,
# so this is the expensive cell (the bars below show ~10 minutes per type).
for question_type in question_types:
    print(question_type)
    subsections_df[question_type] = subsections_df['clean_text'].progress_apply(
        lambda passage: generate_qa_output(passage, question_type)
    )

inference


100%|██████████| 56/56 [09:55<00:00, 10.63s/it]


reflection


100%|██████████| 56/56 [09:55<00:00, 10.64s/it]


elaboration


100%|██████████| 56/56 [09:54<00:00, 10.62s/it]


In [13]:
# Spot-check: show the reflection question generated for the row at position 30.
print(subsections_df['reflection'].iloc[30])

Type of Question: Reflection
    
    Question: Look back at the information provided in the text. Based on the examples given, what is the difference between the variables message, n, and pi?
    
    Correct Answer: The variable message is a string, n is an integer, and pi is an approximate value.
