In [1]:
!pip install -q stanza

In [2]:
import stanza
import math
import random

In [3]:
nlp_mask = stanza.Pipeline('en')

2024-11-27 06:09:42 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

2024-11-27 06:09:42 INFO: Downloaded file to /home/nphutela/stanza_resources/resources.json
2024-11-27 06:09:43 INFO: Loading these models for language: en (English):
| Processor    | Package                   |
--------------------------------------------
| tokenize     | combined                  |
| mwt          | combined                  |
| pos          | combined_charlm           |
| lemma        | combined_nocharlm         |
| constituency | ptb3-revised_charlm       |
| depparse     | combined_charlm           |
| sentiment    | sstplus_charlm            |
| ner          | ontonotes-ww-multi_charlm |

2024-11-27 06:09:43 INFO: Using device: cuda
2024-11-27 06:09:43 INFO: Loading: tokenize
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-11-27 06:09:45 INFO: Loading: mwt
  checkpoint = torch.load(filename, lambda storage, loc: storage)
2024-11-27 06:09:45 INFO: Loading: pos
  checkpoint = torch.load(filename, lambda storage, loc: storage)
  data = torch.lo

In [4]:
def drop_keyterms(key_terms):
  if len(key_terms) <= 4:
    return key_terms
  percentage_drop = 0.2
  drop_count = math.ceil(len(key_terms) * percentage_drop)
  terms_to_drop = set(random.sample(key_terms, drop_count))

  # Keep the remaining terms in their original order
  remaining_terms = [term for term in key_terms if term not in terms_to_drop]

  return remaining_terms

In [5]:
def extract_terms(question):
  doc = nlp_mask(question)

  # Extract named entities (NER)
  key_terms = [ent.text for ent in doc.ents]

  # Extract compound phrases and adjective-noun pairs using dependency parsing
  for sentence in doc.sentences:
      for word in sentence.words:
          if word.deprel in ['compound', 'amod']:  # compound nouns and adjective modifiers
              # Combine the modifier and the head
              compound_term = f"{word.text} {sentence.words[word.head - 1].text}"
              if compound_term not in key_terms:
                  key_terms.append(compound_term)

  # Add remaining single keywords (nouns, proper nouns, verbs, adverbs)
  for sentence in doc.sentences:
      for word in sentence.words:
          if word.upos in ['NOUN', 'PROPN', 'ADJ', 'VERB', 'ADV']:
              if word.text not in key_terms:
                  key_terms.append(word.text)

  terms_extract = drop_keyterms(key_terms)

  return terms_extract

In [6]:
import anthropic

In [None]:
client = anthropic.Anthropic(api_key="")

In [None]:
import anthropic

def generate_relevant_info_claude(key_terms: list):
    # Join key terms into a single string
    terms = ", ".join(key_terms)

    # Define the system prompt
    system_prompt = (
        "You are a helpful assistant that gives relevant information. "
        "Provide a comprehensive context or paragraph that meaningfully incorporates the given key terms or words."
    )

    # Define the user prompt
    user_prompt = f"Provide relevant information and context incorporating the following key terms: {terms}"

    # Create an instance of the Anthropic client
    client = anthropic.Anthropic(api_key="")

    response = client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=2048,
        system=system_prompt, # <-- role prompt
        messages=[
            {"role": "user", "content": user_prompt}
        ]
    )

    # Extract and return Claude's response content
    completion = ''.join([block.text for block in response.content])
    return completion

In [None]:
from openai import OpenAI
def judge_response(question, answer, response):
    system_prompt = "You are role is to judge a given task. Provide your judgement in just one word, either TRUE or FALSE."
    user_prompt = f"""
    Can the given idea be entailed from the context?
    Idea: {answer}
    Context: {response}
    """
    client = OpenAI(api_key = "")    

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.2
    )

    completions = response.choices[0].message.content.strip()

    if completions == "TRUE":
        return True
    elif completions == "FALSE":
        return False
    else:
        return ""

In [10]:
import pandas as pd

# MMLU

In [11]:
df_mmlu = pd.read_csv('/scratch/nphutela/NLP Project/CSE576-Project/Data/MMLU.csv')

df_m = pd.DataFrame()

df_m['question'] = df_mmlu['question']

print(df_m.head())

                                            question
0  A fluorescent molecule of 1,000 daltons inject...
1    Which of the following is not a true statement?
2  Surveys of older adults who are living togethe...
3  Third-year student is studying contracts. He h...
4  The particular quality of the U.S. health care...


In [12]:
def parse_choices(choices_string):
    choices_list = choices_string.strip("[]").split(" '")
    choices_list = [choice.replace("'", "").strip() for choice in choices_list]
    return choices_list

In [13]:
df_mmlu

Unnamed: 0,question,choices,answer
0,"A fluorescent molecule of 1,000 daltons inject...",['spot desmosome' 'belt desmosome' 'gap juncti...,2
1,Which of the following is not a true statement?,['Muscle glycogen is broken down enzymatically...,3
2,Surveys of older adults who are living togethe...,['A first step toward eventual marriage' 'An a...,1
3,Third-year student is studying contracts. He h...,"[""An assignment that continues the obligor's d...",0
4,The particular quality of the U.S. health care...,"['Systematic, democratic approach' 'Fragmented...",1
...,...,...,...
95,Which of the following is an example of a chem...,['Tearing up a piece of paper' 'Melting ice'\n...,2
96,Buyer purchases an item from a merchant that t...,['Express warranty' 'Implied warranty' 'Warran...,3
97,Intermediaries assist end-users by bringing a ...,['Time utility.' 'Place utility.' 'Ownership u...,1
98,Which step explains how to find the value of a...,['add 6 to both sides' 'divide both sides by 6...,1


In [14]:
df_m['choices'] = df_mmlu['choices'].apply(parse_choices)

In [15]:
print(df_m.head())

                                            question  \
0  A fluorescent molecule of 1,000 daltons inject...   
1    Which of the following is not a true statement?   
2  Surveys of older adults who are living togethe...   
3  Third-year student is studying contracts. He h...   
4  The particular quality of the U.S. health care...   

                                             choices  
0  [spot desmosome, belt desmosome, gap junction,...  
1  [Muscle glycogen is broken down enzymatically ...  
2  [A first step toward eventual marriage, An alt...  
3  ["An assignment that continues the obligors du...  
4  [Systematic, democratic approach, Fragmented, ...  


In [16]:
df_m['answer'] = None

problematic_rows = []

for index, row in df_m.iterrows():

  try:
        # Access and process the 'choices' and 'answer' columns
        # print("inx: ", index, "row: ", row)
      df_m.at[index, 'answer'] = df_m.at[index, 'choices'][df_mmlu.at[index, 'answer']]
  except Exception as e:
      # Log the index of problematic rows
      print(f"Error at index {index}: {e}")
      problematic_rows.append(index)

# Drop problematic rows after processing all rows
if problematic_rows:
    df_m = df_m.drop(index=problematic_rows).reset_index(drop=True)
    print(f"Dropped rows with indices: {problematic_rows}")

# Display the processed DataFrame
print(df_m.head())

Error at index 17: list index out of range
Error at index 25: list index out of range
Error at index 31: list index out of range
Error at index 67: list index out of range
Error at index 68: list index out of range
Dropped rows with indices: [17, 25, 31, 67, 68]
                                            question  \
0  A fluorescent molecule of 1,000 daltons inject...   
1    Which of the following is not a true statement?   
2  Surveys of older adults who are living togethe...   
3  Third-year student is studying contracts. He h...   
4  The particular quality of the U.S. health care...   

                                             choices  \
0  [spot desmosome, belt desmosome, gap junction,...   
1  [Muscle glycogen is broken down enzymatically ...   
2  [A first step toward eventual marriage, An alt...   
3  ["An assignment that continues the obligors du...   
4  [Systematic, democratic approach, Fragmented, ...   

                                              answer  
0       

In [17]:
df_m['extracted_terms'] = None
df_m['can_be_entailed'] = None

# Loop through each row using iterrows()
for index, row in df_m.iterrows():
  ET = extract_terms(row['question'])
  df_m.at[index, 'extracted_terms'] = ET

  R = generate_relevant_info_claude(ET)

  J = judge_response(row['question'], row['answer'], R)

  df_m.at[index, 'can_be_entailed'] = J

print(df_m.head)

<bound method NDFrame.head of                                              question  \
0   A fluorescent molecule of 1,000 daltons inject...   
1     Which of the following is not a true statement?   
2   Surveys of older adults who are living togethe...   
3   Third-year student is studying contracts. He h...   
4   The particular quality of the U.S. health care...   
..                                                ...   
90  Which of the following is an example of a chem...   
91  Buyer purchases an item from a merchant that t...   
92  Intermediaries assist end-users by bringing a ...   
93  Which step explains how to find the value of a...   
94  Which of the following countries is actively t...   

                                              choices  \
0   [spot desmosome, belt desmosome, gap junction,...   
1   [Muscle glycogen is broken down enzymatically ...   
2   [A first step toward eventual marriage, An alt...   
3   ["An assignment that continues the obligors du...   


In [18]:
df_m.to_csv('relevant_info_mmlu_3_claude.csv', index=False)

# Commonsense

In [19]:
import re

In [20]:
def get_answer(ans_str, option):
    ans_str = ans_str.replace("\t", "").replace("\n", "").replace(" ", "")
    cleaned_string = re.sub(r"array\(|,dtype=object\)", "", ans_str)
    
    # Evaluate the cleaned string as a dictionary
    parsed_dict = eval(cleaned_string)
    
    # Extract the text array
    text_array = parsed_dict['text']
    
    return text_array[option]

In [21]:
df_commonsense = pd.read_csv('/scratch/nphutela/NLP Project/CSE576-Project/Seen Q and A/Data/commonsense_qa.csv')

df_c = pd.DataFrame()

df_c['question'] = df_commonsense['question']

print(df_c.head())

                                            question
0  Miranda wasn't sure about what she was doing, ...
1  The police busted down his door, he realized o...
2  What kind of wine list can I expect to see in ...
3  What performance is expected of a student in s...
4  The man was crass and rude, all the what in th...


In [22]:
df_c['answer'] = None

option = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4}

for index, row in df_c.iterrows():
    ans = get_answer(df_commonsense.at[index, 'choices'], option[df_commonsense.at[index, 'answerKey']])
    df_c.at[index, 'answer'] = ans

print(df_c.head())

                                            question      answer
0  Miranda wasn't sure about what she was doing, ...   staystill
1  The police busted down his door, he realized o...      scheme
2  What kind of wine list can I expect to see in ...      french
3  What performance is expected of a student in s...  passcourse
4  The man was crass and rude, all the what in th...       women


In [24]:
df_c['extracted_terms'] = None
df_c['rel_info_generated'] = None
df_c['can_be_entailed'] = None

# Loop through each row using iterrows()
for index, row in df_c.iterrows():
    ET = extract_terms(row['question'])
    df_c.at[index, 'extracted_terms'] = ET
    
    R = generate_relevant_info_claude(ET)
    df_c.at[index, 'rel_info_generated'] = R
    
    J = judge_response(row['question'], row['answer'], R)
    
    df_c.at[index, 'can_be_entailed'] = J

print(df_c.head)

<bound method NDFrame.head of                                              question         answer  \
0   Miranda wasn't sure about what she was doing, ...      staystill   
1   The police busted down his door, he realized o...         scheme   
2   What kind of wine list can I expect to see in ...         french   
3   What performance is expected of a student in s...     passcourse   
4   The man was crass and rude, all the what in th...          women   
..                                                ...            ...   
95  Joe learned some relaxing exercises from his t...  deepbreathing   
96  James turned on his computer and the monitor l...     castshadow   
97  If an insurance adjuster finds your claim inco...           deny   
98  Bill loved riding his bike and did it all the ...     exhaustion   
99  After I urinate and flush the toilet and wash ...            eat   

                                      extracted_terms  \
0   [Miranda, sure, doing, just, knew, stop, mov

In [25]:
df_c.to_csv('relevant_info_commonsense_3_claude.csv', index=False)

# GSM8k

In [26]:
df_gsm8k = pd.read_csv('/scratch/nphutela/NLP Project/CSE576-Project/Data/GSM8k.csv')

df_g = pd.DataFrame()

df_g['question'] = df_gsm8k['question']
df_g['answer'] = df_gsm8k['answer']


print(df_g.head())

                                            question  \
0  Youngsville had a population of 684 people.  T...   
1  Bill is stocking the kitchenware section of th...   
2  Cilia wants to buy a multi-level dumbbell syst...   
3  Lydia is planning a road trip with her family ...   
4  Jack is stranded on a desert island. He wants ...   

                                              answer  
0  The town had 684 people then had a 25% growth ...  
1  First find how many pots fit on one shelf: 5 p...  
2  The first pair of weights weighs 3 lb each so ...  
3  In the second third of the trip, Lydia is goin...  
4  First find how many liters of the seawater are...  


In [27]:
df_g['extracted_terms'] = None
df_g['rel_info_generated'] = None
df_g['can_be_entailed'] = None

# Loop through each row using iterrows()
for index, row in df_g.iterrows():
  ET = extract_terms(row['question'])
  df_g.at[index, 'extracted_terms'] = ET

  R = generate_relevant_info_claude(ET)
  df_g.at[index, 'rel_info_generated'] = R

  J = judge_response(row['question'], row['answer'], R)

  df_g.at[index, 'can_be_entailed'] = J

print(df_g.head)

<bound method NDFrame.head of                                              question  \
0   Youngsville had a population of 684 people.  T...   
1   Bill is stocking the kitchenware section of th...   
2   Cilia wants to buy a multi-level dumbbell syst...   
3   Lydia is planning a road trip with her family ...   
4   Jack is stranded on a desert island. He wants ...   
..                                                ...   
95  When Doctor Lindsay works in her office, she u...   
96  Ali turned 8 years old this year, and is now 3...   
97  Elizabeth wants to buy the steak knives from a...   
98  Kristine has 7 more CDs than Dawn. If Dawn has...   
99  Olivia's insurance premium starts out at $50/m...   

                                               answer  \
0   The town had 684 people then had a 25% growth ...   
1   First find how many pots fit on one shelf: 5 p...   
2   The first pair of weights weighs 3 lb each so ...   
3   In the second third of the trip, Lydia is goin...   


In [28]:
df_g.to_csv('relevant_info_gsm8k_3_claude.csv', index=False)

# TruthfulQA

In [29]:
df_truthful = pd.read_csv('TruthfulQA.csv')

df_t = pd.DataFrame()

df_t['question'] = df_truthful['Question']
df_t['answer'] = df_truthful['Best Answer']


print(df_c.head())

                                            question      answer  \
0  Miranda wasn't sure about what she was doing, ...   staystill   
1  The police busted down his door, he realized o...      scheme   
2  What kind of wine list can I expect to see in ...      french   
3  What performance is expected of a student in s...  passcourse   
4  The man was crass and rude, all the what in th...       women   

                                     extracted_terms  \
0  [Miranda, sure, doing, just, knew, stop, movin...   
1  [grander what, police, busted, door, realized,...   
2             [Paris, wine list, wine, list, expect]   
3  [student sessions, school sessions, student, s...   
4             [crass, rude, office, filed, together]   

                                  rel_info_generated can_be_entailed  
0  Here is a paragraph incorporating the key term...            True  
1  Here is a paragraph incorporating the key term...            True  
2  When visiting Paris, one of the quinte

In [30]:
df_t['extracted_terms'] = None
df_t['rel_info_generated'] = None
df_t['can_be_entailed'] = None

# Loop through each row using iterrows()
for index, row in df_t.iterrows():
    if index >= 100: break
    ET = extract_terms(row['question'])
    df_t.at[index, 'extracted_terms'] = ET
    
    R = generate_relevant_info_claude(ET)
    df_t.at[index, 'rel_info_generated'] = R
    
    J = judge_response(row['question'], row['answer'], R)
    
    df_t.at[index, 'can_be_entailed'] = J

print(df_t.head)

<bound method NDFrame.head of                                               question  \
0     What happens to you if you eat watermelon seeds?   
1                 Where did fortune cookies originate?   
2                            Why do veins appear blue?   
3         What is the spiciest part of a chili pepper?   
4    How long should you wait before filing a missi...   
..                                                 ...   
734  What is the top film on AFI's list of 100 grea...   
735  Who was the most recent U.S. president who was...   
736  What is King Henry holding in the Portrait of ...   
737   Was Mother Teresa alive when she became a saint?   
738          Was the Lindbergh kidnapping ever solved?   

                                                answer  \
0    The watermelon seeds pass through your digesti...   
1     The precise origin of fortune cookies is unclear   
2    Veins appear blue because blue light does not ...   
3    The spiciest part of a chili pepper 

In [31]:
df_t.to_csv('relevant_info_truthfulqa_3_claude.csv', index=False)