In [20]:
# ! pip install --upgrade --quiet  langchain-google-genai

In [1]:
import pandas as pd
import os
from datasets import load_dataset
from langchain_google_genai import GoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
from tqdm import tqdm
from ast import literal_eval

In [2]:
dataset = load_dataset("cais/mmlu", "sociology")

In [3]:
load_dotenv('../.env')

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

In [4]:
llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY)

In [5]:
template = """
Imagine you are expert in sociology, given the following question from the topic of sociology.
Go through it thoroughly and tell whether it is a valid question or not.

Question: {question}
Options: {options}

Convey your opinion by saying "Valid" or "Invalid", and add a small explanation if you deem necessary. 
Return this in a json format with the keys "validity" and "explanation" """
prompt = PromptTemplate.from_template(template)

chain = prompt | llm


print(chain.invoke({"question": dataset['test'][0]['question'], 
                    "options":dataset['test'][0]['choices']
                   }))

```json
{
  "validity": "Valid",
  "explanation": "Mass-society theory is a sociological theory that suggests that the mass media have a powerful influence on society, and that this influence can be negative. The theory posits that the mass media can manipulate 'the masses' as vulnerable, passive consumers, and that this can lead to social problems such as crime, violence, and political extremism."
}
```


## 1.1 Creating the loop to check for validity in one topic of MMLU

In [6]:
count = dataset['test'].shape[0]
validities = []
for i in tqdm(range(count)):
    valid = chain.invoke({"question": dataset['test'][i]['question'], 
                    "options":dataset['test'][i]['choices']
                   })
    validities.append(valid)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 201/201 [07:05<00:00,  2.12s/it]


In [7]:
import re

def replace_inner_double_quotes(text):
    # Find the index of "explanation:" in the text
    explanation_index = text.find('"explanation":')

    # If "explanation:" is not found, return the original text
    if explanation_index == -1:
        return text

    # Extract the text after "explanation:"
    explanation_text = text[explanation_index + len('"explanation":'):]

    # Find the indices of the first and last double quotes in the explanation text
    start_quote_index = explanation_text.find('"')
    end_quote_index = explanation_text.rfind('"')

    # If no double quotes are found, return the original text
    if start_quote_index == -1 or end_quote_index == -1:
        return text

    # Extract the inner explanation text
    inner_explanation_text = explanation_text[start_quote_index + 1:end_quote_index]

    # Replace double quotes with escaped single quotes in the inner explanation text
    modified_inner_explanation_text = re.sub(r'"', r"\'", inner_explanation_text)

    # Replace the original inner explanation text with the modified one
    modified_text = text[:explanation_index + len('"explanation":') + start_quote_index + 1] + modified_inner_explanation_text + text[explanation_index + len("explanation:") + end_quote_index:]

    return modified_text

In [8]:
def get_dict_from_json(snippet):
    if snippet == '': return {}
    if 'json' in snippet:
        text = snippet.split('```json')[1][:-3]
    elif '```' in snippet:
        text = snippet.split('```')[1]
    else:
        text = snippet
        
    # Handling edge case of different JSON generation    
    if '"isValid": true' in text:
        text = text.replace('"isValid": true','"validity": "Valid"')
        
    elif '"isValid": false' in text:
        text = text.replace('"isValid": false','"validity": "Invalid"')
        
    clean_text = text.replace('\n','')
    clean_text = replace_inner_double_quotes(clean_text)
    
    return literal_eval(clean_text)

In [9]:
valids_for_df = [get_dict_from_json(x) for x in validities]

In [129]:
# valid_df = pd.DataFrame(valids_for_df)[['validity','Validity','valid','question_validity','explanation','reason','Explanation']]

In [130]:
# valid_df

In [122]:
valid_df['val'] = valid_df['validity'].astype(str) + valid_df['valid'].astype(str) + valid_df['Validity'].astype(str) + valid_df['question_validity'].astype(str)

In [123]:
valid_df['val'] = valid_df['val'].apply(lambda x: x.replace('nan',''))

In [125]:
clean_valid_df = valid_df[['val','explanation']]

In [127]:
clean_valid_df[clean_valid_df.val == 'Invalid']

Unnamed: 0,val,explanation
20,Invalid,
142,Invalid,The question is invalid because it assumes tha...
150,Invalid,"Triangulation is a research method, not a purp..."
200,Invalid,Domhoff did not identify the exploitation proc...


In [10]:
val_df =  pd.DataFrame(valids_for_df)

In [14]:
val_df[val_df.validity == 'Invalid']

Unnamed: 0,validity,explanation
142,Invalid,The question is invalid because it presents a ...
150,Invalid,Triangulation is not a research purpose. It is...
160,Invalid,The question is invalid because 'buying fewer ...


In [25]:
val_df.iloc[150].explanation

'Triangulation is not a research purpose. It is a research method that involves the use of multiple data sources and methods to enhance the validity and reliability of research findings.s.'

In [26]:
dataset['test'][150]

{'question': "Which of the following is not a 'research purpose'?",
 'subject': 'sociology',
 'choices': ['triangulation', 'explanation', 'description', 'exploration'],
 'answer': 0}