In [250]:
from datasets import load_dataset_builder, load_dataset, get_dataset_infos, get_dataset_config_names, list_datasets
from langchain.prompts import load_prompt
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.schema.output_parser import StrOutputParser
from langchain.chat_models import ChatOpenAI
from operator import itemgetter
from sklearn.model_selection import train_test_split
from typing import List, Dict, Any, Optional, Union, Tuple, Callable
from dataclasses import dataclass
from tqdm import tqdm
import json
import yaml
import pandas as pd
import numpy as np

In [67]:
# list_datasets()

# Data

In [68]:
def load_dataset_by_name(name):
    df = pd.read_csv(f"data/{name}_data.csv")
    df.columns.values[0] = 'text'
    df.columns.values[1] = 'class'      
    df = df.dropna()
    return df

In [317]:
def split_datasets(df_dict, test_size=0.2):
    df_dict_train = {}
    df_dict_test = {}

    for dataset, df in df_dict.items():
        df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
        df_dict_train[dataset] = df_train
        df_dict_test[dataset] = df_test
    return df_dict_train, df_dict_test

def construct_example_strings(df, str_fn=None):
    df_dict_examples = {}
    for dataset_name, dataset_df in df.items():
        if str_fn:
            df_dict_examples[dataset_name] = dataset_df.apply(str_fn, axis=1).tolist()
        else:
            df_dict_examples[dataset_name] = dataset_df.apply(lambda x: f"{x['text']}[{x['class']}]", axis=1).tolist()
    return df_dict_examples

In [None]:
my_datasets = ["lowercase", 
               "imdb_numbers", 
               "imdb_digits", 
               "backpack", 
               "sd_addition",
               "gpt_digits"]
df_dict = {name: load_dataset_by_name(name) for name in my_datasets}

dict_train, dict_test = split_datasets(df_dict)

str_fn = lambda x: f"sentence: {x['text']}\nclassification: {x['class']}"
dict_examples = construct_example_strings(dict_train, str_fn=str_fn)

In [None]:
# print a few examples from dict_examples
for dataset, examples in dict_examples.items():
    print(f"Dataset: {dataset}")
    print("Examples:")
    for example in examples[:3]:
        print(example)
    print()

# Config

In [422]:
# model_types = {"gpt-4": "gpt-4-1106-preview", "gpt-3":"gpt-3.5-turbo-1106"}

@dataclass
class RunArgs():
    n_test = 15
    n_examples = 15
    test_dataset_name = "gpt_digits"
    model = "gpt-3.5-turbo-1106"
    template = "templates/classification_4.yaml"
    randomize_examples = True
    label_rename_map = {"True":"Type A", "False":"Type B"}

args = RunArgs()

# Model

#### Utility Functions

In [423]:
# def get_examples(x):
#     dataset = x['dataset']
#     examples = dict_examples[dataset]

#     if 'n_examples' in x:
#         n_examples = x['n_examples']
#         examples = examples[:n_examples]

#     "\n".join(examples)
#     return examples

def get_output_parser():
    response_schemas = [
        ResponseSchema(name="sentence", description="the sentence to classify"),
        ResponseSchema(name="classification", description="the classification of the sentence, Type A or Type B")
    ]
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    return output_parser

# def classify_multiple(test_df: pd.DataFrame,) -> str:
#     sentences = '\n'.join(['sentence: ' + s for s in test_data['text'].tolist()])
#     inputs = {"input": sentences, "dataset": args.test_dataset_name}
#     pred = chain.invoke(inputs)
#     return pred

def classify_individually(test_df, chain):
    results = []
    for index, row in tqdm(test_df.iterrows()):
        sentence = row['text']
        ground_truth = row['class']
        
        inputs = {"input": sentence, "dataset": args.test_dataset_name}
        out = chain.invoke(inputs)

        # parsed = output_parser.parse(out.content)
        # assert sentence == parsed['sentence']
        # pred = parsed['classification']

        results.append(out.content)
        # results.append((sentence, ground_truth, pred))
    
    return results
    # return pd.DataFrame(results, columns=["text", "ground_truth", "prediction"])

def get_examples(dataset_name: str, n_examples: int, randomize_examples: bool = False, label_rename_map: Dict = None) -> List[str]:
    df_examples = dict_train[dataset_name]

    if randomize_examples:
        df_examples = df_examples.sample(n_examples)
    else:
        df_examples = df_examples[:n_examples]
    
    df_examples.columns = ['sentence', 'classification']
    df_examples['classification'].astype(bool)
    if label_rename_map:
        df_examples['classification'] = df_examples['classification'].apply(lambda x: label_rename_map[str(x)])

    return df_examples

def json_formatted_examples(examples = pd.DataFrame) -> List[str]:
    json_examples = examples.to_json(orient="records")
    return json_examples
    

#### Components

In [424]:
# model
model = ChatOpenAI(model=args.model)

# prompt
classification_template = load_prompt(args.template)

# output parser
output_parser = get_output_parser()
classification_template = classification_template.partial(output_format=output_parser.get_format_instructions())

# Run Chain

In [425]:
chain = (
    RunnablePassthrough.assign(
        examples=RunnableLambda(get_examples)
    )
    | classification_template
    | model
)

# chain = (
#     RunnablePassthrough.assign(
#         examples=RunnableLambda(get_examples)
#     )
#     | {"out": classification_template | model | RunnableLambda(lambda x: x.content)}
# )

In [427]:
examples = get_examples(
    args.test_dataset_name, 
    args.n_examples, 
    randomize_examples=args.randomize_examples,
    label_rename_map=args.label_rename_map
)

In [429]:
json_formatted_examples(examples)

'[{"sentence":"The human brain weighs about 3 pounds.","classification":"Type A"},{"sentence":"India became independent in 1947.","classification":"Type A"},{"sentence":"Shakespeare wrote \'Hamlet\'.","classification":"Type B"},{"sentence":"The heart circulates blood throughout the body.","classification":"Type B"},{"sentence":"The Titanic sank in 1912.","classification":"Type A"},{"sentence":"Mona Lisa has no eyebrows.","classification":"Type B"},{"sentence":"A group of crows is called a murder.","classification":"Type B"},{"sentence":"Venus is the hottest planet in our solar system.","classification":"Type B"},{"sentence":"The human liver has over 500 functions.","classification":"Type B"},{"sentence":"The human body has 206 bones.","classification":"Type A"},{"sentence":"2 apples are on the table.","classification":"Type A"},{"sentence":"Neon lights were first demonstrated in 1910.","classification":"Type A"},{"sentence":"The Dead Sea is one of the saltiest bodies of water in the wo

In [366]:
test_data = dict_test[args.test_dataset_name][:args.n_test]
sentences = '\n'.join(['sentence: ' + s for s in test_data['text'].tolist()])

In [367]:
print(sentences[:5])

sente


In [360]:
# # results_df = classify_individually(test_data)
# preds = classify_multiple(sentences)
# preds
# out = preds.content[8:-5]
# out = json.loads(out)

# results_df = pd.DataFrame(preds).rename(columns={"sentence": "text", "classification": "prediction"})
# results_df['ground_truth'] = test_data['class'].tolist()
# results_df

In [285]:
# calculate percent correct in results_df
correct = results_df['ground_truth'] == results_df['prediction']
print(f"percent correct: {correct.mean()}")

percent correct: 0.95


In [276]:
# results = []
# for index, row in test.iterrows():
#     text = row['text']
#     ground_truth = row['class']
    
#     inputs = {"input": text, "dataset": test_dataset_name}
#     pred = chain.invoke(inputs)
#     results.append((text, ground_truth, pred))

#     print(text)
#     print(f"predicted: {pred}")
#     print(f"actual: {ground_truth}")
#     print()

# results_df = pd.DataFrame(results, columns=["text", "ground_truth", "prediction"])

In [157]:
# input = "\n".join([f"{i+1}. {sentence}" for i, sentence in enumerate(test['text'].tolist())])
# def classify_many():
#     # input = "\n".join([f"{i+1}. {sentence}" for i, sentence in enumerate(test['text'].tolist())])
#     inputs = {"input": input, "dataset": test_dataset_name, "n_examples": 100}
#     pred = chain.invoke(inputs)
#     return pred

# pred = classify_many()

# def classify_one(sentence):
#     inputs = {"input": sentence, "dataset": test_dataset_name, "n_examples": 100}
#     pred = chain.invoke(inputs)
#     return pred

In [277]:
results_df

Unnamed: 0,text,prediction,ground_truth
0,3 + 3 = 6,Type A,Type A
1,8 + 8 = 16,Type A,Type A
2,6 + 0 = 4,Type B,Type B
3,4 + 1 = 5,Type A,Type A
4,4 + 3 = 18,Type B,Type B
