# Analysis and Plots of row completion

In [None]:
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

# append the parent directory to the path
import sys
sys.path.append('..')

from sklearn.model_selection import train_test_split

import jellyfish

import yaml

import experiment_utils
import seaborn as sns
import tqdm

import seaborn as sns
import pickle

import analysis 
import utils

from row_independence import statistical_feature_prediction_test

# re-load upon module change
%load_ext autoreload
%autoreload 2

### row completion test

In [None]:
# Row completion test: for every (dataset, model) pair, load the stored
# completions and report
#   1. how many responses contain the true suffix verbatim, and
#   2. the p-value of the Levenshtein-distance test from `analysis`.
for dataset in ['IRIS', 'uci-wine', 'titanic-train', 'sklearn-diabetes', 'openml-diabetes', 'california-housing', 'adult', 'spaceship-titanic-train']:
    for model in ['gpt-3.5-turbo', 'gpt-4-32k-0314']:
        # every column is read as text; a missing results file is NOT handled
        # here and will raise out of the loop
        completion_df = pd.read_csv(f'../results/{model}/row-completion/{dataset}.csv', dtype=str)

        test_prefixes = completion_df['prefix'].tolist()
        test_suffixes = completion_df['suffix'].tolist()
        responses = completion_df['response'].tolist()

        try:
            # a response counts as an exact match when the stripped true
            # suffix occurs anywhere inside the stripped response
            num_exact_matches = 0
            for test_suffix, response in zip(test_suffixes, responses):
                # the response is not guaranteed to be a string (presumably an
                # empty CSV cell is loaded as NaN), so coerce before stripping
                response = str(response)
                if test_suffix.strip() in response.strip():
                    num_exact_matches += 1

            # the statistical test using the levenshtein distance;
            # each prefix is passed as the list of its lines
            test_prefix_rows = [prefix.split("\n") for prefix in test_prefixes]
            test_result = analysis.levenshtein_distance_t_test(
                responses, test_suffixes, test_prefix_rows
            )
            print(
                experiment_utils.bcolors.BOLD
                + f'{dataset}, {model}: '
                + experiment_utils.bcolors.ENDC
                + experiment_utils.bcolors.Black
                + f"{num_exact_matches}/{len(responses)} exact matches. Levenshtein distance test p-value: {test_result.pvalue:.3f}."
                + experiment_utils.bcolors.ENDC
            )
        except Exception as exc:
            # Best effort: keep going with the remaining dataset/model pairs,
            # but say what went wrong. Catching `Exception` (rather than a
            # bare `except:`) lets KeyboardInterrupt/SystemExit stop the cell.
            print(f'{dataset}, {model}: error ({type(exc).__name__}: {exc})')

### first token test

In [None]:
# First token test: for every dataset,
#   1. run the statistical feature-prediction test on the first token of each
#      CSV row (baseline that uses only the data itself), then
#   2. for every model, count how often the first `num_digits` characters of
#      the response equal the first `num_digits` characters of the true suffix.
for dataset in ['IRIS', 'uci-wine', 'titanic-train', 'sklearn-diabetes', 'openml-diabetes', 'california-housing', 'adult', 'spaceship-titanic-train']:
    print(dataset)
    csv_file = f'../csv/{dataset}.csv'
    # number of leading characters that make up the "first token" for this file
    num_digits = analysis.build_first_token(csv_file)

    # statistical prediction of the first token
    df_data = utils.load_csv_df(csv_file)
    csv_rows = utils.load_csv_rows(csv_file, header=False)
    # NOTE(review): assumes csv_rows has exactly one entry per row of df_data
    # (header excluded); otherwise the column assignment below fails -- confirm
    first_tokens = [row[:num_digits] for row in csv_rows]
    df_data['FIRST_TOKEN'] = first_tokens
    filename = utils.tmp_csv_file(df_data, 'tmp.csv')
    statistical_feature_prediction_test(filename, 'FIRST_TOKEN')

    # analysis of model completions
    for model in ['gpt-3.5-turbo', 'gpt-4-32k-0314']:
        # every column is read as text; a missing results file is NOT handled
        # here and will raise out of the loop
        completion_df = pd.read_csv(f'../results/{model}/row-completion/{dataset}.csv', dtype=str)

        test_prefixes = completion_df['prefix'].tolist()
        test_suffixes = completion_df['suffix'].tolist()
        responses = completion_df['response'].tolist()

        try:
            # a match means the stripped suffix and the stripped response
            # agree on their first `num_digits` characters
            num_exact_matches = 0
            for test_suffix, response in zip(test_suffixes, responses):
                # the response is not guaranteed to be a string (presumably an
                # empty CSV cell is loaded as NaN), so coerce before stripping
                response = str(response)
                if test_suffix.strip()[:num_digits] == response.strip()[:num_digits]:
                    num_exact_matches += 1

            print(
                experiment_utils.bcolors.BOLD
                + f'{dataset}, {model}: '
                + experiment_utils.bcolors.ENDC
                + experiment_utils.bcolors.Black
                + f"{num_exact_matches}/{len(responses)} ({num_exact_matches / len(responses)})."
                + experiment_utils.bcolors.ENDC
            )
        except Exception as exc:
            # Best effort: keep going with the remaining models/datasets, but
            # say what went wrong (e.g. ZeroDivisionError for an empty results
            # file). Catching `Exception` (rather than a bare `except:`) lets
            # KeyboardInterrupt/SystemExit stop the cell.
            print(f'{dataset}, {model}: error ({type(exc).__name__}: {exc})')

    print('-'*80)