In [13]:
############################
######### SETTINGS #########
############################

#%pip install lightgbm

# Dependencies
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

import numpy as np
import pandas as pd
from datasets import Dataset
from sklearn.model_selection import KFold
from tqdm import tqdm
tqdm.pandas()

import seaborn as sns
import evaluate

# Constants
# Random seed; passed as `random_state` to the KFold splitter in the CV training cell.
RS = 42


In [2]:
############################
######## FUNCTIONS #########
############################

In [68]:
# Load the per-row cross-validation results and the held-out test results
# produced by earlier experiments.
cv_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./experiments/csv_results/cv_results.csv",
        "./experiments/csv_results/test_df.csv",
    )
)

In [69]:
from sklearn.ensemble import GradientBoostingRegressor

In [70]:
############################
######### CV TRAINING ######
############################

# Number of CV folds. The loop below selects rows by the precomputed
# cv_df["fold"] column, which is expected to hold the values 0 .. N_SPLITS - 1.
N_SPLITS = 3

# NOTE(review): `kf` is not used for splitting in this cell (folds come from
# cv_df["fold"]); it is kept so that any other cell relying on it still works.
kf = KFold(n_splits=N_SPLITS, random_state=RS, shuffle=True)

# Target columns: one meta-regressor is trained per column to predict that
# score from the TF-IDF representation of the input text.
y_model = ["predictions_ft_rouge", "predictions_zs_rouge"]

# The test inputs do not depend on the target column or the fold, so select
# them once instead of on every iteration.
X_test = test_df.loc[:, "input_sequence"]

full_results = {}
for column in y_model:
    # Test targets for this column (not used for fitting; kept for inspection).
    y_test = test_df.loc[:, column]

    fold_results = []  # out-of-fold prediction frames, one per fold
    test_preds = []    # test-set predictions, one array per fold

    for fold in range(N_SPLITS):
        is_val = cv_df.fold == fold

        X_train = cv_df.loc[~is_val, "input_sequence"]
        X_val = cv_df.loc[is_val, "input_sequence"]

        y_train = cv_df.loc[~is_val, f"fold_{column}"]
        y_val = cv_df.loc[is_val, f"fold_{column}"]

        # TF-IDF vectorization: vocabulary/IDF are fitted on the training
        # folds only, then applied to the validation fold and the test set.
        vectorizer = TfidfVectorizer()
        X_train_tfidf = vectorizer.fit_transform(X_train)
        X_val_tfidf = vectorizer.transform(X_val)
        X_test_tfidf = vectorizer.transform(X_test)

        # Gradient-boosting regressor (an SVR(kernel='linear') was tried
        # previously, hence the variable name). Seeded with RS so that
        # re-running the notebook reproduces the same predictions.
        svm = GradientBoostingRegressor(random_state=RS)
        svm.fit(X_train_tfidf, y_train)

        # Out-of-fold predictions and per-row absolute errors. `preds_df`
        # inherits the index of `y_val` (the original cv_df row labels), so the
        # predictions can later be assigned back to cv_df by index alignment.
        y_pred = svm.predict(X_val_tfidf)
        preds_df = pd.DataFrame(data={"label": y_val, "pred": y_pred})
        preds_df["i_mae"] = (preds_df["label"] - preds_df["pred"]).abs()
        # Baseline: always predict the mean label of this validation fold.
        preds_df["base_avg"] = preds_df["label"].mean()
        preds_df["base_i_mae"] = (preds_df["label"] - preds_df["base_avg"]).abs()
        fold_results.append(preds_df)

        # Test predictions of this fold's model
        y_test_pred = svm.predict(X_test_tfidf)
        test_preds.append(y_test_pred)

    # Concatenate the out-of-fold frames once, and average the per-fold test
    # predictions into a single prediction per test row.
    results = pd.concat(fold_results)
    full_results[column] = {"cv_results": results, "test_preds": np.array(test_preds).mean(axis=0)}


In [72]:
# Copy the meta-regressor outputs into the result frames:
# new column name -> key of the corresponding entry in `full_results`.
meta_columns = {
    "predictions_meta_zs": "predictions_zs_rouge",
    "predictions_meta_ft": "predictions_ft_rouge",
}

# Out-of-fold predictions are a Series and are aligned to cv_df by index.
for meta_column, source_key in meta_columns.items():
    cv_df[meta_column] = full_results[source_key]["cv_results"]["pred"]

# Fold-averaged test predictions are a plain array and are assigned by position.
for meta_column, source_key in meta_columns.items():
    test_df[meta_column] = full_results[source_key]["test_preds"]

In [73]:
# ROUGE metric object; its `compute` method is used below to score the
# predictions selected by the meta-approach.
rouge = evaluate.load("rouge")

In [62]:
# cv_df["FTvsZS"] = cv_df["fold_predictions_ft_rouge"] > cv_df["fold_predictions_zs_rouge"]
# cv_df.loc[cv_df["FTvsZS"], "ModelUsed"] = "FT"
# cv_df.loc[~cv_df["FTvsZS"], "ModelUsed"] = "ZS"
# cv_df.loc[cv_df["ModelUsed"]=="ZS", "MetaPred"] = cv_df.loc[cv_df["ModelUsed"]=="ZS", "fold_predictions_zs"]
# cv_df.loc[cv_df["ModelUsed"]=="FT", "MetaPred"] = cv_df.loc[cv_df["ModelUsed"]=="FT", "fold_predictions_ft"]

# cv_df.loc[:, "meta_rouge"] = rouge.compute(references=cv_df["output_sequence"], predictions=cv_df["MetaPred"], use_aggregator=False)["rouge1"]

# test_df["FTvsZS"] = test_df["predictions_ft_rouge"] > test_df["predictions_zs_rouge"]
# test_df.loc[test_df["FTvsZS"], "ModelUsed"] = "FT"
# test_df.loc[~test_df["FTvsZS"], "ModelUsed"] = "ZS"
# test_df.loc[test_df["ModelUsed"]=="ZS", "MetaPred"] = test_df.loc[test_df["ModelUsed"]=="ZS", "predictions_zs"]
# test_df.loc[test_df["ModelUsed"]=="FT", "MetaPred"] = test_df.loc[test_df["ModelUsed"]=="FT", "predictions_ft"]

# test_df.loc[:, "meta_rouge"] = rouge.compute(references=test_df["output_sequence"], predictions=test_df["MetaPred"], use_aggregator=False)["rouge1"]

# cv_df.meta_rouge.mean()
# test_df.meta_rouge.mean()

In [74]:
# Route each CV row to the model ("FT" or "ZS") whose meta-predicted score is
# higher; ties and missing comparisons fall back to "ZS".
prefer_ft_cv = cv_df["predictions_meta_ft"] > cv_df["predictions_meta_zs"]
cv_df["FTvsZS"] = prefer_ft_cv
cv_df["ModelUsed"] = np.where(prefer_ft_cv, "FT", "ZS")

# Take the prediction of the chosen model for every row.
cv_df["MetaPred"] = cv_df["fold_predictions_ft"].where(
    prefer_ft_cv, cv_df["fold_predictions_zs"]
)

# Per-row (non-aggregated) ROUGE-1 of the selected predictions against the references.
cv_rouge_scores = rouge.compute(
    references=cv_df["output_sequence"],
    predictions=cv_df["MetaPred"],
    use_aggregator=False,
)
cv_df.loc[:, "meta_rouge"] = cv_rouge_scores["rouge1"]

In [75]:
# Route each test row to the model ("FT" or "ZS") whose meta-predicted score
# is higher; ties and missing comparisons fall back to "ZS".
prefer_ft_test = test_df["predictions_meta_ft"] > test_df["predictions_meta_zs"]
test_df["FTvsZS"] = prefer_ft_test
test_df["ModelUsed"] = np.where(prefer_ft_test, "FT", "ZS")

# Take the prediction of the chosen model for every row.
test_df["MetaPred"] = test_df["predictions_ft"].where(
    prefer_ft_test, test_df["predictions_zs"]
)

# Per-row (non-aggregated) ROUGE-1 of the selected predictions against the references.
test_rouge_scores = rouge.compute(
    references=test_df["output_sequence"],
    predictions=test_df["MetaPred"],
    use_aggregator=False,
)
test_df.loc[:, "meta_rouge"] = test_rouge_scores["rouge1"]

In [76]:
# Report the mean per-row ROUGE-1 of the meta-approach on the CV and test sets.
cv_meta_rouge_mean = cv_df["meta_rouge"].mean()
test_meta_rouge_mean = test_df["meta_rouge"].mean()

print(f"Cross Validation Rouge Meta-Approach {cv_meta_rouge_mean}")
print(f"Test Rouge Meta-Approach {test_meta_rouge_mean}")

Cross Validation Rouge Meta-Approach 0.39594524943456555
Test Rouge Meta-Approach 0.4208453737174197
