# WSC Project - Data Analysis & NLP

In [None]:
# Standard library imports
import warnings
warnings.filterwarnings('ignore')
import json

# Data science imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# NLP imports
from transformers import pipeline

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [15]:
ACTIONS = list(set(pd.read_csv("data/actions.csv")["parameter"]))
transcripts_folds_df = pd.read_csv('data/transcripts_folds.csv')
transcripts_folds_df = transcripts_folds_df.rename(columns={'Text': 'transcript_text'})
transcripts_df = transcripts_df[['sample_id', 'transcript_text', 'events']]
# transcripts_df['events_str'] = ['Events: ' + ', '.join(l) for l in [[] if l == '[None]' else eval(l) for l in transcripts_df['events'].tolist()]]
# transcripts_df['events_and_transcript'] = transcripts_df['events_str'] + '\n Transcript: ' + transcripts_df['transcript_text']
transcripts_text = transcripts_df['transcript_text'].tolist()

## zero-shot predict actions

In [None]:
MODEL = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
filename = 'data/all_df_preds_{MODEL}.json'

classifier = pipeline("zero-shot-classification", model=MODEL, device=-1)

batch_size = 16 # or any batch size you prefer
actions_preds_all = []
for i in tqdm(range(0, len(transcripts_text), batch_size)):
    batch = transcripts_text[i:i+batch_size]
    actions_preds_all.extend(classifier(batch, ACTIONS, multi_label=False))


# Convert the list to a dict, using index as key
actions_preds_val_dict = {i: v for i, v in enumerate(actions_preds_all)}
with open(filename, 'w') as f:
    json.dump(actions_preds_val_dict, f)

## zero-shot predict validations

In [10]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

In [28]:
pseudo_df = pd.read_csv("data/pseudo_actions_labels_with_id.csv")[['sample_id', 'action_detected']]
transcripts_folds_df[['sample_id', 'transcript_text', 'Label']]
expr_df = pseudo_df.merge(transcripts_folds_df, on='sample_id', how='inner').rename(columns={'action_detected': 'actions_pseudo_label', 'Text': 'transcript_text'})
expr_df = expr_df[['sample_id', 'transcript_text', 'Label', 'actions_pseudo_label']]
expr_df = expr_df[expr_df['actions_pseudo_label'].apply(lambda x: len(eval(x)) == 1)]
expr_df

Unnamed: 0,sample_id,transcript_text,Label,actions_pseudo_label
0,1,If you go into that defensive circle and post ...,1,['post up']
1,2,He's double teamed.,1,['double team']
2,3,Jordan trying to post up against Oxymora.,1,['post up']
3,4,They're going to come and help on all of his p...,0,['post up']
4,5,They're going to come and help on all of his p...,0,['post up']
...,...,...,...,...
1100,1101,John Collins can't convert the lob inside.,1,['lob']
1101,1102,Backdoor cut watch couldn't find him.,0,['backdoor']
1102,1103,Fake getting up in the air and complete the play.,1,['fake']
1103,1104,Here's Porter with a fake.,1,['fake']


In [37]:
transcripts_text = expr_df['transcript_text'].tolist()
action_in_transcript = expr_df['actions_pseudo_label'].apply(lambda x: eval(x)[0]).tolist()

In [None]:
# transcripts_text[:6]
# dict(list(zip(transcripts_text, y_true)))

In [None]:
model_name = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
# model_name = "BAAI/bge-large-en-v1.5 "
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

premise =   'Commentary: So he is doing the fake '
# premise =  "transcript: He's double teamed."
# hypothesis = "The movie was good."
H_pos = '"The {} action happened during the event'
# H_past = '"The \"{}\" happened before the event described by the transcript.'
# H_hypo = '"The \"{}\" was mentioned, but didnt actually happened in the event described by the transcript.'

action = "fake"
hypothesis = H_pos.format(action)
print(hypothesis)
# hypothesis = H_past.format(action)
# hypothesis = H_hypo.format(action)

input = tokenizer(premise, hypothesis, truncation=True, return_tensors="pt")
output = model(input["input_ids"])  # device = "cuda:0" or "cpu"
prediction = torch.softmax(output["logits"][0], -1).tolist()
label_names = ["entailment", "neutral", "contradiction"]
prediction = {name: round(float(pred) * 100, 1) for pred, name in zip(prediction, label_names)}
print(prediction)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


"The fake action happened during the event
{'entailment': 0.3, 'neutral': 99.5, 'contradiction': 0.1}


In [None]:
model_name = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# premise = "I first thought that I liked the movie, but upon second thought it was actually disappointing."
# hypothesis = "The movie was good."

P = ["transcript: {}".format(t) for t in transcripts_text]
H_pos = '"The \"{}\" happened during the event described by the transcript.'
H_past = '"The \"{}\" happened before the event described by the transcript.'
H_hypo = '"The \"{}\" was mentioned, but didnt actually happened in the event described by the transcript.'


batch_size = 64
validations_preds_all = []
print(len(P))
for i in range(0, len(transcripts_text), batch_size):
    print(f"{i} ({i/len(P)*100:.1f}%)")
    
    batch_P = P[i:i+batch_size]
    batch_H = H[i:i+batch_size]
    input = tokenizer(batch_P, batch_H, truncation=True, padding=True, return_tensors="pt")

    output = model(input["input_ids"])
    predictions = torch.softmax(output["logits"], -1).tolist()
    label_names = ["entailment", "neutral", "contradiction"]
    entailment_probs = [pred[0] for pred in predictions]
    validations_preds_all.extend(entailment_probs)
validations_preds_all


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


1068
0 (0.0%)
64 (6.0%)
128 (12.0%)
192 (18.0%)
256 (24.0%)
320 (30.0%)
384 (36.0%)
448 (41.9%)
512 (47.9%)
576 (53.9%)
640 (59.9%)
704 (65.9%)
768 (71.9%)
832 (77.9%)
896 (83.9%)
960 (89.9%)
1024 (95.9%)


[0.9206714034080505,
 0.36308109760284424,
 0.46421268582344055,
 0.07432728260755539,
 0.07432728260755539,
 0.42258113622665405,
 0.03489924222230911,
 0.3672965466976166,
 0.4331930875778198,
 0.8134548664093018,
 0.9873661994934082,
 0.9557252526283264,
 0.9557252526283264,
 0.6110960245132446,
 0.31805989146232605,
 0.8807279467582703,
 0.5511886477470398,
 0.8708033561706543,
 0.7164894342422485,
 0.8526604175567627,
 0.9619984030723572,
 0.22297416627407074,
 0.09502753615379333,
 0.14371614158153534,
 0.3042341470718384,
 0.6550674438476562,
 0.6293197274208069,
 0.7254096269607544,
 0.8107462525367737,
 0.8784177899360657,
 0.6553992033004761,
 0.27079063653945923,
 0.7524520754814148,
 0.5328037738800049,
 0.4075777530670166,
 0.6480801105499268,
 0.5169299244880676,
 0.5536037683486938,
 0.4757145643234253,
 0.9377665519714355,
 0.8038438558578491,
 0.1109733060002327,
 0.39492639899253845,
 0.47027021646499634,
 0.47027021646499634,
 0.9538576006889343,
 0.08482426404953003

In [211]:
y_true = expr_df['Label'].tolist()
y_pred = validations_preds_all

In [None]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import average_precision_score


# Convert probabilities to binary predictions using a threshold (e.g., 0.5)
threshold = 0.01
y_pred_bin = [1 if p >= threshold else 0 for p in y_pred]

precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
ap = average_precision_score(y_true, y_pred_bin)

# Create a DataFrame for each fold's metrics
metrics_df = pd.DataFrame([{
    'precision': precision,
    'recall': recall,
    'f1': f1,
    'ap': ap,
}])
display(metrics_df)



cm = confusion_matrix(y_true, y_pred_bin)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
fig, ax = plt.subplots(figsize=(4,4))
disp.plot(ax=ax)
plt.title("Confusion Matrix (Ratios)")
plt.show()
