# Load Data

In [1]:
# Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax

In [2]:
# Loading data from csv
df = pd.read_csv('Sarcasm_Model_Dataset.csv')
df.head()

# Splitting data into test and train (80:20 split)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=0)

# Frame Classifier

# Audio Classifier

# Text Classifer

In [14]:
# Load classifier
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)

# Classify text and add sentiment and score columns to DataFrame
def classify_text(row):
    text = row['text']
    encoded_input = tokenizer(text, return_tensors='tf')
    output = model(**encoded_input)
    scores = output[0][0].numpy()  # Convert TensorFlow tensor to NumPy array
    scores = softmax(scores)
    ranking = np.argsort(scores)[::-1]
    l = config.id2label[ranking[0]]  # Take the label with highest score
    s = np.round(float(scores[ranking[0]]), 4)
    return pd.Series({'sentiment': l, 'score': s})

# Apply classifier to each row in the DataFrame
train_df[['sentiment', 'sentiment_score']] = train_df.apply(classify_text, axis=1)
test_df[['sentiment', 'sentiment_score']] = test_df.apply(classify_text, axis=1)

# Display the DataFrame with new columns
print(train_df.head())
print(test_df.head())

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


           video                                            frame_1  \
104    2_208.mp4  8 8 8 8 8 6 6 6 6 6 7 7 7 6 6 6 5 4 9 23 14 13...   
141    2_575.mp4  8 8 6 3 8 12 14 33 44 58 54 24 37 29 35 38 38 ...   
4      1_276.mp4  123 124 124 92 31 11 14 15 15 16 15 15 15 13 1...   
74   1_10890.mp4  53 58 63 44 59 155 169 168 167 164 163 161 161...   
136    2_514.mp4  72 97 132 169 162 136 81 155 191 175 194 195 2...   

                                               frame_2  \
104  2 2 2 2 2 2 3 3 3 3 3 2 3 2 1 17 20 14 15 15 1...   
141  2 1 5 15 20 19 13 30 49 33 23 32 33 29 24 29 2...   
4    93 30 3 4 5 6 5 5 6 6 6 7 7 7 9 5 4 4 6 4 2 5 ...   
74   59 57 33 59 70 39 40 46 148 166 161 163 164 16...   
136  181 195 222 197 178 210 230 248 230 105 13 15 ...   

                                               frame_3         tone  \
104  6 6 6 7 7 7 8 8 7 6 6 7 7 7 7 7 7 9 7 6 7 9 9 ...    2_208.wav   
141  14 13 3 1 10 21 19 14 38 42 34 27 45 42 30 26 ...    2_575.wav   
4    123 96

           video                                            frame_1  \
166     2_34.mp4  51 52 51 52 61 60 59 56 60 69 56 45 47 50 55 6...   
20    1_1470.mp4  9 17 46 69 68 101 27 11 16 19 13 14 9 4 18 31 ...   
93     2_268.mp4  77 77 77 77 76 76 78 75 82 71 51 58 65 68 69 7...   
6      1_427.mp4  40 79 77 71 80 92 95 94 96 97 97 99 104 110 11...   
160    2_398.mp4  48 48 48 50 54 59 55 52 51 51 51 50 50 50 49 5...   
81   1_11736.mp4  121 121 118 117 115 114 112 110 110 109 107 10...   
199    2_302.mp4  45 47 46 42 43 43 42 40 45 50 49 47 42 39 42 4...   
140     2_43.mp4  97 95 102 104 107 109 110 111 111 112 112 113 ...   
67    1_7661.mp4  58 63 59 54 39 40 46 49 58 82 95 74 67 64 62 6...   
117    2_345.mp4  89 93 91 91 85 73 74 82 88 89 89 88 88 87 87 8...   

                                               frame_2  \
166  52 52 52 52 57 62 56 55 60 68 55 43 46 51 57 6...   
20   23 20 19 21 23 25 23 8 9 35 30 15 17 38 123 12...   
93   72 72 73 73 73 73 73 73 73 73 73 73 73 

# Sarcasm Detection Model Using Late Fusion