# Load Data

In [19]:
# Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
# Loading data from csv
df = pd.read_csv('Sarcasm_Model_Dataset.csv')
df.head()

# Splitting data into test and train (80:20 split)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=0)

# Frame Classifier

# Audio Classifier

# Text Classifer

In [14]:
# Load sentiment classifier
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
config = AutoConfig.from_pretrained(MODEL)
model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)

# Classify text and add sentiment and score columns to DataFrame
def classify_text(row):
    text = row['text']
    encoded_input = tokenizer(text, return_tensors='tf')
    output = model(**encoded_input)
    scores = output[0][0].numpy()  # Convert TensorFlow tensor to NumPy array
    scores = softmax(scores)
    ranking = np.argsort(scores)[::-1]
    l = config.id2label[ranking[0]]  # Take the label with highest score
    s = np.round(float(scores[ranking[0]]), 4)
    return pd.Series({'sentiment': l, 'score': s})

# Apply classifier to each row in the DataFrame
train_df[['sentiment', 'sentiment_score']] = train_df.apply(classify_text, axis=1)
test_df[['sentiment', 'sentiment_score']] = test_df.apply(classify_text, axis=1)

# Display the DataFrame with new columns
print(train_df.head())
print(test_df.head())

All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some layers of TFRobertaForSequenceClassification were not initialized from the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


           video                                            frame_1  \
104    2_208.mp4  8 8 8 8 8 6 6 6 6 6 7 7 7 6 6 6 5 4 9 23 14 13...   
141    2_575.mp4  8 8 6 3 8 12 14 33 44 58 54 24 37 29 35 38 38 ...   
4      1_276.mp4  123 124 124 92 31 11 14 15 15 16 15 15 15 13 1...   
74   1_10890.mp4  53 58 63 44 59 155 169 168 167 164 163 161 161...   
136    2_514.mp4  72 97 132 169 162 136 81 155 191 175 194 195 2...   

                                               frame_2  \
104  2 2 2 2 2 2 3 3 3 3 3 2 3 2 1 17 20 14 15 15 1...   
141  2 1 5 15 20 19 13 30 49 33 23 32 33 29 24 29 2...   
4    93 30 3 4 5 6 5 5 6 6 6 7 7 7 9 5 4 4 6 4 2 5 ...   
74   59 57 33 59 70 39 40 46 148 166 161 163 164 16...   
136  181 195 222 197 178 210 230 248 230 105 13 15 ...   

                                               frame_3         tone  \
104  6 6 6 7 7 7 8 8 7 6 6 7 7 7 7 7 7 9 7 6 7 9 9 ...    2_208.wav   
141  14 13 3 1 10 21 19 14 38 42 34 27 45 42 30 26 ...    2_575.wav   
4    123 96

In [42]:
# Encode labels
label_encoder = LabelEncoder()
train_df['sarcasm_encoded'] = label_encoder.fit_transform(train_df['sarcasm'])
test_df['sarcasm_encoded'] = label_encoder.fit_transform(test_df['sarcasm'])

# Text preprocessing
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X_text_train = tfidf_vectorizer.fit_transform(train_df['text']).toarray()
X_text_test = tfidf_vectorizer.transform(test_df['text']).toarray()

# Convert sentiment labels to numerical values
sentiment_mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
train_df['sentiment_encoded'] = train_df['sentiment'].map(sentiment_mapping)
test_df['sentiment_encoded'] = test_df['sentiment'].map(sentiment_mapping)

# Combine text features with sentiment scores
X_train = np.concatenate((X_text_train, train_df[['sentiment_encoded', 'sentiment_score']].values), axis=1)
y_train = train_df['sarcasm_encoded']
X_test = np.concatenate((X_text_test, test_df[['sentiment_encoded', 'sentiment_score']].values), axis=1)
y_test = test_df['sarcasm_encoded']

In [43]:
# Training parameters
n_epochs = 20
n_batch = 32

# Define neural network architecture
text_model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.25),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
text_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
text_model.fit(X_train, y_train, batch_size=n_batch, epochs=n_epochs, validation_split=0.2)

# Evaluate the model
loss, accuracy = text_model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - accuracy: 0.4179 - loss: 0.6961 - val_accuracy: 0.5294 - val_loss: 0.6984
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5460 - loss: 0.6906 - val_accuracy: 0.5882 - val_loss: 0.6978
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7222 - loss: 0.6809 - val_accuracy: 0.5588 - val_loss: 0.6985
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6406 - loss: 0.6800 - val_accuracy: 0.4706 - val_loss: 0.6995
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6891 - loss: 0.6705 - val_accuracy: 0.5294 - val_loss: 0.6989
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8466 - loss: 0.6586 - val_accuracy: 0.5294 - val_loss: 0.6964
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [44]:
# Predict values for the test set
predictions = text_model.predict(X_test)

# Print the predicted values
print("Predicted Values:")
print(predictions)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Predicted Values:
[[0.26481426]
 [0.44587934]
 [0.21210124]
 [0.48192853]
 [0.27117366]
 [0.25039634]
 [0.72702086]
 [0.7342012 ]
 [0.586081  ]
 [0.49969736]
 [0.58512634]
 [0.74650764]
 [0.22810024]
 [0.5528277 ]
 [0.3733225 ]
 [0.5861342 ]
 [0.16946304]
 [0.38853127]
 [0.68439186]
 [0.72866553]
 [0.42747384]
 [0.5923272 ]
 [0.17361885]
 [0.36331174]
 [0.5767516 ]
 [0.28934792]
 [0.31974253]
 [0.19313169]
 [0.46339047]
 [0.19743203]
 [0.25354373]
 [0.6837827 ]
 [0.69847083]
 [0.49974117]
 [0.3186751 ]
 [0.32532614]
 [0.45638475]
 [0.6400878 ]
 [0.7657035 ]
 [0.24513705]
 [0.32134756]
 [0.2876822 ]]


# Sarcasm Detection Model Using Late Fusion