In [1]:
!nvidia-smi


Tue Aug 12 23:55:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [55]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings('ignore')

In [3]:
emotion_df = pd.read_csv("/content/emotion.csv")
hate_df = pd.read_csv("/content/Hate.csv")
violence_df = pd.read_csv("/content/violence.csv")

In [5]:
emotion_df.drop(columns=['Unnamed: 0'], inplace=True)

In [7]:
violence_df.drop(columns=['Tweet_ID'], inplace = True)

In [13]:
hate_df.rename(columns={'tweet':'text', 'class':'label'}, inplace=True)
violence_df.rename(columns={'tweet':'text', 'type':'label'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hate_df.rename(columns={'tweet':'text', 'class':'label'}, inplace=True)


In [15]:
hate_df.shape, emotion_df.shape, violence_df.shape

((24783, 2), (416809, 2), (39650, 2))

In [16]:
e_df = pd.DataFrame()

for i in range(6):
  subset = emotion_df[emotion_df['label']==i].sample(n = 2000, random_state=42)
  e_df = pd.concat([e_df, subset])

In [21]:
emotion_df = e_df.copy()

In [22]:
emotion_df.shape

(12000, 2)

In [24]:

sexual_violence = violence_df[violence_df['label'] == 'sexual_violence'].sample(n = 4998, random_state = 42)
violence_df = violence_df[violence_df['label'] != 'sexual_violence']

In [25]:
violence_df = pd.concat([violence_df, sexual_violence], axis = 0)

In [26]:
violence_df.shape

(12000, 2)

In [27]:
offensive_speech = hate_df[hate_df['label'] == 1].sample(n = 6407, random_state = 42)
hate_df = hate_df[hate_df['label'] != 1]

In [28]:
hate_df = pd.concat([hate_df, offensive_speech], axis = 0)

In [29]:
hate_df.shape

(12000, 2)

In [30]:
emotion_df.shape, hate_df.shape, violence_df.shape

((12000, 2), (12000, 2), (12000, 2))

In [31]:
# resetting the indeces

emotion_df.reset_index(drop=True, inplace=True)
hate_df.reset_index(drop=True, inplace=True)
violence_df.reset_index(drop=True, inplace=True)

### Label Encoding

In [35]:
label_encoder = LabelEncoder()
violence_df['label'] = label_encoder.fit_transform(violence_df['label'])

In [38]:
violence_df.label.unique()

array([1, 3, 0, 2, 4])

### Stop words removal

In [45]:
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [41]:
# loading the stopwords
stop_words = set(stopwords.words('english'))

In [43]:
len(stop_words)

198

In [46]:
def remove_stopwords(text):
  all_words = nltk.word_tokenize(text)
  filltered_words = [word for word in all_words if word.lower() not in stop_words]
  return ' '.join(filltered_words)

emotion_df['text'] = emotion_df['text'].apply(remove_stopwords)
hate_df['text'] = hate_df['text'].apply(remove_stopwords)
violence_df['text'] = violence_df['text'].apply(remove_stopwords)

In [47]:
emotion_df.head(3)

Unnamed: 0,text,label
0,ive learned surround women lift leave feeling ...,0
1,already feel crappy upset situation doesnt help,0
2,feel like lost mourned moved past tears relati...,0


### Tokenization & Padding

In [49]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(pd.concat([emotion_df['text'], hate_df['text'], violence_df['text']]))

In [50]:
emotion_sequences = tokenizer.texts_to_sequences(emotion_df['text'])
hate_sequences = tokenizer.texts_to_sequences(hate_df['text'])
violence_sequences = tokenizer.texts_to_sequences(violence_df['text'])

In [51]:
emotion_df['text'].iloc[2]

'feel like lost mourned moved past tears relationship'

In [53]:
emotion_sequences[2:3]

[[1, 5, 321, 11854, 1207, 422, 1093, 385]]

In [52]:
max_len = 50
emotion_padded = pad_sequences(emotion_sequences, maxlen=max_len, padding= 'post')
hate_padded = pad_sequences(hate_sequences, maxlen=max_len, padding = 'post')
violence_padded = pad_sequences(violence_sequences, maxlen=max_len, padding = 'post')

In [54]:
emotion_padded[2:3]

array([[    1,     5,   321, 11854,  1207,   422,  1093,   385,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0]], dtype=int32)

In [56]:
# Generating laberls in numpy array format

emotion_labels = np.array(emotion_df['label'])
hate_labels = np.array(hate_df['label'])
violence_labels = np.array(violence_df['label'])

### Model Definition

In [57]:
# Prepare the inputs for each datasets

emotion_input = emotion_padded
violence_input = violence_padded
hate_input = hate_padded


In [70]:
# defining multiple input layers for each task
from tensorflow import keras


emotion_input_layer = keras.layers.Input(shape=(max_len, ), name='emotion_input')
violence_input_layer = keras.layers.Input(shape=(max_len, ), name='violence_input')
hate_input_layer = keras.layers.Input(shape=(max_len, ), name='hate_input')

In [71]:
embedding_layer = keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=128, name='embedding_layer')

In [72]:
emotion_embedding = embedding_layer(emotion_input_layer)
violence_embedding = embedding_layer(violence_input_layer)
hate_embedding = embedding_layer(hate_input_layer)

In [73]:
# Shared LSTM layer

shared_lstm = keras.layers.LSTM(units=64, return_sequences=True, name='shared_lstm')

In [74]:
emotion_lstm = shared_lstm(emotion_embedding)
violence_lstm = shared_lstm(violence_embedding)
hate_lstm = shared_lstm(hate_embedding)

In [75]:
#Shared GlobalAveragePooling & Dropout layer

shared_pooling = keras.layers.GlobalAveragePooling1D(name='shared_pooling')
shared_dropout = keras.layers.Dropout(rate=0.5, name='shared_dropout')


In [76]:
emotion_feature = shared_dropout(shared_pooling(emotion_lstm))
violence_feature = shared_dropout(shared_pooling(violence_lstm))
hate_feature = shared_dropout(shared_pooling(hate_lstm))

In [77]:
#Output layers

emotion_output = keras.layers.Dense(units=6, activation='softmax', name='emotion_output')(emotion_feature)
violence_output = keras.layers.Dense(units=5, activation='softmax', name='violence_output')(violence_feature)
hate_output = keras.layers.Dense(units=3, activation='softmax', name='hate_output')(hate_feature)

In [78]:
#cOMPILE the Model

model = keras.models.Model(inputs=[emotion_input_layer, violence_input_layer, hate_input_layer], outputs=[emotion_output, violence_output, hate_output])
model.compile(optimizer='adam', loss={
    'emotion_output': 'sparse_categorical_crossentropy',
    'violence_output': 'sparse_categorical_crossentropy',
    'hate_output': 'sparse_categorical_crossentropy'
                                    },
              metrics={
                  'emotion_output': 'accuracy',
                  'violence_output': 'accuracy',
                  'hate_output': 'accuracy'
              })


In [79]:
model.summary()

In [80]:
# Training the model

model.fit(x = {
    'emotion_input': emotion_input,
    'violence_input': violence_input,
    'hate_input': hate_input
},
          y = {
              'emotion_output': emotion_labels,
              'violence_output': violence_labels,
              'hate_output': hate_labels
          },
          epochs = 10,
          batch_size = 4,
          # validation_split=0.2
          )

Epoch 1/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 17ms/step - emotion_output_accuracy: 0.2004 - emotion_output_loss: 1.7976 - hate_output_accuracy: 0.7021 - hate_output_loss: 0.7263 - loss: 2.9355 - violence_output_accuracy: 0.8583 - violence_output_loss: 0.4116
Epoch 2/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 18ms/step - emotion_output_accuracy: 0.7339 - emotion_output_loss: 0.7199 - hate_output_accuracy: 0.8782 - hate_output_loss: 0.3612 - loss: 1.1463 - violence_output_accuracy: 0.9811 - violence_output_loss: 0.0652
Epoch 3/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 17ms/step - emotion_output_accuracy: 0.9447 - emotion_output_loss: 0.1838 - hate_output_accuracy: 0.9355 - hate_output_loss: 0.1941 - loss: 0.3883 - violence_output_accuracy: 0.9975 - violence_output_loss: 0.0105
Epoch 4/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 18ms/step - emotion_output_accuracy: 0.96

<keras.src.callbacks.history.History at 0x7df233ccde10>

## Prediciton & Evaluations

In [84]:
prediction = model.predict({'emotion_input': emotion_input, 'violence_input': violence_input, 'hate_input': hate_input})

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step


In [85]:
prediction

[array([[9.9999046e-01, 8.2265586e-07, 4.0830521e-08, 7.7059885e-06,
         6.8722494e-07, 2.6947941e-07],
        [9.9999821e-01, 2.1884239e-07, 6.8039854e-09, 1.2557878e-06,
         1.5919042e-07, 1.2520944e-07],
        [9.9999893e-01, 2.0082101e-07, 1.8573314e-09, 7.6760574e-07,
         9.8796249e-08, 5.7971739e-08],
        ...,
        [2.5067893e-06, 9.4637908e-06, 9.9643275e-06, 1.2168459e-05,
         2.1840276e-03, 9.9778181e-01],
        [1.1549790e-07, 2.0069098e-07, 3.6069676e-07, 1.3144779e-07,
         4.1958538e-06, 9.9999499e-01],
        [2.1600512e-05, 7.9605916e-06, 3.1940112e-05, 4.0669791e-05,
         8.9023757e-05, 9.9980885e-01]], dtype=float32),
 array([[6.9763639e-11, 1.0000000e+00, 8.2802419e-12, 1.5027658e-10,
         1.0072802e-11],
        [2.5625063e-06, 9.9998033e-01, 1.5032340e-06, 1.5653080e-05,
         2.2963235e-08],
        [5.1488045e-09, 1.0000000e+00, 3.2049863e-10, 4.6263821e-10,
         4.5400420e-10],
        ...,
        [7.8900818e-0

In [86]:
emotion_pred = np.argmax(prediction[0], axis=1)
violence_pred = np.argmax(prediction[1], axis=1)
hate_pred = np.argmax(prediction[2], axis=1)

In [88]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt


In [89]:
def plot_matirx(true, pred, title, labels):
  cm = confusion_matrix(true, pred, normalize='true')
  plt.figure(figsize=(8, 6))
  sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues', xticklabels=labels, yticklabels=labels)
  plt.title('Confusion Matrix')
  plt.xlabel('Predicted Labels')
  plt.ylabel('True Labels')
  plt.show()

emotion_labels = ['sadnes', 'joy', 'love', 'anger', 'fear', 'surprise']
violence_labels = ['sexual_violence', 'physical_violence', 'emotional_violence', 'Harmful_traditional_practice', 'economic_violence']
hate_labels = ['offensive speech', 'Neither', 'Hate Speech']
# plot_matirx(emotion_labels, emotion_pred, 'Emotion', emotion_labels)

In [90]:
plot_matirx(violence_labels, violence_pred, 'Violence', violence_labels)


ValueError: Found input variables with inconsistent numbers of samples: [5, 12000]