In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the dataset
df = pd.read_csv('final.csv')

# Ensure all entries in 'tweet' column are strings and handle NaNs
df['tweet'] = df['tweet'].astype(str).fillna('')

# Initialize the TF-IDF Vectorizer
vectorizer = TfidfVectorizer(max_features=5000)  # Adjust max_features as needed

# Fit and transform the tweets to TF-IDF features
X = vectorizer.fit_transform(df['tweet']).toarray()

# Assuming the target variable is named 'class' and is already cleaned
y = df['class'].values

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))  

# Compile the model
model.compile(optimizer=Adam(),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Calculate accuracy and precision
accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes, average='weighted')

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')

# Detailed classification report
print(classification_report(y_test, y_pred_classes, target_names=['Normal', 'Offensive', 'Hate']))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 13ms/step - accuracy: 0.7671 - loss: 0.7009 - val_accuracy: 0.8754 - val_loss: 0.3692
Epoch 2/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - accuracy: 0.8783 - loss: 0.3519 - val_accuracy: 0.8926 - val_loss: 0.3076
Epoch 3/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.9097 - loss: 0.2582 - val_accuracy: 0.8944 - val_loss: 0.2942
Epoch 4/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 14ms/step - accuracy: 0.9226 - loss: 0.2178 - val_accuracy: 0.8933 - val_loss: 0.2961
Epoch 5/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.9337 - loss: 0.1866 - val_accuracy: 0.8921 - val_loss: 0.3048
Epoch 6/10
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9439 - loss: 0.1678 - val_accuracy: 0.8891 - val_loss: 0.3189
Epoch 7/10
[1m496/