In [None]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the training split of the comment-classification data from a path
# relative to the notebook's working directory.
train_csv_path = os.path.join(
    '..', 'A-Machine Learning', 'NLP Data', 'Comment Classification', 'train', 'train.csv'
)
df = pd.read_csv(train_csv_path)
df

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Show only the rows whose 'toxic' flag is set.
is_toxic = df['toxic'] == 1
df.loc[is_toxic]

In [None]:
# Column labels from position 2 onward; the same slice is used to build the target matrix Y.
df.columns[2:]

In [None]:
# Preview the sub-frame made of every column from position 2 onward.
df.loc[:, df.columns[2:]]

In [None]:
# Vocabulary cap handed to the text vectorizer.
MAX_WORDS = 200000

# Inputs are the raw comment strings; targets are all columns from position 2 onward
# as a plain NumPy array.
label_columns = df.columns[2:]
X = df['comment_text']
Y = df[label_columns].to_numpy()

In [None]:
# Map each comment to a fixed-length sequence of integer token ids.
raw_comments = X.values
vectorizer = TextVectorization(
    max_tokens=MAX_WORDS,
    output_sequence_length=1800,
    output_mode='int',
)
# Learn the vocabulary from the comments, then vectorize all of them in one call.
vectorizer.adapt(raw_comments)
V_text = vectorizer(raw_comments)

In [None]:
# Build the input pipeline: cache -> shuffle -> batch -> prefetch.
#
# This single dataset is later partitioned into train/val/test with take()/skip().
# shuffle() reshuffles on every iteration by default, which would make each pass over
# `train`, `val` and `test` see a different ordering, so the three partitions would
# overlap (test examples leak into training). Fixing the order with
# reshuffle_each_iteration=False (plus a seed for reproducibility) keeps the
# partitions disjoint.
# Trade-off: the training order is no longer reshuffled between epochs.
SHUFFLE_SEED = 42
dataset = (
    tf.data.Dataset.from_tensor_slices((V_text, Y))
    .cache()
    .shuffle(16000, seed=SHUFFLE_SEED, reshuffle_each_iteration=False)
    .batch(16)
    .prefetch(8)
)

In [None]:
# Partition the batched dataset by batch count: first 70% for training, the next 20%
# for validation, and 10% starting at the 90% mark for testing.
n_batches = len(dataset)
n_train = int(n_batches * 0.7)

train = dataset.take(n_train)
val = dataset.skip(n_train).take(int(n_batches * 0.2))
test = dataset.skip(int(n_batches * 0.9)).take(int(n_batches * 0.1))

In [None]:
# Embedding -> bidirectional LSTM -> three dense layers -> six sigmoid outputs
# (one independent probability per label column).
model = Sequential([
    Embedding(MAX_WORDS + 1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(6, activation='sigmoid'),
])
model.summary()

In [None]:
# Binary cross-entropy treats each of the six sigmoid outputs as its own yes/no problem.
model.compile(
    optimizer='adam',
    loss='BinaryCrossentropy',
)

In [None]:
# Train for three epochs, evaluating on the validation split after each one.
history = model.fit(
    train,
    validation_data=val,
    epochs=3,
)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the per-epoch training curves recorded by model.fit().
# DataFrame.plot() opens its own figure unless it is given an axes, so a bare
# plt.figure(figsize=...) beforehand only produces an extra empty figure and the
# requested size is never applied. Create the axes explicitly and draw onto it.
fig, ax = plt.subplots(figsize=(20, 20))
pd.DataFrame(history.history).plot(ax=ax)
ax.set(title='Training history', xlabel='epoch', ylabel='value')
plt.show()

In [None]:
# Vectorize a single hand-written comment to sanity-check the trained model.
sample_comment = 'fuck you asshole'
test_text = vectorizer(sample_comment)

In [None]:
# The model expects a batch dimension, so wrap the single sequence as a batch of one.
sample_batch = np.expand_dims(test_text, axis=0)
res = model.predict(sample_batch)

In [None]:
# Attach the label names to the six predicted probabilities.
res = pd.DataFrame(
    res,
    columns=['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'],
)

In [None]:
# Display the labelled prediction frame for the sample comment.
res

In [None]:
# Streaming metrics for the held-out evaluation loop.
# The model ends in six independent sigmoid units (multi-label), so accuracy must be
# computed element-wise with BinaryAccuracy. CategoricalAccuracy compares argmax
# positions, which is only meaningful for one-hot single-label targets and gives a
# meaningless number on these labels.
pre = Precision()
rec = Recall()
acc = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Start from clean accumulators so re-running this cell does not double-count batches.
for metric in (pre, rec, acc):
    metric.reset_state()

# Stream the test split through the model and accumulate the metrics batch by batch.
for batch in test.as_numpy_iterator():
    X_true, y_true = batch
    # predict_on_batch does a single forward pass on the batch; model.predict() is built
    # for whole datasets and adds per-call setup and progress output when called in a loop.
    yhat = model.predict_on_batch(X_true)

    # Collapse (batch, labels) to one long vector so every label decision counts equally.
    y_true = y_true.flatten()
    yhat = yhat.flatten()

    pre.update_state(y_true, yhat)
    rec.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)

In [None]:
# Report the accumulated test-set metrics.
for label, metric_obj in (('Precision: ', pre), ('Recall: ', rec), ('Accuracy: ', acc)):
    print(label, metric_obj.result().numpy())