# **DataSet**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the data
data = pd.read_csv('SONDATA.csv')

# Preprocess Data
data.isnull().sum()
data['Text'].fillna('', inplace=True)

# Encode the labels
label_encoder = LabelEncoder()
data['Label'] = label_encoder.fit_transform(data['Label'])

# Tokenization
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(data['Text'])
sequences = tokenizer.texts_to_sequences(data['Text'])
padded_sequences = pad_sequences(sequences, maxlen=200, padding='post', truncating='post')

# Data Split
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, data['Label'], test_size=0.2, random_state=42)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Text'].fillna('', inplace=True)


# **Training The Model**

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import L2
from tensorflow.keras.losses import BinaryCrossentropy

# Model
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=200),
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    LSTM(128),
    Dropout(0.5),
    Dense(64, activation='relu', kernel_regularizer=L2(0.01)),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer=L2(0.01)),
    Dense(16, activation='relu', kernel_regularizer=L2(0.01)),
    Dense(1, activation='linear', kernel_regularizer=L2(0.01))
])


model.compile(loss=BinaryCrossentropy(from_logits=True), optimizer='adam', metrics=['accuracy'])
model.summary()



In [3]:

history = model.fit(X_train, y_train, epochs=16, batch_size=8, validation_split=0.2)

Epoch 1/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 43ms/step - accuracy: 0.8879 - loss: 0.7877 - val_accuracy: 0.8860 - val_loss: 0.3726
Epoch 2/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 43ms/step - accuracy: 0.8923 - loss: 0.3297 - val_accuracy: 0.8860 - val_loss: 0.1752
Epoch 3/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 42ms/step - accuracy: 0.8908 - loss: 0.1736 - val_accuracy: 0.8860 - val_loss: 0.1387
Epoch 4/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 42ms/step - accuracy: 0.9371 - loss: 0.1267 - val_accuracy: 0.9576 - val_loss: 0.1306
Epoch 5/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 41ms/step - accuracy: 0.9828 - loss: 0.1050 - val_accuracy: 0.9857 - val_loss: 0.0992
Epoch 6/16
[1m1570/1570[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 42ms/step - accuracy: 0.9943 - loss: 0.0801 - val_accuracy: 0.9863 - val_loss: 0.0877
Epoc

In [4]:
from sklearn.metrics import classification_report

# Evaluation
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Apply Sigmoid Function to the Logits
y_pred = (tf.nn.sigmoid(model.predict(X_test)).numpy() > 0.5).astype(int)


# Classification report
y_test_labels = label_encoder.inverse_transform(y_test)
y_pred_labels = label_encoder.inverse_transform(y_pred.flatten())

target_names = [str(cls) for cls in label_encoder.classes_]

print(classification_report(y_test_labels, y_pred_labels, target_names=target_names))

[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.9928 - loss: 0.0759
Test Accuracy: 0.99
[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      3516
           1       0.97      0.95      0.96       408

    accuracy                           0.99      3924
   macro avg       0.98      0.97      0.98      3924
weighted avg       0.99      0.99      0.99      3924



# **Save Model**

In [6]:
import joblib
# Save the model to a file using joblib
filename = 'your_model.joblib'
joblib.dump(model, filename)


In [11]:
from keras.models import load_model

# Assuming model is the trained Keras model
model.save('model2.keras')

# Save the tokenizer
import pickle
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Save the label encoder
with open('label_encoder.pickle', 'wb') as handle:
    pickle.dump(label_encoder, handle, protocol=pickle.HIGHEST_PROTOCOL)