# **Dataset**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the data
# Only the 'Text' and 'Label' columns of the CSV are used below.
data = pd.read_csv('SONDATA.csv')

# Preprocess Data
# Report missing values per column. A bare expression in the middle of a
# cell is discarded, so the counts are printed explicitly.
print(data.isnull().sum())
# Assign the filled column back rather than calling fillna(inplace=True) on
# the selected column: under pandas copy-on-write the in-place call only
# updates a temporary and the NaNs would stay in `data`.
data['Text'] = data['Text'].fillna('')

# Encode the labels
label_encoder = LabelEncoder()
data['Label'] = label_encoder.fit_transform(data['Label'])

# Data Split
# Split row positions first so the tokenizer can be fitted on training text
# only. With the same test_size and random_state the chosen rows depend only
# on the number of samples, so this is the same partition that splitting the
# padded array directly would produce.
row_positions = list(range(len(data)))
train_rows, test_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Tokenization
# The vocabulary is learned from the training rows alone; words that occur
# only in the test rows map to the '<OOV>' token, as unseen text would.
tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(data['Text'].iloc[train_rows])
sequences = tokenizer.texts_to_sequences(data['Text'])
padded_sequences = pad_sequences(sequences, maxlen=200, padding='post', truncating='post')

# Materialise the train/test arrays from the positions chosen above.
X_train, X_test = padded_sequences[train_rows], padded_sequences[test_rows]
y_train, y_test = data['Label'].iloc[train_rows], data['Label'].iloc[test_rows]

# **Training The Model**

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import L2
from tensorflow.keras.losses import BinaryCrossentropy

# Model
# Two stacked LSTM layers followed by a small dense head. The final layer is
# linear, so the network emits one raw logit per sequence; the loss below is
# configured with from_logits=True to match.
model = Sequential([
    # Declare the input shape explicitly instead of passing `input_length`
    # to Embedding: Keras 3 deprecates that argument and ignores it, which
    # leaves the model unbuilt when summary() is called.
    # 200 is the padded sequence length fed to the model.
    tf.keras.Input(shape=(200,), dtype='int32'),
    # input_dim=5000 is the number of token ids the embedding can index.
    Embedding(input_dim=5000, output_dim=128),
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    LSTM(128),
    Dropout(0.5),
    Dense(64, activation='relu', kernel_regularizer=L2(0.01)),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer=L2(0.01)),
    Dense(16, activation='relu', kernel_regularizer=L2(0.01)),
    Dense(1, activation='linear', kernel_regularizer=L2(0.01))
])

# NOTE(review): with a single logit output, 'accuracy' resolves to binary
# accuracy, whose default 0.5 cut-off is applied to the raw logits rather
# than to probabilities. The accuracy logged during training may therefore
# understate the sigmoid>0.5 decision rule - confirm and adjust if needed.
model.compile(
    loss=BinaryCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:

# Train the compiled model; `history` keeps the per-epoch metrics returned
# by fit(). A fifth of the training data is held out for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=8,
    epochs=16,
    validation_split=0.2,
)

In [None]:
from sklearn.metrics import classification_report

# Evaluation
# evaluate() is used for the test loss. The accuracy it returns comes from
# the compiled metric, which for a logit output may apply a different
# cut-off than the sigmoid>0.5 rule below, so the accuracy that is printed
# is computed from the thresholded predictions to agree with the report.
loss, _keras_accuracy = model.evaluate(X_test, y_test)

# Apply Sigmoid Function to the Logits
logits = model.predict(X_test)
y_pred = (tf.nn.sigmoid(logits).numpy() > 0.5).astype(int)

# Map the encoded ids back to the original label values.
y_test_labels = label_encoder.inverse_transform(y_test)
y_pred_labels = label_encoder.inverse_transform(y_pred.flatten())

# Fraction of test rows whose predicted label matches the true label.
accuracy = float((y_test_labels == y_pred_labels).mean())
print(f'Test Accuracy: {accuracy:.2f}')

# Classification report
target_names = [str(cls) for cls in label_encoder.classes_]

# Pass `labels` together with `target_names` so the names stay aligned with
# the encoder's classes even when a class is absent from this test slice.
print(classification_report(
    y_test_labels,
    y_pred_labels,
    labels=label_encoder.classes_,
    target_names=target_names,
))

# **Save Model**

In [None]:
import joblib
# Serialise the trained model object to disk with joblib.
# NOTE(review): the model is also written with model.save('model2.keras')
# in a later cell - confirm whether this joblib copy is still needed and
# that it can be loaded back in the target environment.
filename = 'your_model.joblib'
joblib.dump(value=model, filename=filename)


In [None]:
from keras.models import load_model

import pickle

# Write the trained Keras model in the native .keras format.
model.save('model2.keras')

# Pickle the fitted preprocessing objects so the same tokenisation and label
# mapping can be restored later: first the tokenizer, then the label encoder.
for artifact_path, artifact in (
    ('tokenizer.pickle', tokenizer),
    ('label_encoder.pickle', label_encoder),
):
    with open(artifact_path, 'wb') as handle:
        pickle.dump(artifact, handle, protocol=pickle.HIGHEST_PROTOCOL)