# 1. Import libraries:

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from IPython.display import display
import warnings
warnings.filterwarnings('ignore')

# 2. Reading data:

In [None]:
train_df = pd.read_csv('/kaggle/input/mydshahed/Train.csv')

In [None]:
train_df.head()

In [None]:
train_df.tail()

In [None]:
train_df.shape

In [None]:
train_df.info()

# 3. EDA:

In [None]:
train_df.isnull().sum()

In [None]:
train_df.duplicated().sum()

In [None]:
plt.grid()
sns.countplot(x=train_df['label'], hue=train_df['label'])
plt.title('Labels before drop duplicated row')
plt.show()

In [None]:
train_df.drop_duplicates(inplace = True)

In [None]:
train_df.reset_index(inplace=True)

In [None]:
train_df.drop('index', inplace = True, axis = 1)

In [None]:
plt.grid()
sns.countplot(x=train_df['label'], hue=train_df['label'])
plt.title('Labels after drop duplicated row')
plt.show()

## Plot the most common words for each class

In [None]:
from wordcloud import WordCloud

positive_tweets = train_df[train_df['label'] == 1]['text']

positive_text = ' '.join(positive_tweets)

wordcloud = WordCloud(width=800, height=400, background_color='white').generate(positive_text)

plt.figure(figsize=(10, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud - Positive Sentiment')
plt.show()

In [None]:
negative_tweets = train_df[train_df['label'] == 0]['text']

negative_text = ' '.join(negative_tweets)

wordcloud = WordCloud(width=800, height=400, background_color='white').generate(negative_text)

plt.figure(figsize=(10, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title('Word Cloud - Negative Sentiment')
plt.show()

# 4. Data Cleaning:

## Convert all the data to lower case

In [None]:
# Lower-case every string cell and leave non-string values (e.g. labels) untouched.
# Column-wise Series.map is the elementwise equivalent of DataFrame.applymap, which
# has been deprecated since pandas 2.1.
train_df = train_df.apply(
    lambda column: column.map(lambda x: x.lower() if isinstance(x, str) else x)
)

In [None]:
train_df.head()

In [None]:
# Take a look to first 3 rows
for i in range(3):
  display(train_df['text'][i])

In [None]:
#Made a new column contain the number of letter in every row
train_df['letter_count'] = train_df['text'].apply(len)
train_df.head(3)

In [None]:
# Print the row holding the longest text.
# idxmax() returns an index *label*, so the row is looked up with label-based .loc;
# positional .iloc only gives the same row while the index happens to be 0..n-1.
train_df.loc[train_df['letter_count'].idxmax()]

## Remove numbers and every strange symbol

In [None]:
import re

# Anything that is not a lowercase ASCII letter, whitespace, or an apostrophe.
_NON_LETTER_RE = re.compile(r"[^a-z\s']")
# Either a word containing the same character three or more times in a row
# (e.g. "sooo"), or a word that is a single character long.
_NOISE_WORD_RE = re.compile(r"\b\w*(\w)\1{2,}\w*\b|\b\w\b")


def remove_strang(data):
    """Strip digits, symbols and noise words from every document in *data*.

    Each document is cleaned in two passes:

    1. every character other than ``a-z``, whitespace and ``'`` is deleted
       (the text is expected to be lower-cased already, upper-case letters
       are deleted as well);
    2. words with one character repeated three or more times in a row and
       one-character words are deleted.

    Args:
        data: a ``pandas.Series`` of strings, or any other iterable of strings.

    Returns:
        A new ``pandas.Series`` with the same index when *data* is a Series,
        otherwise a new list. The input is never modified.

    Raises:
        TypeError: if an element is not a string (for example NaN).
    """
    # NOTE(review): the one-character rule also removes a letter that is only
    # separated from its word by an apostrophe (the "t" in "don't"); confirm
    # whether that is intended before relying on apostrophe negations.
    def _clean(text):
        return _NOISE_WORD_RE.sub('', _NON_LETTER_RE.sub('', text))

    if isinstance(data, pd.Series):
        # Series.map keeps the original index, whatever its labels are.
        return data.map(_clean)
    return [_clean(text) for text in data]

In [None]:
text = remove_strang(train_df['text'])

In [None]:
text

In [None]:
for i in range(3):

  display(text[i])

In [None]:
text[0]

In [None]:
len(text)

## Tokenization

In [None]:
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize, sent_tokenize


def tokenization(text):
        """Split every document in *text* into a list of word tokens.

        Args:
            text: an iterable of strings (a list or a pandas Series).

        Returns:
            A list with one list of tokens per document, in input order.
        """
        # Iterate over the values instead of indexing with range(len(...)):
        # on a pandas Series, text[i] is a label lookup and fails when the
        # index is not 0..n-1.
        return [word_tokenize(document) for document in text]



In [None]:
tokenize_text = tokenization(text)

In [None]:
tokenize_text

In [None]:
tokenize_text[0]

In [None]:
len(tokenize_text)

## Remove stop words

In [None]:
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')

print(stopwords.words('english'))

In [None]:
negation_exceptions = {'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
                         'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't",
                         'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't",
                         'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't",
                         'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"}

In [None]:
def stopword_removal(tokenize_text):
    """Drop stop words from every tokenized document.

    The stop-word set is NLTK's English list plus a few domain words, minus
    ``not``/``no`` and every entry of ``negation_exceptions`` so that
    negations stay in the text.

    Args:
        tokenize_text: an iterable of token lists, one per document.

    Returns:
        A list of token lists with the stop words removed. Tokens are compared
        case-insensitively but returned unchanged.
    """
    # Build the set once, outside the per-document loop.
    stop_words = set(stopwords.words('english'))
    # NOTE(review): 'move' looks like a typo for 'movie'; it is kept as-is so
    # the features do not change silently. Confirm and fix together with any
    # other copy of this list.
    stop_words.update(['film', 'move', 'br', 'one', 'character'])
    # Set difference does not raise if a word is missing from the NLTK list.
    stop_words -= {'not', 'no'}
    stop_words -= negation_exceptions

    return [
        [word for word in sentence if word.lower() not in stop_words]
        for sentence in tokenize_text
    ]

In [None]:
filtered_text = stopword_removal(tokenize_text)

In [None]:
filtered_text

In [None]:
filtered_text[0]

In [None]:
len(filtered_text)

## Stemming

In [None]:
from nltk.stem import PorterStemmer


def Stemming(filtered_text):
    """Reduce every token of every document to its Porter stem.

    Args:
        filtered_text: a list of token lists, one per document.

    Returns:
        A list of the same shape holding the stemmed tokens.
    """
    stemmer = PorterStemmer()
    return [[stemmer.stem(token) for token in tokens] for tokens in filtered_text]



In [None]:
stemm_text = Stemming(filtered_text)

In [None]:
len(stemm_text)

In [None]:
stemm_text

In [None]:
stemm_text[0]

## Build the cleaned and preprocessed DataFrame

In [None]:
def make_df(stemm_text):
    """Join each document's tokens back into one space-separated string.

    Args:
        stemm_text: a list of token lists, one per document.

    Returns:
        A list of strings, one per document, with surrounding whitespace
        stripped.
    """
    return [' '.join(tokens).strip() for tokens in stemm_text]

In [None]:
train_df['text'] = pd.DataFrame(make_df(stemm_text))

In [None]:
train_df.head(4)

In [None]:
train_df.info()

In [None]:
# letter_count was computed before the text column was replaced by its cleaned
# version, so this shows the (now cleaned) row whose original text was longest.
# idxmax() returns an index label, hence label-based .loc rather than .iloc.
train_df.loc[train_df['letter_count'].idxmax()]

*****************************************************************

## Create a text-cleaning function

In [None]:
def clean_text(data):
    """Run the full text-cleaning pipeline on a DataFrame with a ``text`` column.

    Steps: lower-case every string cell, strip symbols and noise words,
    tokenize, drop stop words, stem, and join the tokens back into strings.

    Args:
        data: a DataFrame containing a ``text`` column of strings.

    Returns:
        A new DataFrame (the argument is not modified) whose ``text`` column
        holds the cleaned documents; other columns are only lower-cased where
        they contain strings.
    """
    # Column-wise Series.map is the elementwise equivalent of
    # DataFrame.applymap, which has been deprecated since pandas 2.1.
    data = data.apply(
        lambda column: column.map(lambda x: x.lower() if isinstance(x, str) else x)
    )

    # Work on a plain list so the helpers never depend on the frame's index.
    documents = data['text'].tolist()
    stemm_text = Stemming(stopword_removal(tokenization(remove_strang(documents))))

    # Assign positionally; assigning a DataFrame/Series would align on the
    # index and produce NaN for any index other than 0..n-1.
    data['text'] = make_df(stemm_text)

    return data
    

# 5. Data preprocessing & building ML models:

## Read & preprocess val and test data

In [None]:
val_df = pd.read_csv("/kaggle/input/mydshahed/Valid.csv")
test_df = pd.read_csv("/kaggle/input/mydshahed/Test.csv")

In [None]:
print(f"Validation data shape: {val_df.shape}")
print(f"Test data shape: {test_df.shape}")

In [None]:
val_df = clean_text(val_df)
val_df.head(3)

## Split the data

In [None]:
X_train = train_df['text']
y_train = train_df['label']
X_val   = val_df['text']
y_val   = val_df['label']

## CountVectorizer

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

count_vectorizer = CountVectorizer()

### MultinomialNB:

In [None]:
from sklearn.naive_bayes import MultinomialNB

multi_model = MultinomialNB()

In [None]:
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('vectorizer', count_vectorizer),
                     ('classifier', multi_model)])

In [None]:
"""note for cross validation there are 2 ways,
   the 1st one is used cross_val_score and this approch just givs the performence to our model
   and we must fit our model after apply it.
   the 2nd cross_validate with return_estimator=True this train and save the best fit model,
   so we dont need to fit our model again.""" 

from sklearn.model_selection import cross_validate

cv_scores = cross_validate(pipeline, X_train, y_train, cv=5, 
                            scoring='accuracy', return_estimator=True)

In [None]:
# get best fold's model
# With return_estimator=True, cross_validate returns one fitted pipeline per fold.
# Each of them was trained on that fold's training split only (with cv=5, roughly
# 4/5 of X_train); nothing here is refit on the full training set. The line below
# keeps the pipeline whose held-out fold accuracy was the highest.
best_multi_model = cv_scores['estimator'][cv_scores['test_score'].argmax()]  

In [None]:
y_pred = best_multi_model.predict(X_val)

In [None]:
from sklearn.metrics import accuracy_score

multi_accuracy = accuracy_score(y_val, y_pred)

print(f"\nValidation set accuracy: {multi_accuracy:.4f}")

### DecisionTree

In [None]:
from sklearn.tree import DecisionTreeClassifier

dec_tree_model = DecisionTreeClassifier()

In [None]:
pipeline = Pipeline([('vectorizer', count_vectorizer),
                     ('classifier', dec_tree_model)])

In [None]:
cv_scores = cross_validate(pipeline, X_train, y_train, cv=5,
                          scoring='accuracy', return_estimator=True)

In [None]:
best_tree_model = cv_scores['estimator'][cv_scores['test_score'].argmax()]

In [None]:
y_pred = best_tree_model.predict(X_val)

In [None]:
dec_tree_accuracy = accuracy_score(y_val, y_pred)  

print(f"\nValidation set accuracy: {dec_tree_accuracy:.4f}")

### Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

RF_model = RandomForestClassifier()

In [None]:
pipeline = Pipeline([('vectorizer', count_vectorizer),
                     ('classifier', RF_model)])

In [None]:
cv_scores = cross_validate(pipeline, X_train, y_train, cv=5,
                          scoring = 'accuracy',return_estimator=True)

In [None]:
best_RF_model = cv_scores['estimator'][cv_scores['test_score'].argmax()]

In [None]:
y_pred = best_RF_model.predict(X_val)

In [None]:
RF_accuracy = accuracy_score(y_val, y_pred)  

print(f"\nValidation set accuracy: {RF_accuracy:.4f}")

### XGBoost

In [None]:
import xgboost as xgb

xgb_model = xgb.XGBClassifier()

In [None]:
pipeline = Pipeline([('vectorizer', count_vectorizer),
                     ('classifier', xgb_model)])

In [None]:
cv_scores = cross_validate(pipeline, X_train, y_train, cv=5,
                          scoring='accuracy', return_estimator=True)

In [None]:
best_xgb_model = cv_scores['estimator'][cv_scores['test_score'].argmax()]

In [None]:
y_pred = best_xgb_model.predict(X_val)

In [None]:
xgb_accuracy = accuracy_score(y_pred, y_val)

print(f"\nValidation set accuracy: {xgb_accuracy:.4f}")

**************************************

In [None]:
models = ['MultinomialNB', 'DecisionTree', 'Random forest', 'XGBoost']

In [None]:
scores = [multi_accuracy, dec_tree_accuracy, RF_accuracy, xgb_accuracy]

In [None]:
scores

In [None]:
df = {'model':models, 'score':scores}

In [None]:
df = pd.DataFrame(df)

In [None]:
df

In [None]:
plt.grid()
ax = sns.barplot(x='model', y='score', hue='model', data=df)
plt.title('Accuracy for models with CountVectorizer')

for p in ax.patches:
    ax.annotate(
        f'{p.get_height():.3f}',  
        (p.get_x() + p.get_width() / 2., p.get_height()),  
        ha='center',  
        va='center',  
        xytext=(0, 7),
        textcoords='offset points'
    )

plt.show()

***************************************************************

## TfidfVectorizer

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

TFidf_vectorizer = TfidfVectorizer()

### MultinomialNB

In [None]:
pipeline = Pipeline([('vectorizer', TFidf_vectorizer),
                     ('classifier', multi_model)])

In [None]:
cv_results = cross_validate(pipeline, X_train, y_train, cv=5,
                           scoring='accuracy', return_estimator=True)

In [None]:
best_muti_estimator = cv_results['estimator'][cv_results['test_score'].argmax()]

In [None]:
y_pred = best_muti_estimator.predict(X_val)

In [None]:
multi_accuracy_TFidf = accuracy_score(y_pred, y_val)

print(f"\nValidation set accuracy: {multi_accuracy_TFidf:.4f}")

### DecisionTree

In [None]:
pipeline = Pipeline([('vectorizer', TFidf_vectorizer),
                     ('classifier', dec_tree_model)])

In [None]:
cv_results = cross_validate(pipeline, X_train, y_train, cv=5,
                           scoring='accuracy', return_estimator=True)

In [None]:
best_tree_estimator = cv_results['estimator'][cv_results['test_score'].argmax()]

In [None]:
y_pred = best_tree_estimator.predict(X_val)

In [None]:
dec_accuracy_TFidf = accuracy_score(y_pred, y_val)

print(f"\nValidation set accuracy: {dec_accuracy_TFidf:.4f}")

### Random forest

In [None]:
pipeline = Pipeline([('vectorizer', TFidf_vectorizer),
                     ('classifier', RF_model)])

In [None]:
cv_results = cross_validate(pipeline, X_train, y_train, cv=5,
                           scoring='accuracy', return_estimator=True)

In [None]:
best_RF_estimator = cv_results['estimator'][cv_results['test_score'].argmax()]

In [None]:
y_pred = best_RF_estimator.predict(X_val)

In [None]:
RF_accracy_TFidf = accuracy_score(y_pred, y_val)

print(f"\nValidation set accuracy: {RF_accracy_TFidf:.4f}")

### XGBoost

In [None]:
pipeline = Pipeline([('vectorizer', TFidf_vectorizer),
                     ('classifier', xgb_model)])

In [None]:
cv_results = cross_validate(pipeline, X_train, y_train, cv=5,
                           scoring='accuracy', return_estimator=True)

In [None]:
best_xgb_estimator = cv_results['estimator'][cv_results['test_score'].argmax()]

In [None]:
y_pred = best_xgb_estimator.predict(X_val)

In [None]:
xgb_accuracy_TFidf = accuracy_score(y_pred, y_val)

print(f"\nValidation set accuracy: {xgb_accuracy_TFidf:.4f}")

*************************************************************

In [None]:
TFidf_scores = [multi_accuracy_TFidf, dec_accuracy_TFidf, RF_accracy_TFidf, xgb_accuracy_TFidf]

In [None]:
TFidf_scores

In [None]:
df2 = {'model':models, 'score':TFidf_scores}

In [None]:
df2 = pd.DataFrame(df2)

In [None]:
df2

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

sns.barplot(ax=axes[0], x='model', y='score', hue='model', data=df)
axes[0].set_title('Accuracy for models with CountVectorizer')
axes[0].grid(True)


for container in axes[0].containers:
    axes[0].bar_label(container, fmt='%.3f', padding=3)  

sns.barplot(ax=axes[1], x='model', y='score', hue='model', data=df2)
axes[1].set_title('Accuracy for models with TF-IDF')
axes[1].grid(True)

for container in axes[1].containers:
    axes[1].bar_label(container, fmt='%.3f', padding=3)

plt.tight_layout()
plt.show()

#### The best model is *MultinomialNB* with TF-IDF feature extraction, so we'll use it

***************************************************************

## Tune the model

In [None]:
from sklearn.model_selection import GridSearchCV


# Fresh TF-IDF + MultinomialNB pipeline whose hyper-parameters are tuned below.
pipeline = Pipeline([
    ('vectorizer', TfidfVectorizer()),
    ('classifier', MultinomialNB())
])


# Search space: 3 * 2 * 3 * 4 * 2 = 144 parameter combinations.
# Keys use the "<step name>__<parameter>" form to address a pipeline step.
param_grid = {
    'vectorizer__max_features': [1000, 5000, 10000],
    'vectorizer__ngram_range': [(1, 1), (1, 2)],  # unigrams only, or unigrams + bigrams
    'vectorizer__min_df': [1, 2, 5],  
    'classifier__alpha': [0.1, 0.5, 1.0, 2.0],  
    'classifier__fit_prior': [True, False]  
}


# 5-fold cross-validation per combination (720 fits in total), scored by accuracy.
grid_search = GridSearchCV(
    pipeline, 
    param_grid, 
    cv=5,  
    scoring='accuracy',
    n_jobs=-1  # use all available CPU cores
)
grid_search.fit(X_train, y_train)


# refit is left at its default, so best_estimator_ is the best combination
# retrained on all of X_train.
best_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)


# Check the tuned pipeline on the held-out validation split.
y_pred = best_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred)
print(f"Tuned Validation Accuracy: {val_accuracy:.4f}")

In [None]:
from joblib import dump

dump(best_model, 'multinomial_nb_tfidf.joblib')

*******************************************************

In [None]:
def predict_df(data):
    """Predict sentiment labels for every row of a DataFrame.

    The frame is cleaned with ``clean_text`` (the same preprocessing used for
    the validation data) and its ``text`` column is passed to the tuned
    pipeline stored in the global ``best_model``.

    Args:
        data: a DataFrame containing a ``text`` column of raw strings.

    Returns:
        The array of predicted labels returned by ``best_model.predict``.
    """
    cleaned = clean_text(data)

    # The original called ``model.best_model(X)``, which is not a valid call;
    # the fitted estimator is ``best_model`` and predictions come from .predict().
    return best_model.predict(cleaned['text'])

**************************************************

In [None]:
def predict_smple(sample):
    """Classify one raw sentence with ``best_model`` and show the result.

    The sentence goes through the same helper chain as the training data
    (lower-casing, ``remove_strang``, ``tokenization``, ``stopword_removal``,
    ``Stemming``, ``make_df``), so inference preprocessing cannot drift away
    from what the model was trained on.

    Args:
        sample: the raw sentence to classify.

    Returns:
        Whatever ``display`` returns for the result image.

    Raises:
        ValueError: if the model predicts a label other than 0 or 1.
    """
    documents = remove_strang([sample.lower()])
    cleaned = make_df(Stemming(stopword_removal(tokenization(documents))))

    # predict() returns an array with one label per document; take the only one.
    label = best_model.predict(cleaned)[0]

    if label == 1:
        print('           Positive')
        img = Image.open('positive.png')
    elif label == 0:
        print('           Negative')
        img = Image.open('negative.png')
    else:
        raise ValueError(f"unexpected label predicted: {label!r}")

    return display(img)


In [None]:
sample1 = "it was good"
# predict_smple prints the verdict and displays the image itself; wrapping the
# call in print() only added a stray "None" line to the output.
predict_smple(sample1)

In [None]:
sample2 = "it wasnt good"
# predict_smple prints the verdict and displays the image itself; wrapping the
# call in print() only added a stray "None" line to the output.
predict_smple(sample2)

*******************************************************************************************

# 6. Data preprocessing & building DL Models:

## Data Preparation

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenize text
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(X_train)

In [None]:
# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_val_seq = tokenizer.texts_to_sequences(X_val)

In [None]:
X_train_seq[0]

In [None]:
len(X_train_seq)

In [None]:
train_lengths = [len(seq) for seq in X_train_seq]

# Plot distribution
plt.figure(figsize=(10, 6))
plt.hist(train_lengths, bins=50)
plt.xlabel('Sequence Length')
plt.ylabel('Frequency')
plt.title('Text Length Distribution')
plt.show()


import numpy as np
print(f"50th percentile: {np.percentile(train_lengths, 50)}")
print(f"90th percentile: {np.percentile(train_lengths, 90)}")
print(f"95th percentile: {np.percentile(train_lengths, 95)}")
print(f"99th percentile: {np.percentile(train_lengths, 99)}")
print(f"Max length: {max(train_lengths)}")


max_len = int(np.percentile(train_lengths, 95)) 
print(f"\nRecommended max_len: {max_len}")

In [None]:
# Pad sequences
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_val_pad = pad_sequences(X_val_seq, maxlen=max_len, padding='post', truncating='post')

In [None]:
X_train_pad[0]

In [None]:
len(X_train_pad)

## Model Building and Comparison

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, GlobalMaxPool1D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.optimizers import Adam

# Common parameters
vocab_size = 10000
embedding_dim = 128

# Callback to save best model based on val_loss
checkpoint = ModelCheckpoint('best_DL_model.keras', 
                            monitor='val_loss', 
                            save_best_only=True, 
                            mode='min',
                            verbose=1)

early_stop = EarlyStopping(monitor='val_loss', patience=3)

### LSTM Model

In [None]:
lstm_model = Sequential()
lstm_model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
lstm_model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
lstm_model.add(Dense(1, activation='sigmoid'))

In [None]:
lstm_model.compile(loss='binary_crossentropy', 
                 optimizer=Adam(1e-4), 
                 metrics=['accuracy'])

In [None]:
lstm_history = lstm_model.fit(X_train_pad, y_train,
                             epochs= 3,
                             batch_size= 64,
                             validation_data=(X_val_pad, y_val),
                             callbacks=[checkpoint, early_stop])

In [None]:
loss, acc = lstm_model.evaluate(X_val_pad, y_val)

### Bidirectional LSTM

In [None]:
checkpoint = ModelCheckpoint('best_bid_model.keras', 
                                monitor='val_loss', 
                                save_best_only=True, 
                                mode='min',
                                verbose=1)

early_stop = EarlyStopping(monitor='val_loss', patience=3)

In [None]:
Bidirectional_model = Sequential()
Bidirectional_model.add(Embedding(vocab_size, embedding_dim, input_length=max_len))
Bidirectional_model.add(Bidirectional(LSTM(64, dropout=0.2, recurrent_dropout=0.2)))
Bidirectional_model.add(Dense(1, activation='sigmoid'))

In [None]:
Bidirectional_model.compile(loss='binary_crossentropy', 
                             optimizer=Adam(1e-4), 
                             metrics=['accuracy'])

In [None]:
bilstm_history = Bidirectional_model.fit(X_train_pad, y_train,
                                         epochs=3,
                                         batch_size=64,
                                         validation_data=(X_val_pad, y_val),
                                         callbacks=[checkpoint, early_stop])

In [None]:
loss, acc = Bidirectional_model.evaluate(X_val_pad, y_val)

### test model

In [None]:
checkpoint = ModelCheckpoint('best_model.keras', 
                                monitor='val_loss', 
                                save_best_only=True, 
                                mode='min',
                                verbose=1)

early_stop = EarlyStopping(monitor='val_loss', patience=3)

In [None]:
import keras

model = keras.Sequential([
    keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_len),
    keras.layers.Bidirectional(keras.layers.LSTM(100, dropout=0.2, recurrent_dropout=0.2)),
    keras.layers.Dense(24, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])


In [None]:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [None]:
history = model.fit(X_train_pad, y_train,
                     epochs=3,
                     batch_size=64,
                     validation_data=(X_val_pad, y_val),
                     callbacks=[checkpoint, early_stop])

In [None]:
loss, acc = model.evaluate(X_val_pad, y_val)

*************************************

### Model Comparison

In [None]:
def plot_history(histories, labels):
    """Plot validation accuracy and validation loss per epoch for several runs.

    Args:
        histories: objects exposing a ``history`` dict with ``val_accuracy``
            and ``val_loss`` entries (one per training run).
        labels: legend labels, paired with *histories* in order.
    """
    # (history key, panel title, y-axis label) for the left and right panels.
    panels = (
        ('val_accuracy', 'Model Validation Accuracy', 'Accuracy'),
        ('val_loss', 'Model Validation Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 5))

    for position, (metric, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for run, name in zip(histories, labels):
            plt.plot(run.history[metric], label=name)
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend()

    plt.tight_layout()
    plt.show()

histories = [lstm_history, bilstm_history, history]
labels = ['LSTM', 'BiLSTM', 'BiLSTM_2']
plot_history(histories, labels)

*********************************************

### Try the best model on a new sample


In [None]:
def predict_sentiment1(sample):
    """Classify one raw sentence with the Keras model bound to ``model``.

    The sentence goes through the same helper chain as the training data
    (lower-casing, ``remove_strang``, ``tokenization``, ``stopword_removal``,
    ``Stemming``, ``make_df``) and is then encoded with the fitted
    ``tokenizer`` and padded exactly like ``X_train_pad``.

    Args:
        sample: the raw sentence to classify.

    Returns:
        Whatever ``display`` returns for the result image.
    """
    # Reusing the training helpers keeps the stop-word list and regex rules
    # identical to the ones the model was trained with.
    documents = remove_strang([sample.lower()])
    cleaned = make_df(Stemming(stopword_removal(tokenization(documents))))

    # `cleaned` is already a list holding one string. Wrapping it in another
    # list would make the tokenizer treat the whole sentence as a single
    # token, which is almost never in the vocabulary.
    sequence = tokenizer.texts_to_sequences(cleaned)

    # Same length and padding/truncation side as the training sequences.
    padded_sequence = pad_sequences(sequence, maxlen=max_len,
                                    padding='post', truncating='post')

    prediction = model.predict(padded_sequence)

    # One document in, one sigmoid output back.
    probability = float(prediction[0][0])

    if probability >= 0.5:
        print('           Positive')
        img = Image.open('positive.png')

    else:
        print('           Negative')
        img = Image.open('negative.png')

    return display(img)

In [None]:
sample3 = "it wasnt a good filme"
predict_sentiment1(sample3)

In [None]:
predict_sentiment1("it was a good film")

                                                                        :)