### Install Requirements

In [1]:
!pip install hazm
!pip install urlextract
!pip install emojis

Collecting hazm
  Obtaining dependency information for hazm from https://files.pythonhosted.org/packages/91/8c/cc3d01c27681eb8223781ea162a23f9926647ce864eb601a19aee4bce0af/hazm-0.10.0-py3-none-any.whl.metadata
  Downloading hazm-0.10.0-py3-none-any.whl.metadata (11 kB)
Collecting fasttext-wheel<0.10.0,>=0.9.2 (from hazm)
  Downloading fasttext_wheel-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting nltk<4.0.0,>=3.8.1 (from hazm)
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
Collecting python-crfsuite<0.10.0,>=0.9.9 (from hazm)
  Obtaining dependency information for python-crfsuite<0.10.0,>=0.9.9 from https://files.pythonhosted.org/packages/38/1d/c475ba7d11e9735f00eb08e2f5315aa2e21c24cc

### Import Requirements

In [2]:
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.utils import to_categorical
import numpy as np
import re
from urlextract import URLExtract
import emojis
import tensorflow as tf
import ipywidgets as widgets
from IPython.display import display, clear_output
from xgboost import XGBClassifier
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics import accuracy_score, f1_score

### Preprocessing

In [3]:
class Preprocessor:
    def __init__(self):
        self.extractor = URLExtract()

    def _multiple_replace(self, mapping, text):
        pattern = "|".join(map(re.escape, mapping.keys()))
        return re.sub(pattern, lambda m: mapping[m.group()], str(text))

    def convert_fa_numbers(self, input_str):
        mapping = {
            '۰': '0',
            '۱': '1',
            '۲': '2',
            '۳': '3',
            '۴': '4',
            '۵': '5',
            '۶': '6',
            '۷': '7',
            '۸': '8',
            '۹': '9',
            '.': '.',
        }
        return self._multiple_replace(mapping, input_str)

    def convert_ar_characters(self, input_str):
        mapping = {
            'ك': 'ک',
            'ى': 'ی',
            'ي': 'ی',
            'ئ': 'ی',
            'إ': 'ا',
            'أ': 'ا',
            'ة': 'ه',
            'ؤ': 'و'
        }
        return self._multiple_replace(mapping, input_str)

    def preprocess(self, text):
        for url in self.extractor.gen_urls(text):
            text = text.replace(url, '<URL>')
        emj = emojis.get(text)
        for i in emj:
            if i in text:
                text = text.replace(i, '<emoji>')
        text = self.convert_fa_numbers(text)
        text = self.convert_ar_characters(text)
        text = re.sub(r"(:\s?\)|:-\)|\(\s?:|\(-:|:\'\)|:\s?D|8-\)|:\s?\||;\s?\)|:-\*|:-\||:-\(|:\s?P|:-P|:-p|:-b|:-O|:-o|:-0|:-\@|:\$|:-\^|:-&|:-\*|:-\+|:-\~|:-\`|:-\>|:-\<|:-\}|:-\{|\[:\s?\]|\[:\s?\]|:\s?\]|:\s?\[|:\s?\}|:\s?\{)", '<smiley>', text)
        text = text.lower()
        text = text.strip()
        text = re.sub(r'[<>#.:()"\'!?؟،,@$%^&*_+\[\]/]', ' ', text)
        text = re.sub(r'[\s]{2,}', ' ', text)
        text = re.sub(r'(\w)\1{2,}', r'\1', text)
        if re.search(r'[\u0600-\u06FF]', text):
            return text
        else:
            return 'None'



### Models

In [4]:
class SentimentModel:
    def __init__(self):
        self.model = Sequential()

    def build_model(self, input_dim, nb_classes=2):
        self.model.add(Dense(1000, input_shape=(input_dim[1],)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(500))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(50))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(nb_classes))
        self.model.add(Activation('softmax'))
        self.model.compile(loss='categorical_crossentropy', optimizer='adam')

    def train_model(self, X_train, y_train_cat, batch_size=32, epochs=10, verbose=2):
        self.model.fit(X_train, y_train_cat, batch_size=batch_size, epochs=epochs, verbose=verbose)

    def predict(self, X_test):
        return self.model.predict(X_test)




### Load and Prepare Data

In [5]:
corpus = pd.read_csv('/kaggle/input/d/sarasnasr/snappfood/train.csv', on_bad_lines='skip', delimiter='\t')

preprocessor = Preprocessor()

tqdm.pandas()
corpus['Cleaned'] = corpus['comment'].progress_apply(preprocessor.preprocess)
corpus = corpus.drop('Unnamed: 0', axis=1).dropna()

count_vectorizer = CountVectorizer()
X_count_vectorized = count_vectorizer.fit_transform(corpus.Cleaned).todense()

vectorizer = TfidfVectorizer(min_df=2, max_features=10000)
X_tfidf_vectorized = vectorizer.fit_transform(corpus.Cleaned).todense()

labels = corpus['label_id'].values

100%|██████████| 56700/56700 [01:23<00:00, 682.72it/s]


### Split Data

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_count_vectorized, labels, test_size=0.2, random_state=42)
X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test = train_test_split(X_tfidf_vectorized, labels,
                                                                            test_size=0.2, random_state=42)

### Train Model

In [7]:
sentiment_model = SentimentModel()

sentiment_model.build_model(X_tfidf_train.shape)
y_tfidf_train_cat = to_categorical(y_tfidf_train)
sentiment_model.train_model(X_tfidf_train, y_tfidf_train_cat)

y_test_pred = sentiment_model.predict(X_tfidf_test)
y_test_predclass = np.argmax(y_test_pred, axis=1)
y_train_pred = sentiment_model.predict(X_tfidf_train)
y_train_predclass = np.argmax(y_train_pred, axis=1)

Epoch 1/10
1418/1418 - 223s - loss: 0.3808 - 223s/epoch - 157ms/step
Epoch 2/10
1418/1418 - 214s - loss: 0.3001 - 214s/epoch - 151ms/step
Epoch 3/10
1418/1418 - 215s - loss: 0.2386 - 215s/epoch - 151ms/step
Epoch 4/10
1418/1418 - 233s - loss: 0.1573 - 233s/epoch - 165ms/step
Epoch 5/10
1418/1418 - 223s - loss: 0.0971 - 223s/epoch - 157ms/step
Epoch 6/10
1418/1418 - 223s - loss: 0.0660 - 223s/epoch - 157ms/step
Epoch 7/10
1418/1418 - 222s - loss: 0.0492 - 222s/epoch - 157ms/step
Epoch 8/10
1418/1418 - 222s - loss: 0.0419 - 222s/epoch - 157ms/step
Epoch 9/10
1418/1418 - 222s - loss: 0.0349 - 222s/epoch - 156ms/step
Epoch 10/10
1418/1418 - 223s - loss: 0.0325 - 223s/epoch - 157ms/step


In [20]:
# Random-Forest

RandomForestClassifier(n_estimators=100, random_state=42)
X_tfidf_train_array = np.asarray(X_tfidf_train)
rf_model.fit(X_tfidf_train_array, y_tfidf_train)

X_tfidf_test_array = np.asarray(X_tfidf_test)
y_rf_pred = rf_model.predict(X_tfidf_test_array)
accuracy_rf = accuracy_score(y_tfidf_test, y_rf_pred)

In [30]:
# XG-Boost

xgb_model = XGBClassifier()
xgb_model.fit(X_tfidf_train_array, y_tfidf_train)

y_xgb_pred = xgb_model.predict(X_tfidf_test_array)
accuracy_xgb = accuracy_score(y_tfidf_test, y_xgb_pred)


### Evaluate Model

In [33]:
class Evaluator:
    @staticmethod
    def evaluate_accuracy(y_true, y_predclass):
        return round(accuracy_score(y_true, y_predclass), 4) * 100
    
    @staticmethod
    def evaluate_f1(y_true, y_predclass):
        return round(f1_score(y_true, y_predclass, average='weighted'), 4) * 100


In [34]:
evaluator = Evaluator()
test_accuracy = evaluator.evaluate_accuracy(y_tfidf_test, y_test_predclass)
train_accuracy = evaluator.evaluate_accuracy(y_tfidf_train, y_train_predclass)

test_f1 = evaluator.evaluate_f1(y_tfidf_test, y_test_predclass)
train_f1 = evaluator.evaluate_f1(y_tfidf_train, y_train_predclass)

print(f"Deep Neural Network - Test accuracy: {test_accuracy}%")
print(f"Deep Neural Network - Train accuracy: {train_accuracy}%")
print(f"Deep Neural Network - Test F1 score: {test_f1}%")
print(f"Deep Neural Network - Train F1 score: {train_f1}%")


Deep Neural Network - Test accuracy: 82.86%
Deep Neural Network - Train accuracy: 99.49%
Deep Neural Network - Test F1 score: 82.86%
Deep Neural Network - Train F1 score: 99.49%


In [25]:
print("Random Forest Classifier Evaluation:")
print(f"Accuracy: {accuracy_rf}")
print("Classification Report:")
print(classification_report(y_tfidf_test, y_rf_pred))

Random Forest Classifier Evaluation:
Accuracy: 0.8471781305114638
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.79      0.84      5613
           1       0.82      0.90      0.86      5727

    accuracy                           0.85     11340
   macro avg       0.85      0.85      0.85     11340
weighted avg       0.85      0.85      0.85     11340



In [35]:
print("\nXGBoost Classifier Evaluation:")
print(f"Accuracy: {accuracy_xgb}")
print("Classification Report:")
print(classification_report(y_tfidf_test, y_xgb_pred))


XGBoost Classifier Evaluation:
Accuracy: 0.8486772486772487
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.79      0.84      5613
           1       0.82      0.91      0.86      5727

    accuracy                           0.85     11340
   macro avg       0.85      0.85      0.85     11340
weighted avg       0.85      0.85      0.85     11340



# It's your Turn!

In [36]:
model_dropdown = widgets.Dropdown(
    options=['Deep Neural Network', 'Random Forest', 'XGBoost'],
    value='Deep Neural Network',
    description='Select Model:'
)

input_text = widgets.Textarea(
    placeholder='Enter your text here...'
)


output_area = widgets.Output()
prediction = ""

def on_predict_button_click(b):
    with output_area:
        output_area.clear_output()
        selected_model = model_dropdown.value
        input_text_value = input_text.value

        if selected_model == 'Deep Neural Network':
            X_pred = vectorizer.transform([preprocessor.preprocess(input_text_value)]).todense()
            predictions = sentiment_model.predict(X_pred)
            predicted_class = np.argmax(predictions, axis=1)[0]
            sentiment_label = "SAD \U0001F975" if predicted_class == 1 else "Happy \U0001F929"
            print(f"Predicted Sentiment: {sentiment_label}")

        elif selected_model == 'Random Forest':
            X_pred = vectorizer.transform([preprocessor.preprocess(input_text_value)]).todense()
            X_pred_array = np.asarray(X_pred)  # Convert to numpy array
            prediction = rf_model.predict(X_pred_array)[0]
            sentiment_label = "SAD \U0001F975" if prediction == 1 else "Happy \U0001F929"
            print(f"Predicted Sentiment: {sentiment_label}")

        elif selected_model == 'XGBoost':
            X_pred = vectorizer.transform([preprocessor.preprocess(input_text_value)]).todense()
            X_pred_array = np.asarray(X_pred)  # Convert to numpy array
            prediction = xgb_model.predict(X_pred_array)[0]
            sentiment_label = "SAD \U0001F975" if prediction == 1 else "Happy \U0001F929"
            print(f"Predicted Sentiment: {sentiment_label}")


            
predict_button = widgets.Button(description='Predict Sentiment')
predict_button.on_click(on_predict_button_click)

# Display widgets
print(prediction)
display(model_dropdown)
display(input_text)
display(predict_button)
display(output_area)





Dropdown(description='Select Model:', options=('Deep Neural Network', 'Random Forest', 'XGBoost'), value='Deep…

Textarea(value='', placeholder='Enter your text here...')

Button(description='Predict Sentiment', style=ButtonStyle())

Output()