In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
pip install neattext

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import neattext.functions as nfx

In [None]:
# Load ML Pkgs
# Estimators
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

# Transformers
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [None]:
# Load the raw emotion dataset.
# Forward slashes keep the relative path valid on Linux/Colab as well as on Windows and
# avoid backslash sequences (e.g. "\t") being interpreted as escape characters.
df = pd.read_csv('./DataSets/emotion_dataset_raw.csv')
#df = pd.read_csv('./DataSets/combinedfile.csv')
#df = pd.read_csv('./DataSets/tweet_emotions.csv')


In [None]:
df.tail()

In [None]:
df.shape

In [None]:
df.dtypes

In [None]:
df.isnull().sum()

In [None]:
df['Emotion'].value_counts()

In [None]:
#df['Emotion'].value_counts().plot(kind='bar')
#sns.countplot(df['Emotion'])
sns.countplot(x='Emotion', data=df)

In [None]:
from textblob import TextBlob
from sqlalchemy.engine import result

In [None]:
def get_sentiment(text):
  """Label `text` as 'Positive', 'Neutral' or 'Negative' from its TextBlob polarity.

  A polarity of exactly 0 is 'Neutral', anything above 0 is 'Positive',
  and every other value is 'Negative'.
  """
  polarity = TextBlob(text).sentiment.polarity
  if polarity == 0:
    return 'Neutral'
  return 'Positive' if polarity > 0 else 'Negative'

In [None]:
get_sentiment('I am happy')

In [None]:
df['Sentiment'] = df['Text'].apply(get_sentiment)

In [None]:
df.head()

In [None]:
df.groupby(['Emotion', 'Sentiment']).size()

In [None]:
df.groupby(['Emotion', 'Sentiment']).size().plot(kind='bar')

In [None]:
# # Data Cleaning
dir(nfx)

In [None]:
# Build the Clean_Text column by running each raw text through the neattext
# cleaners in a fixed order.
df['Clean_Text'] = (
    df['Text']
    .apply(nfx.remove_userhandles)    # user handles
    .apply(nfx.remove_stopwords)      # stopwords
    .apply(nfx.remove_punctuations)   # punctuations
)

In [None]:
df

In [None]:
#common word extraction
from collections import Counter

In [None]:
def extract_keywords(text, num=50):
  """Return the `num` most frequent whitespace-separated tokens of `text`.

  The result is a dict mapping token -> occurrence count, ordered from the
  most frequent token to the least frequent one.
  """
  token_counts = Counter(text.split())
  return {token: count for token, count in token_counts.most_common(num)}

In [None]:
emotion_list = df['Emotion'].unique().tolist()
emotion_list

In [None]:
joy_list = df[df['Emotion']== 'joy']['Clean_Text'].tolist()
#joy_list

In [None]:
joy_docx = ' '.join(joy_list)
joy_docx

In [None]:
#extracting keywords
keyword_joy = extract_keywords(joy_docx)
keyword_joy

In [None]:
def plot_most_common_words(mydict):
  """Draw a bar chart of token frequencies.

  `mydict` maps token -> count; each entry becomes one bar, and the x tick
  labels are rotated by 45 degrees.
  """
  token_counts = pd.DataFrame(list(mydict.items()), columns=['token', 'Count'])
  plt.figure(figsize=(20, 10))
  sns.barplot(data=token_counts, x='token', y='Count')
  plt.xticks(rotation=45)
  plt.show()

In [None]:
plot_most_common_words(keyword_joy)

In [None]:
# Collect the cleaned texts labelled 'surprise', merge them into one document
# and extract its most common tokens.
surprise_list = df.loc[df['Emotion'] == 'surprise', 'Clean_Text'].tolist()
surprise_docx = ' '.join(surprise_list)
keyword_surprise = extract_keywords(surprise_docx)

In [None]:
from wordcloud import WordCloud

In [None]:
# def plot_wordcloud(docx):
#   wordcloud = WordCloud().generate(docx)
#   plt.figure(figsize=(20,10))
#   plt.imshow(wordcloud, interpolation="bilinear")
#   plt.axis('off')
#   plt.show()

# plot_wordcloud(joy_docx)
# plot_wordcloud(surprise_docx)

Main model

In [None]:
#main ML model
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [None]:
#feature building
Xfeature = df['Clean_Text']
ylabel = df['Emotion']
Xfeature

In [None]:
# Bag-of-words features: learn the vocabulary and build the document-term matrix in one step.
# NOTE(review): the vectorizer is fitted on the whole Clean_Text column, i.e. before the
# train/test split, so the vocabulary also contains words that only occur in test rows.
# Consider fitting on the training texts only.
cv = CountVectorizer()
X= cv.fit_transform(Xfeature)

In [None]:
#features by name
cv.get_feature_names_out()

In [None]:
# Densify only the first few rows for inspection: calling toarray() on the whole sparse
# matrix allocates (documents x vocabulary) cells and can exhaust memory.
X[:5].toarray()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, ylabel, test_size=0.3, random_state=42)

In [None]:
#model building
nv_model = MultinomialNB()
nv_model.fit(X_train, y_train)

In [None]:
#accuracy
accuracynv = nv_model.score(X_test, y_test)
print("Naive bayes Model Accuracy:", accuracynv)

In [None]:
#prediction
y_pred_for_nv = nv_model.predict(X_test)
y_pred_for_nv

array(['fear', 'sadness', 'sadness', ..., 'sadness', 'anger', 'sadness'],
      dtype='<U8')

In [None]:
#making individual prediction
sample_text = ['I am happy']
vect = cv.transform(sample_text).toarray()
nv_model.predict(vect)

In [None]:
#prediction probability
nv_model.predict_proba(vect)

In [None]:
nv_model.classes_

In [None]:
np.max(nv_model.predict_proba(vect))

In [None]:
def predict_emotion(sample_texts, model, vectorizer=None):
    """Predict the emotion label and per-class probabilities for one or more texts.

    Parameters
    ----------
    sample_texts : str or iterable of str
        A single text or a collection of texts. A bare string is treated as one sample.
    model : fitted classifier
        Must expose ``predict``, ``predict_proba`` and ``classes_`` and accept the
        output of ``vectorizer.transform`` as input.
    vectorizer : fitted vectorizer, optional
        Object whose ``transform`` turns texts into model features. Defaults to the
        notebook-level ``cv``.

    Returns
    -------
    (predictions, pred_percentages)
        ``predictions`` holds one predicted label per text; ``pred_percentages`` holds one
        ``{class_label: probability}`` dict per text, in the same order. Both lists are
        empty when no texts are given.

    One summary line is printed per text.
    """
    if isinstance(sample_texts, str):
        sample_texts = [sample_texts]  # a bare string is one sample, not a sequence of characters
    else:
        sample_texts = list(sample_texts)

    # Nothing to score; skip the model calls, which reject zero-row input.
    if not sample_texts:
        return [], []

    if vectorizer is None:
        vectorizer = cv

    # Vectorize and score the whole batch once instead of once per text, and pass the
    # features on as produced by the vectorizer rather than densifying them.
    features = vectorizer.transform(sample_texts)
    labels = model.predict(features)
    probabilities = model.predict_proba(features)

    predictions = []
    pred_percentages = []
    for label, class_probabilities in zip(labels, probabilities):
        print("prediction:{}, Prediction Score:{}".format(label, np.max(class_probabilities)))
        predictions.append(label)
        pred_percentages.append(dict(zip(model.classes_, class_probabilities)))

    return predictions, pred_percentages


In [None]:
predict_emotion(sample_text, nv_model)

In [None]:
predict_emotion(["i am going to kill you"], nv_model)

In [None]:
print(classification_report(y_test, y_pred_for_nv))

In [None]:
#model saving
import joblib

In [None]:
# Persist the trained Naive Bayes model. The context manager closes the file even if
# joblib.dump raises.
# NOTE(review): the fitted CountVectorizer `cv` is not saved in this cell, and the model
# cannot score raw text without it. Confirm it is persisted somewhere.
with open("/content/drive/MyDrive/AAIAttempt03/nv_model.pkl", "wb") as model_file:
    joblib.dump(nv_model, model_file)

In [None]:
# Raise max_iter above the library default so the solver has room to converge on the
# high-dimensional bag-of-words features; this is the same setting the
# LogisticRegression inside the voting ensemble uses.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

In [None]:
accuracylr = lr_model.score(X_test, y_test)
print("LR Model Accuracy:", accuracylr)

In [None]:
predict_emotion("i am happy", nv_model)

In [None]:
# a= input("enter the text: ")
# print("prediction using Logistic Regression: ", predict_emotion(a, lr_model))
# print("prediction using Naive Bayes: ", predict_emotion(a, nv_model))

In [None]:
!pip install eli5

In [None]:
import eli5

In [None]:
eli5.show_weights(lr_model, top=20)

In [None]:
# Take the label order from the fitted model: a list of target names is matched to
# lr_model.classes_ by position, whereas ylabel.unique() yields labels in order of first
# appearance and would attach weights to the wrong emotion.
class_names = lr_model.classes_.tolist()

In [None]:
feature_names = cv.get_feature_names_out()

In [None]:
eli5.show_weights(lr_model, feature_names=feature_names, target_names=class_names)

In [None]:
# ?eli5.show_weights

In [None]:
from sklearn.svm import SVC

In [None]:
# Create an instance of the SVM classifier.
# probability=True enables predict_proba; random_state pins the internal shuffling used
# to estimate those probabilities so they are reproducible between runs.
svm_model = SVC(kernel='linear', probability=True, random_state=42)

# Train the SVM model on your training data
svm_model.fit(X_train, y_train)

In [None]:
# Calculate the accuracy of the SVM model on the test data
svm_accuracy = svm_model.score(X_test, y_test)
print("SVM Model Accuracy:", svm_accuracy)

In [None]:
predict_emotion("i am sad", svm_model)

In [None]:
# Predict emotions using the SVM model
y_pred_for_svm = svm_model.predict(X_test)

In [None]:
# Persist the trained SVM model. The context manager closes the file even if
# joblib.dump raises.
# NOTE(review): the fitted CountVectorizer `cv` is not saved in this cell, and the model
# cannot score raw text without it. Confirm it is persisted somewhere.
with open("/content/drive/MyDrive/AAIAttempt03/svm_model.pkl", "wb") as svm_model_file:
    joblib.dump(svm_model, svm_model_file)

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Create individual classifiers
logistic_regression = LogisticRegression(max_iter=1000)
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
naive_bayes = MultinomialNB()

In [None]:
# Create a Voting Classifier that combines the predictions of individual classifiers
voting_classifier = VotingClassifier(estimators=[
    ('lr', logistic_regression),
    ('rf', random_forest),
    ('nb', naive_bayes)
], voting='soft')  # 'soft' voting uses predicted class probabilities for voting

In [None]:
# Train the Voting Classifier on the training data
voting_classifier.fit(X_train, y_train)

# Make predictions using the Voting Classifier
y_pred = voting_classifier.predict(X_test)

In [None]:
# Calculate the accuracy of the ensemble model
accuracy = accuracy_score(y_test, y_pred)
print("Ensemble Model Accuracy:", accuracy)

In [None]:
predict_emotion("life is so depressing", voting_classifier)

In [None]:
#set of accuracies
print("Naive bayes Model Accuracy:", accuracynv)
print("LR Model Accuracy:", accuracylr)
print("SVM Model Accuracy:", svm_accuracy)
print("Ensemble Model Accuracy:", accuracy)

In [None]:
# Ask for one text and run it through every trained model, printing a separator line
# before each model's result.
a = input("enter the text: ")
for _caption, _model in (
    ("prediction using Logistic Regression: ", lr_model),
    ("prediction using Naive Bayes: ", nv_model),
    ("prediction using SVM: ", svm_model),
    ("prediction using Ensemble Model: ", voting_classifier),
):
    print("====================================================================")
    print(_caption, predict_emotion(a, _model))