### Import All mandatory Libraries

In [93]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pickle
from sklearn.linear_model import LogisticRegressionCV

In [94]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Data Cleaning and Preprocessing

In [95]:
df1 = 'data/sentiment_analysis.csv'
em1 = pd.read_csv(df1)
em1.shape

(499, 7)

In [96]:
df2 = 'data/test.csv'
em2 = pd.read_csv(df2, encoding='unicode_escape')
em2 = em2[(em2['sentiment']=='positive') | (em2['sentiment']=='neutral') | (em2['sentiment']=='negative')]
em2.shape

(3534, 9)

#### Concatination of 2 Dataframes

In [100]:
em1 = em1[['text','sentiment']]
em2 = em2[['text','sentiment']]
final_data = pd.concat([em1,em2],axis=0)
final_data

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative
...,...,...
3529,"its at 3 am, im very tired but i can`t sleep ...",negative
3530,All alone in this old house again. Thanks for...,positive
3531,I know what you mean. My little dog is sinkin...,negative
3532,_sutra what is your next youtube video gonna b...,positive


In [101]:
# # ----------- This Shell is Experimetal can not harm the model if mistakenly run only this is for elaboration --------------
# Mapping of positive negative and neutral
sentiment_to_label = {'positive': 2, 'neutral': 1, 'negative': 0}
# Apply the mapping to create a new 'label' column
final_data['label'] = final_data['sentiment'].map(sentiment_to_label)
final_data.head()

Unnamed: 0,text,sentiment,label
0,What a great day!!! Looks like dream.,positive,2
1,"I feel sorry, I miss you here in the sea beach",positive,2
2,Don't angry me,negative,0
3,We attend in the class just for listening teac...,negative,0
4,"Those who want to go, let them go",negative,0


In [102]:
# We Only Have to work With text and its sentiment we can also go with it's label as well only some will change in the code

data = final_data[['text','sentiment']]
# for Experiment
# data = emotions[['text','label']]
data.head()

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative


### Feature Extraction

In [120]:
tfidf = TfidfVectorizer(
    use_idf=True,
    norm='l2',
    smooth_idf=True
)
# y= data.label.values
y = data.sentiment.values
X = tfidf.fit_transform(data['text'].values.astype('U'))
tfidf_vectorizer = open('tfidf_vectorizer.sav','wb')
pickle.dump(tfidf,tfidf_vectorizer)
tfidf_vectorizer.close()

### Prepare data for Traning and Testing

In [121]:
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=1,test_size=0.2,shuffle=False)

### Model Traning and Save Model

In [122]:
clf = LogisticRegressionCV(
    cv=5,
    scoring='accuracy',
    random_state=0,
    n_jobs=-1,
    verbose=3,
    max_iter=300
).fit(X_train,y_train)
saved_model = open('saved_model.sav','wb')
pickle.dump(clf,saved_model)
saved_model.close()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   42.4s remaining:  1.1min
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   53.0s finished


### Check Model Accuracy

In [123]:
filename = 'saved_model.sav'
with open(filename, 'rb') as model_file:
    saved_model = pickle.load(model_file)
accuracy = saved_model.score(X_test,y_test)
print(f"Model Accuracy is : {accuracy*100:.2f}%")

Model Accuracy is : 62.33%


### Prediction

##### You Can Directly run Prediction shell 

In [15]:
import pickle

filename = 'saved_model.sav'
with open(filename, 'rb') as model_file:
    saved_model = pickle.load(model_file)
    
vectorizer_filename = 'tfidf_vectorizer.sav'
with open(vectorizer_filename, 'rb') as vectorizer_file:
    tfidf_vectorizer = pickle.load(vectorizer_file)

text = input("Enter you Text to Predict the Sentiment :")
text_tfidf = tfidf_vectorizer.transform([text])
prediction = saved_model.predict(text_tfidf)
print(f"Your Text Sentiment is : {prediction[0]}")

Enter you Text to Predict the Sentiment : This service is not really expected it can be better


Your Text Sentiment is : neutral
