<a href="https://colab.research.google.com/github/mohammedali001/sentiment_analysis/blob/main/SENTIMENT_ANALYSIS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RECOMANDATION SYSTEM MODEL

# LINKING COLAB WITH DRIVE TO ACCESS DATASET

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## UPLOADING DATASET

In [3]:
import pandas as pd
df = pd.read_csv('/content/sentiment_analysis.csv')
df


Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram
...,...,...,...,...,...,...,...
494,2015,10,18,night,"According to , a quarter of families under six...",negative,Twitter
495,2021,2,25,morning,the plan to not spend money is not going well,negative,Instagram
496,2022,5,30,noon,uploading all my bamboozle pictures of facebook,neutral,Facebook
497,2018,8,10,night,congratulations ! you guys finish a month ear...,positive,Twitter


#DATA PREPARATION

## DATA CLEANING

In [7]:
import re
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    return text

df['cleaned_text'] = df['text'].apply(clean_text)
df

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform,cleaned_text
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter,what a great day looks like dream
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook,i feel sorry i miss you here in the sea beach
2,2017,8,18,night,Don't angry me,negative,Facebook,dont angry me
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook,we attend in the class just for listening teac...
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram,those who want to go let them go
...,...,...,...,...,...,...,...,...
494,2015,10,18,night,"According to , a quarter of families under six...",negative,Twitter,according to a quarter of families under six ...
495,2021,2,25,morning,the plan to not spend money is not going well,negative,Instagram,the plan to not spend money is not going well
496,2022,5,30,noon,uploading all my bamboozle pictures of facebook,neutral,Facebook,uploading all my bamboozle pictures of facebook
497,2018,8,10,night,congratulations ! you guys finish a month ear...,positive,Twitter,congratulations you guys finish a month earl...


##SPLITTING THE DATA INTO X AND Y

In [8]:
y = df['sentiment']
y

0      positive
1      positive
2      negative
3      negative
4      negative
         ...   
494    negative
495    negative
496     neutral
497    positive
498    negative
Name: sentiment, Length: 499, dtype: object

In [26]:
x= df['cleaned_text']
x

0                      what a great day looks like dream
1          i feel sorry i miss you here in the sea beach
2                                          dont angry me
3      we attend in the class just for listening teac...
4                       those who want to go let them go
                             ...                        
494    according to  a quarter of families under six ...
495        the plan to not spend money is not going well
496      uploading all my bamboozle pictures of facebook
497     congratulations  you guys finish a month earl...
498     actually i wish i was back in tahoe i miss it...
Name: cleaned_text, Length: 499, dtype: object

## SPLITTING THE TRAINING SET, TESTING SET AND THE RANDOM STATE

In [27]:
from re import X
from sklearn.model_selection import train_test_split

# we create four random variables below
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=100)

### THE TRAINING SET FOR X

In [28]:
X_train

205       grabbing coffee from then making mom breakfast
445                                 hey i didnt get any 
321       ohh my tooth is hurts ohh im sad it very hurts
368    had a great time out in the beer garden wit th...
55     i am such a creeper i feel disappointed becaus...
                             ...                        
343                       lmao smh that one threw me off
359    is spending her saturday morning taking notes ...
323     u think u have bills haii just finished payin...
280    getting ready for week its too nice today to b...
8                                            both of you
Name: cleaned_text, Length: 399, dtype: object

### TESTING SET FOR X

In [29]:
X_test

69     we attend in the class just for listening teac...
29     went to sleep and there is a power cut in the ...
470    just woke up o mums singing to her new gnr cd ...
494    according to  a quarter of families under six ...
54            i saw an amazing nightmare yesterday night
                             ...                        
282    felt like  behaved like my son ate to compensa...
152                                 today is our eee day
154                            im studying in psychology
56              i have bad headech what i need to do now
391                                          brainfreeze
Name: cleaned_text, Length: 100, dtype: object

### TRAINING SET FOR Y

In [13]:
y_train

205     neutral
445     neutral
321    negative
368     neutral
55     negative
         ...   
343     neutral
359    negative
323    negative
280    positive
8       neutral
Name: sentiment, Length: 399, dtype: object

### TESTING SET FOR Y

In [14]:
y_test

69     negative
29      neutral
470    positive
494    negative
54      neutral
         ...   
282    negative
152     neutral
154    negative
56     negative
391     neutral
Name: sentiment, Length: 100, dtype: object

# BUILDING MODEL

## RANDOM FOREST ALGORITHM

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)
rf = RandomForestClassifier()
rf.fit(X_train_tfidf, y_train)

## PREDICTING ON THE TEST DATA

In [17]:
y_pred = rf.predict(X_test_tfidf)
y_pred

array(['neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive',
       'neutral', 'neutral', 'positive', 'neutral', 'neutral', 'positive',
       'positive', 'negative', 'neutral', 'neutral', 'neutral', 'neutral',
       'positive', 'neutral', 'neutral', 'neutral', 'positive', 'neutral',
       'negative', 'neutral', 'positive', 'neutral', 'neutral',
       'positive', 'neutral', 'negative', 'neutral', 'neutral', 'neutral',
       'neutral', 'positive', 'neutral', 'positive', 'positive',
       'positive', 'positive', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'negative',
       'neutral', 'neutral', 'neutral', 'neutral', 'positive', 'positive',
       'neutral', 'neutral', 'neutral', 'negative', 'positive',
       'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral',
       'neutral', 'neutral', 'negative', 'negative', 'neutra

## ACCURACY OF THE MODEL

In [30]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"{accuracy * 100}")

56.99999999999999


In [23]:
report

'              precision    recall  f1-score   support\n\n    negative       0.58      0.27      0.37        26\n     neutral       0.60      0.78      0.68        51\n    positive       0.48      0.43      0.45        23\n\n    accuracy                           0.57       100\n   macro avg       0.55      0.50      0.50       100\nweighted avg       0.57      0.57      0.55       100\n'

## CHECKINT SENTIMENT OF A COMMENT

In [49]:
comment = "this is not good "
comment_tfidf = vectorizer.transform([comment])
sentiment = rf.predict(comment_tfidf)
print(sentiment)

['negative']


# IMPORTING TEXTBLOB LIBRARY

In [19]:
from textblob import TextBlob

##  CHECKING POLARITY OF COMMENT

In [50]:
def get_comment_polarity(comment):
    analysis = TextBlob(comment)
    polarity = analysis.sentiment.polarity
    return polarity # Add a return statement to get the polarity value

# Call the function with proper indentation
result = get_comment_polarity(comment)
print(result)

-0.35


In [59]:
import joblib

# Save the model
joblib.dump(model, 'random_forest_model.pkl')

# Save the vectorizer
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']