In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import warnings 
warnings.filterwarnings('ignore')
import re
from nltk.corpus import stopwords

In [9]:
data = pd.read_csv('sentiment_analysis.csv')
data

Unnamed: 0,Year,Month,Day,Time of Tweet,text,sentiment,Platform
0,2018,8,18,morning,What a great day!!! Looks like dream.,positive,Twitter
1,2018,8,18,noon,"I feel sorry, I miss you here in the sea beach",positive,Facebook
2,2017,8,18,night,Don't angry me,negative,Facebook
3,2022,6,8,morning,We attend in the class just for listening teac...,negative,Facebook
4,2022,6,8,noon,"Those who want to go, let them go",negative,Instagram
...,...,...,...,...,...,...,...
494,2015,10,18,night,"According to , a quarter of families under six...",negative,Twitter
495,2021,2,25,morning,the plan to not spend money is not going well,negative,Instagram
496,2022,5,30,noon,uploading all my bamboozle pictures of facebook,neutral,Facebook
497,2018,8,10,night,congratulations ! you guys finish a month ear...,positive,Twitter


In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 499 entries, 0 to 498
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Year           499 non-null    int64 
 1   Month          499 non-null    int64 
 2   Day            499 non-null    int64 
 3   Time of Tweet  499 non-null    object
 4   text           499 non-null    object
 5   sentiment      499 non-null    object
 6   Platform       499 non-null    object
dtypes: int64(3), object(4)
memory usage: 27.4+ KB


In [13]:
data.isnull().sum()

Year             0
Month            0
Day              0
Time of Tweet    0
text             0
sentiment        0
Platform         0
dtype: int64

In [15]:
data.describe()

Unnamed: 0,Year,Month,Day
count,499.0,499.0,499.0
mean,2020.39479,6.084168,15.693387
std,2.830991,3.652711,8.804673
min,2010.0,1.0,1.0
25%,2019.0,2.0,8.0
50%,2021.0,6.0,15.0
75%,2023.0,9.0,22.0
max,2023.0,12.0,31.0


In [17]:
import re
import string

def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
    return text


In [19]:
import nltk

In [22]:
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))


def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = ' '.join([word for word in text.split() if word not in stop_words])  # Remove stopwords
    return text

data["cleaned_review"] = data["text"].apply(clean_text)
print(data.head())


   Year  Month  Day Time of Tweet  \
0  2018      8   18       morning   
1  2018      8   18          noon   
2  2017      8   18         night   
3  2022      6    8       morning   
4  2022      6    8          noon   

                                                text sentiment     Platform  \
0              What a great day!!! Looks like dream.  positive    Twitter     
1     I feel sorry, I miss you here in the sea beach  positive    Facebook    
2                                     Don't angry me  negative     Facebook   
3  We attend in the class just for listening teac...  negative    Facebook    
4                  Those who want to go, let them go  negative   Instagram    

                                      cleaned_review  
0                         great day looks like dream  
1                          feel sorry miss sea beach  
2                                         dont angry  
3  attend class listening teachers reading slide ...  
4                          

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [25]:
vectorizer = TfidfVectorizer(max_features=5000)  # Limit to top 5000 words
X = vectorizer.fit_transform(data["cleaned_review"])
y = data["sentiment"]


In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [29]:
model = LogisticRegression()
model.fit(X_train, y_train)


In [31]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.59

Classification Report:
               precision    recall  f1-score   support

    negative       0.92      0.33      0.49        36
     neutral       0.43      0.93      0.59        30
    positive       0.86      0.56      0.68        34

    accuracy                           0.59       100
   macro avg       0.74      0.61      0.59       100
weighted avg       0.76      0.59      0.58       100



In [33]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
model.fit(X_train, y_train)


In [35]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.67

Classification Report:
               precision    recall  f1-score   support

    negative       0.77      0.56      0.65        36
     neutral       0.54      0.87      0.67        30
    positive       0.81      0.62      0.70        34

    accuracy                           0.67       100
   macro avg       0.71      0.68      0.67       100
weighted avg       0.71      0.67      0.67       100



In [36]:
import joblib

joblib.dump(model, "sentiment_model.pkl")
joblib.dump(vectorizer, "vectorizer.pkl")


['vectorizer.pkl']

In [38]:
import nest_asyncio
import uvicorn


nest_asyncio.apply()

In [None]:
import joblib
import uvicorn
from fastapi import FastAPI

# Load the trained model and vectorizer
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("vectorizer.pkl")

app = FastAPI()

@app.post("/predict")
def predict(review: str):
    transformed_review = vectorizer.transform([review])
    prediction = model.predict(transformed_review)[0]
    return {"sentiment": prediction}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
