# Sentiment Analysis
### This notebook includes an implementation of a sentiment analysis. The code mainly uses pandas for preprocessing, sci-kit for the machine learning process.

### Libraries

In [1]:
import pandas as pd
import numpy as np
import re 
import csv

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

### Preprocessing.

In [2]:
train = pd.read_csv('train.csv')

In [3]:
train

Unnamed: 0,text,emotion
0,Yeah for coupons! Found this place randomly ...,neutral
1,i still love to feel a gentle breeze and hear ...,happiness
2,"Normal, fama devido ao programa de tv. A sobre...",neutral
3,im feeling a little less jaded,sadness
4,i have never shaken the feeling of ferocious p...,hate
...,...,...
7995,i still feel a little weird telling people abo...,worry
7996,i am feeling rather overwhelmed with all that ...,surprise
7997,i feel shocked his words very pure very self,surprise
7998,"Service was sub par, really ... It was so disa...",neutral


In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8000 entries, 0 to 7999
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   text     8000 non-null   object
 1   emotion  8000 non-null   object
dtypes: object(2)
memory usage: 125.1+ KB


In [5]:
train.isnull().sum()

text       0
emotion    0
dtype: int64

In [6]:
#Cleaning data from special characters
def Data_Cleaning(train):
        train['text'] = train['text'].apply(lambda x: x.replace('!',''))
        train['text'] = train['text'].apply(lambda x: x.replace('$',''))
        train['text'] = train['text'].apply(lambda x: x.replace('.*',''))
        train['text'] = train['text'].apply(lambda x: x.replace('/',''))
        train['text'] = train['text'].apply(lambda x: x.replace('?',''))
        train['text'] = train['text'].apply(lambda x: x.replace(';',''))
        train['text'] = train['text'].apply(lambda x: x.replace('(',''))
        train['text'] = train['text'].apply(lambda x: x.replace(')',''))
        train['text'] = train['text'].apply(lambda x: x.replace(':',''))
        train['text'] = train['text'].apply(lambda x: x.replace(',',''))
        

In [7]:
Data_Cleaning(train)

In [8]:
#vectorization
vectorization = TfidfVectorizer()
X = train['text']
Y = train['emotion']

In [9]:
X = vectorization.fit_transform(X)

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.2, random_state = 0)

### Training

In [11]:
classif = LinearSVC()
classif.fit(X_train, Y_train)

LinearSVC()

### Testing

In [12]:
Y_prediction = classif.predict(X_test)

### Txt And CSV File Creation

In [13]:
Y_predictiontxt = open('prediction.txt','w')

In [14]:
for text in Y_prediction:
    Y_predictiontxt.write(str(text)+'\n')

In [15]:
Y_predictiontxt.close()

In [16]:
Y_testcsv = open('Y_test.csv','w')

In [17]:
writer = csv.writer(Y_testcsv)

In [18]:
writer.writerow(Y_test)

11415

In [19]:
Y_testcsv.close()

### Results

In [20]:
print(classification_report(Y_test, Y_prediction))

              precision    recall  f1-score   support

         fun       0.89      0.80      0.84       235
   happiness       0.89      0.94      0.91       299
        hate       0.87      0.85      0.86       222
     neutral       0.96      1.00      0.98       254
     sadness       0.88      0.83      0.86       236
    surprise       0.79      0.92      0.85       110
       worry       0.84      0.83      0.84       244

    accuracy                           0.88      1600
   macro avg       0.87      0.88      0.88      1600
weighted avg       0.88      0.88      0.88      1600



In [21]:
x = 'I am afraid I will be late to class'
vector1 = vectorization.transform([x])

In [22]:
classif.predict(vector1)

array(['worry'], dtype=object)