# Sentiment Analysis Prediction

## Dependencies

In [None]:
!pip install neattext

In [None]:
import numpy as np
import pandas as pd
import spacy
import neattext.functions as nfx
import matplotlib.pyplot as plt
import seaborn as sns

## Cargar Dataset

In [None]:
# Colab-only step: upload the dataset file from the local machine into the runtime.
from google.colab import files
files.upload()

In [None]:
# Load the dataset into a DataFrame, decoding the file as latin-1.
# NOTE(review): the hard-coded name (including the " (1)" suffix) must match the
# uploaded file exactly — confirm before running.
df = pd.read_csv('Sentiment Analysis Dataset (1).csv', encoding='latin-1')

In [None]:
# Preview the first rows of the raw dataset.
df.head()

In [None]:
# (rows, columns) of the raw dataset.
df.shape

## Data Cleaning

In [None]:
# Create the cleaned-text column from the raw 'SentimentText' column,
# applying neattext's punctuation removal first.
df['text_clean'] = df['SentimentText'].apply(nfx.remove_punctuations)

In [None]:
# Drop short words using neattext's default length threshold.
# NOTE(review): short words such as "no" can carry sentiment — confirm that
# removing them before scoring is intended.
df['text_clean'] = df['text_clean'].apply(nfx.remove_shortwords)

In [None]:
# Strip special characters from the cleaned text.
df['text_clean'] = df['text_clean'].apply(nfx.remove_special_characters)

In [None]:
# Second punctuation pass.
# NOTE(review): punctuation has already been handled by remove_punctuations and
# remove_special_characters in earlier cells, so this looks redundant — confirm.
df['text_clean'] = df['text_clean'].apply(nfx.remove_puncts)

In [None]:
# Remove dates from the cleaned text.
# NOTE(review): this runs after punctuation/special characters were stripped, so
# separator-based dates (e.g. 12/05/2020) may no longer be recognisable —
# consider whether this step should run first.
df['text_clean'] = df['text_clean'].apply(nfx.remove_dates)

In [None]:
# Compare the raw and cleaned text side by side for the first rows.
df[['SentimentText', 'text_clean']].head()

## Crear polaridad y Subjetividad  
Métricas de polaridad y subjetividad
Para comenzar, veamos qué ofrece TextBlob: una métrica de polaridad y otra de subjetividad. La polaridad es el sentimiento en sí y va de -1 (negativo) a +1 (positivo). La subjetividad mide qué tan objetivo o subjetivo es el texto y va de 0 (objetivo) a 1 (subjetivo).

In [None]:
from textblob import TextBlob

In [None]:
# TextBlob polarity score for each cleaned text.
df['polarity'] = df['text_clean'].apply(lambda x: TextBlob(x).sentiment.polarity)

In [None]:
# TextBlob subjectivity score for each cleaned text.
# NOTE(review): a new TextBlob is built per row here even though the polarity
# cell already analysed the same text; both scores come from one `.sentiment`.
df['subjectivity'] = df['text_clean'].apply(lambda x: TextBlob(x).sentiment.subjectivity)

In [None]:
# Check the new 'polarity' and 'subjectivity' columns.
df.head()

In [None]:
def get_sentiment(polarity):
    """Map a polarity score to a sentiment label.

    Returns 'Positive' for scores above zero, 'Negative' for scores below
    zero, and 'Neutral' for anything else (exactly zero, or a value such as
    NaN that compares neither above nor below zero).
    """
    if polarity > 0:
        return 'Positive'
    if polarity < 0:
        return 'Negative'
    return 'Neutral'

In [None]:
# Label every row from its polarity score.
df['sentiment_label'] = df['polarity'].apply(get_sentiment)

In [None]:
# Check the new 'sentiment_label' column.
df.head()

In [None]:
# Number of rows per sentiment label.
df['sentiment_label'].value_counts()

In [None]:
# Bar chart of how many rows fall under each sentiment label.
# The column is passed as `x=` because current seaborn releases accept only
# `data` as a positional argument; a positional Series would be taken as `data`.
sns.countplot(x=df['sentiment_label'])
plt.show()

In [None]:
# Share of each sentiment label as a percentage, rounded to two decimals.
print('Porcentaje:')
df['sentiment_label'].value_counts(normalize=True).mul(100).round(2)

### Reducir los datos a 1000 registros

In [None]:
# Work on the first 1000 rows only. An explicit copy is taken because new
# columns are assigned to df1 later; writing to a bare slice of df triggers
# pandas' SettingWithCopyWarning and leaves it ambiguous which frame is modified.
df1 = df[:1000].copy()
df1.head()

In [None]:
# (rows, columns) of the reduced sample.
df1.shape

## Sentiment Scores

In [None]:
!pip install vaderSentiment

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
def getSIA(text):
    """Return the VADER sentiment scores for ``text``.

    The result is whatever ``SentimentIntensityAnalyzer.polarity_scores``
    returns; the notebook reads its 'compound', 'neg', 'neu' and 'pos' entries.

    The analyzer is created on the first call and cached on the function, so
    its constructor (which loads VADER's lexicon) does not run again for every
    text that is scored.
    """
    analyzer = getattr(getSIA, '_analyzer', None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        getSIA._analyzer = analyzer
    return analyzer.polarity_scores(text)

In [None]:
# Score every cleaned text with VADER and collect each component in its own list.
# The column's values are iterated directly instead of being looked up with
# df1['text_clean'][i]: that form is a label lookup and raises KeyError as soon
# as df1's index is not exactly 0..n-1 (e.g. after filtering or shuffling).
scores = [getSIA(text) for text in df1['text_clean']]
compound = [score['compound'] for score in scores]
neg = [score['neg'] for score in scores]
neu = [score['neu'] for score in scores]
pos = [score['pos'] for score in scores]

In [None]:
# Attach the VADER components to df1 as new columns
# (insertion order: compound, negative, positive, neutral).
vader_columns = {
    'compound': compound,
    'negative': neg,
    'positive': pos,
    'neutral': neu,
}
for column_name, column_values in vader_columns.items():
    df1[column_name] = column_values

In [None]:
# Check the new VADER score columns.
df1.head()

In [None]:
# Save the scored sample to 'sent_clasif.csv' without writing the index column.
df1.to_csv('sent_clasif.csv', index=False)

In [None]:
# List the available columns before choosing the model features.
df1.columns

### Feature selection

In [None]:
# Keep the target ('Sentiment') together with the TextBlob and VADER score columns.
keep_columns = [
    'Sentiment',
    'polarity',
    'subjectivity',
    'compound',
    'negative',
    'positive',
    'neutral',
]
data = df1.loc[:, keep_columns]
data.head()

In [None]:
# Separate the target column from the feature matrix and show both shapes.
y = data['Sentiment']
X = data.drop(columns='Sentiment')
X.shape, y.shape

Split into Train and Test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Build Model

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Fit a Linear Discriminant Analysis classifier on the training split.
model = LinearDiscriminantAnalysis()
model.fit(X_train, y_train)

In [None]:
# Predicted labels for the held-out test rows.
yhat = model.predict(X_test)

In [None]:
# Put true and predicted labels side by side for inspection.
result = pd.DataFrame({'actual': y_test, 'predicted': yhat})
result.head()

In [None]:
# Signed error per test row: 0 means the prediction matched the true label.
# NOTE(review): assumes 'Sentiment' holds numeric labels (presumably 0/1);
# the subtraction would fail for string labels — confirm.
result['diferencia'] = result['actual'] - result['predicted']

In [None]:
# How many test rows fall under each error value (0 = correct prediction).
result['diferencia'].value_counts()

In [None]:
# Horizontal bar chart of the error-value counts.
result['diferencia'].value_counts().plot(kind = 'barh')
plt.show()

In [None]:
# Rows where the true label is greater than the predicted one
# (with 0/1 labels these would be false negatives — confirm the encoding).
result[result['diferencia']>0].head()

In [None]:
# Rows where the true label is smaller than the predicted one
# (with 0/1 labels these would be false positives — confirm the encoding).
result[result['diferencia']<0]

### score

In [None]:
from sklearn.metrics import  classification_report

In [None]:
# Print scikit-learn's classification report for the test-set predictions.
print(classification_report(y_test, yhat))