In [1]:
import pandas as pd
import seaborn as sns
import numpy as np

In [2]:
import re
import string
from string import punctuation
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('stopwords')


import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.callbacks import EarlyStopping

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\darre\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
# The CSV is read without a header row, so the column labels are supplied here.
# The first column is given an empty label; in the rows displayed below its
# values mirror the row index.
column_names = [
    "",
    "id",
    "date",
    "flag",
    "user",
    "text",
]
data = pd.read_csv(
    "ProjectTweets.csv",
    names=column_names,
)

In [4]:
# Preview the first 10 rows to check that the supplied column names line up
# with the values that were read.
data.head(10)

Unnamed: 0,Unnamed: 1,id,date,flag,user,text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t..."
1,1,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...
2,2,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...
3,3,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire
4,4,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all...."
5,5,1467811372,Mon Apr 06 22:20:00 PDT 2009,NO_QUERY,joy_wolf,@Kwesidei not the whole crew
6,6,1467811592,Mon Apr 06 22:20:03 PDT 2009,NO_QUERY,mybirch,Need a hug
7,7,1467811594,Mon Apr 06 22:20:03 PDT 2009,NO_QUERY,coZZ,@LOLTrish hey long time no see! Yes.. Rains a...
8,8,1467811795,Mon Apr 06 22:20:05 PDT 2009,NO_QUERY,2Hood4Hollywood,@Tatiana_K nope they didn't have it
9,9,1467812025,Mon Apr 06 22:20:09 PDT 2009,NO_QUERY,mimismo,@twittera que me muera ?


In [5]:
# Preview the last 10 rows to check that the end of the file was read in full.
data.tail(10)


Unnamed: 0,Unnamed: 1,id,date,flag,user,text
1599990,1599990,2193579249,Tue Jun 16 08:38:59 PDT 2009,NO_QUERY,razzberry5594,WOOOOO! Xbox is back
1599991,1599991,2193579284,Tue Jun 16 08:38:59 PDT 2009,NO_QUERY,AgustinaP,@rmedina @LaTati Mmmm That sounds absolutely ...
1599992,1599992,2193579434,Tue Jun 16 08:39:00 PDT 2009,NO_QUERY,sdancingsteph,ReCoVeRiNg FrOm ThE lOnG wEeKeNd
1599993,1599993,2193579477,Tue Jun 16 08:39:00 PDT 2009,NO_QUERY,ChloeAmisha,@SCOOBY_GRITBOYS
1599994,1599994,2193579489,Tue Jun 16 08:39:00 PDT 2009,NO_QUERY,EvolveTom,"@Cliff_Forster Yeah, that does work better tha..."
1599995,1599995,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...
1599996,1599996,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...
1599997,1599997,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...
1599998,1599998,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...
1599999,1599999,2193602129,Tue Jun 16 08:40:50 PDT 2009,NO_QUERY,RyanTrevMorris,happy #charitytuesday @theNSPCC @SparksCharity...


In [6]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(1600000, 6)

In [7]:
# Per-column dtype and non-null count, plus the frame's memory footprint.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1600000 entries, 0 to 1599999
Data columns (total 6 columns):
 #   Column  Non-Null Count    Dtype 
---  ------  --------------    ----- 
 0           1600000 non-null  int64 
 1   id      1600000 non-null  int64 
 2   date    1600000 non-null  object
 3   flag    1600000 non-null  object
 4   user    1600000 non-null  object
 5   text    1600000 non-null  object
dtypes: int64(2), object(4)
memory usage: 73.2+ MB


In [8]:
# Cache for the English stopword set. It is filled on first use so that
# `stopwords.words('english')` is called once rather than once per tweet.
_ENGLISH_STOPWORDS = None

# Translation table that deletes every character in `string.punctuation`.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def _english_stopwords():
    """Return the NLTK English stopwords as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def get_text_processing(text):
    """Strip punctuation and English stopwords from a piece of text.

    Processing order matters: punctuation is removed first, then the text is
    split on whitespace and each token is compared (lower-cased) against the
    stopword list. Because the apostrophe is already gone at that point, a
    contraction such as "can't" is compared as "cant".

    Parameters
    ----------
    text : str
        The raw text to clean.

    Returns
    -------
    str
        The surviving tokens, in their original casing, joined by single
        spaces. An empty string is returned when nothing survives.
    """
    stpword = _english_stopwords()
    no_punctuation = text.translate(_PUNCTUATION_TABLE)
    # Set membership keeps the per-token check constant-time.
    return ' '.join(
        word for word in no_punctuation.split() if word.lower() not in stpword
    )

In [9]:
# Shared VADER analyzer. It is built on first use so that it is constructed
# once rather than once per scored text.
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first use."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        try:
            _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
        except LookupError:
            # The analyzer needs the 'vader_lexicon' resource, while the import
            # cell only downloads 'stopwords'. Fetch it and retry once.
            nltk.download('vader_lexicon')
            _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


def get_sentiment(text):
    """Classify a text as "Positive", "Negative" or "Neutral".

    The label is derived from the "compound" entry of the analyzer's
    `polarity_scores` result.

    Parameters
    ----------
    text : str
        The text to score.

    Returns
    -------
    str
        "Positive" when the compound score is >= 0.05, "Negative" when it is
        <= -0.05, and "Neutral" otherwise.
    """
    compound_score = _get_sentiment_analyzer().polarity_scores(text)["compound"]
    if compound_score >= 0.05:
        return "Positive"
    if compound_score <= -0.05:
        return "Negative"
    return "Neutral"
    

In [10]:
# Clean every raw tweet (punctuation and stopwords removed) and store the
# result next to the original text.
cleaned_tweets = data['text'].apply(get_text_processing)
data['processed_text'] = cleaned_tweets
data.head()

Unnamed: 0,Unnamed: 1,id,date,flag,user,text,processed_text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",switchfoot httptwitpiccom2y1zl Awww thats bumm...
1,1,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...,upset cant update Facebook texting might cry r...
2,2,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...,Kenichan dived many times ball Managed save 50...
3,3,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire,whole body feels itchy like fire
4,4,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all....",nationwideclass behaving im mad cant see


In [17]:
# Score the ORIGINAL tweet text, not `processed_text`. The cleaned text has
# lost its punctuation and stopwords, including negations such as "no" and
# "not" (row 4 above: "no, it's not behaving at all" became
# "nationwideclass behaving im mad cant see").
# NOTE(review): VADER is documented as using negation, punctuation and
# capitalisation cues, so it is expected to work best on unprocessed text.
# Confirm the resulting label distribution before relying on it downstream.
data['sentiment'] = data['text'].apply(get_sentiment)
data.head()

Unnamed: 0,Unnamed: 1,id,date,flag,user,text,processed_text,sentiment
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",switchfoot httptwitpiccom2y1zl Awww thats bumm...,Negative
1,1,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...,upset cant update Facebook texting might cry r...,Negative
2,2,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...,Kenichan dived many times ball Managed save 50...,Positive
3,3,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire,whole body feels itchy like fire,Negative
4,4,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all....",nationwideclass behaving im mad cant see,Negative


In [19]:
#data10 = data.head(10)
# Check the last rows to confirm the sentiment column was filled to the end.
data.tail()


Unnamed: 0,Unnamed: 1,id,date,flag,user,text,processed_text,sentiment
1599995,1599995,2193601966,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,AmandaMarie1028,Just woke up. Having no school is the best fee...,woke school best feeling ever,Positive
1599996,1599996,2193601969,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,TheWDBoards,TheWDB.com - Very cool to hear old Walt interv...,TheWDBcom cool hear old Walt interviews â« ht...,Positive
1599997,1599997,2193601991,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,bpbabe,Are you ready for your MoJo Makeover? Ask me f...,ready MoJo Makeover Ask details,Positive
1599998,1599998,2193602064,Tue Jun 16 08:40:49 PDT 2009,NO_QUERY,tinydiamondz,Happy 38th Birthday to my boo of alll time!!! ...,Happy 38th Birthday boo alll time Tupac Amaru ...,Positive
1599999,1599999,2193602129,Tue Jun 16 08:40:50 PDT 2009,NO_QUERY,RyanTrevMorris,happy #charitytuesday @theNSPCC @SparksCharity...,happy charitytuesday theNSPCC SparksCharity Sp...,Positive


In [18]:
#data10['sentiment'] = data10['processed_text'].apply(get_sentiment)
#data10.head(10)

In [20]:
#data.to_csv('Tweets_sentiment.csv', index=False)

In [40]:
# Keep only the columns used from here on: timestamp, cleaned text and label.
sentiment_columns = ['date', 'processed_text', 'sentiment']
data_sentiment = data[sentiment_columns]
data_sentiment.head()

Unnamed: 0,date,processed_text,sentiment
0,Mon Apr 06 22:19:45 PDT 2009,switchfoot httptwitpiccom2y1zl Awww thats bumm...,Negative
1,Mon Apr 06 22:19:49 PDT 2009,upset cant update Facebook texting might cry r...,Negative
2,Mon Apr 06 22:19:53 PDT 2009,Kenichan dived many times ball Managed save 50...,Positive
3,Mon Apr 06 22:19:57 PDT 2009,whole body feels itchy like fire,Negative
4,Mon Apr 06 22:19:57 PDT 2009,nationwideclass behaving im mad cant see,Negative


In [41]:
#data_sentiment.to_csv('Tweets_sentiment_.csv', index=False)

In [42]:
from datetime import datetime

def convert_to_YYYY_MM_DD(date_str):
    """Convert a tweet timestamp string to an ISO 'YYYY-MM-DD' date string.

    The expected layout is six whitespace-separated fields, e.g.
    'Mon Apr 06 22:19:45 PDT 2009': weekday, month, day, time, time-zone
    abbreviation, year. The time-zone field is skipped rather than matched
    against a fixed literal, so other abbreviations (e.g. 'PST') parse too.
    No time-zone conversion is performed; the date is taken as written.

    Parameters
    ----------
    date_str : str
        Timestamp in the layout described above.

    Returns
    -------
    str
        The calendar date formatted as '%Y-%m-%d'.

    Raises
    ------
    ValueError
        If the string does not have six fields or the fields cannot be parsed.
    """
    fields = date_str.split()
    if len(fields) != 6:
        raise ValueError(
            f"expected 'Day Mon DD HH:MM:SS TZ YYYY', got {date_str!r}"
        )
    weekday, month, day, clock, _timezone, year = fields
    original = datetime.strptime(
        f"{weekday} {month} {day} {clock} {year}", '%a %b %d %H:%M:%S %Y'
    )
    return original.strftime('%Y-%m-%d')

# Convert the timestamps first, then attach them to an independent copy so the
# `data_sentiment` frame keeps its original date strings.
iso_dates = data_sentiment['date'].apply(convert_to_YYYY_MM_DD)
data_sentiments = data_sentiment.copy()
data_sentiments['date'] = iso_dates

#data_sentiment['date'] = pd.to_datetime(data_sentiment['date']).dt.date

In [43]:
# Confirm the date column now holds 'YYYY-MM-DD' strings.
data_sentiments.head()

Unnamed: 0,date,processed_text,sentiment
0,2009-04-06,switchfoot httptwitpiccom2y1zl Awww thats bumm...,Negative
1,2009-04-06,upset cant update Facebook texting might cry r...,Negative
2,2009-04-06,Kenichan dived many times ball Managed save 50...,Positive
3,2009-04-06,whole body feels itchy like fire,Negative
4,2009-04-06,nationwideclass behaving im mad cant see,Negative


In [44]:
# One-hot encode the sentiment label into one indicator column per class.
# The guard makes the cell safe to re-run: once 'sentiment' has been replaced
# by its indicator columns, running it again is a no-op instead of a KeyError.
if 'sentiment' in data_sentiments.columns:
    # Pin the dtype so the indicators are 0/1 integers on every pandas version
    # (newer releases default to booleans).
    one_hot = pd.get_dummies(data_sentiments['sentiment'], dtype='uint8')
    # Build a new frame rather than dropping in place.
    data_sentiments = pd.concat(
        [data_sentiments.drop(columns=['sentiment']), one_hot],
        axis=1,
    )
data_sentiments.head()

Unnamed: 0,date,processed_text,Negative,Neutral,Positive
0,2009-04-06,switchfoot httptwitpiccom2y1zl Awww thats bumm...,1,0,0
1,2009-04-06,upset cant update Facebook texting might cry r...,1,0,0
2,2009-04-06,Kenichan dived many times ball Managed save 50...,0,0,1
3,2009-04-06,whole body feels itchy like fire,1,0,0
4,2009-04-06,nationwideclass behaving im mad cant see,1,0,0
