# Perform sentimental analysis on the Elon-musk tweets (Exlon-musk.csv)

In [46]:
import pandas as pd 
import numpy as np 
import re # re-->regular expression 

In [47]:
elon = pd.read_csv("Elon_musk.csv",encoding='mac_roman')

In [48]:
elon 

Unnamed: 0.1,Unnamed: 0,Text
0,1,@kunalb11 Iím an alien
1,2,@ID_AA_Carmack Ray tracing on Cyberpunk with H...
2,3,@joerogan @Spotify Great interview!
3,4,@gtera27 Doge is underestimated
4,5,@teslacn Congratulations Tesla China for amazi...
...,...,...
1994,1995,"@flcnhvy True, it sounds so surreal, but the n..."
1995,1996,@PPathole Make sure to read ur terms &amp; con...
1996,1997,@TeslaGong @PPathole Samwise Gamgee
1997,1998,@PPathole Altho Dumb and Dumber is <U+0001F525...


here in the above data set the Text column consist of unwanted characteristics 

In [49]:
elon.shape

(1999, 2)

In [50]:
elon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1999 entries, 0 to 1998
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  1999 non-null   int64 
 1   Text        1999 non-null   object
dtypes: int64(1), object(1)
memory usage: 31.4+ KB


# Except alphabetes all the unwanted characteristics are deleted  

In [52]:
elon.Text = elon.Text.apply(lambda x :re.sub('[^a-zA-Z ]',"",x))

In [53]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,kunalb Im an alien
1,2,IDAACarmack Ray tracing on Cyberpunk with HDR ...
2,3,joerogan Spotify Great interview
3,4,gtera Doge is underestimated
4,5,teslacn Congratulations Tesla China for amazin...
...,...,...
1994,1995,flcnhvy True it sounds so surreal but the nega...
1995,1996,PPathole Make sure to read ur terms amp condit...
1996,1997,TeslaGong PPathole Samwise Gamgee
1997,1998,PPathole Altho Dumb and Dumber is UFUF


after applying the re-->regular expression , the unwantad characteristics are deleated 

# converting the Text table into a lower case alphabets 

In [55]:
elon.Text = elon.Text.apply(lambda x: x.lower())

In [56]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,kunalb im an alien
1,2,idaacarmack ray tracing on cyberpunk with hdr ...
2,3,joerogan spotify great interview
3,4,gtera doge is underestimated
4,5,teslacn congratulations tesla china for amazin...
...,...,...
1994,1995,flcnhvy true it sounds so surreal but the nega...
1995,1996,ppathole make sure to read ur terms amp condit...
1996,1997,teslagong ppathole samwise gamgee
1997,1998,ppathole altho dumb and dumber is ufuf


# Stopwords 

In [58]:
import nltk 
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Naveen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# splitting the values

In [60]:
elon.Text = elon.Text.apply(lambda x : x.split())

In [61]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,"[kunalb, im, an, alien]"
1,2,"[idaacarmack, ray, tracing, on, cyberpunk, wit..."
2,3,"[joerogan, spotify, great, interview]"
3,4,"[gtera, doge, is, underestimated]"
4,5,"[teslacn, congratulations, tesla, china, for, ..."
...,...,...
1994,1995,"[flcnhvy, true, it, sounds, so, surreal, but, ..."
1995,1996,"[ppathole, make, sure, to, read, ur, terms, am..."
1996,1997,"[teslagong, ppathole, samwise, gamgee]"
1997,1998,"[ppathole, altho, dumb, and, dumber, is, ufuf]"


# removing the stopwords

In [63]:
elon.Text = elon.Text.apply(lambda x:[word for word in x if word not in set(stopwords.words('english'))])

In [64]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,"[kunalb, im, alien]"
1,2,"[idaacarmack, ray, tracing, cyberpunk, hdr, ne..."
2,3,"[joerogan, spotify, great, interview]"
3,4,"[gtera, doge, underestimated]"
4,5,"[teslacn, congratulations, tesla, china, amazi..."
...,...,...
1994,1995,"[flcnhvy, true, sounds, surreal, negative, pro..."
1995,1996,"[ppathole, make, sure, read, ur, terms, amp, c..."
1996,1997,"[teslagong, ppathole, samwise, gamgee]"
1997,1998,"[ppathole, altho, dumb, dumber, ufuf]"


the stop words are removed

In [65]:
#applying the Stemmer method 
ps = PorterStemmer()

In [66]:
elon.Text = elon.Text.apply(lambda x: [ps.stem(word) for word in x])

In [67]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,"[kunalb, im, alien]"
1,2,"[idaacarmack, ray, trace, cyberpunk, hdr, next..."
2,3,"[joerogan, spotifi, great, interview]"
3,4,"[gtera, doge, underestim]"
4,5,"[teslacn, congratul, tesla, china, amaz, execu..."
...,...,...
1994,1995,"[flcnhvi, true, sound, surreal, neg, propagand..."
1995,1996,"[ppathol, make, sure, read, ur, term, amp, con..."
1996,1997,"[teslagong, ppathol, samwis, gamge]"
1997,1998,"[ppathol, altho, dumb, dumber, ufuf]"


when we apply the porter stemmer -->it will chop all the end alphabets and replace them by words according to theie process 

# joining the seperated sentences

In [69]:
elon.Text = elon.Text.apply(lambda x :" ".join(x))

In [70]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,kunalb im alien
1,2,idaacarmack ray trace cyberpunk hdr nextlevel tri
2,3,joerogan spotifi great interview
3,4,gtera doge underestim
4,5,teslacn congratul tesla china amaz execut last...
...,...,...
1994,1995,flcnhvi true sound surreal neg propaganda stil...
1995,1996,ppathol make sure read ur term amp condit clic...
1996,1997,teslagong ppathol samwis gamge
1997,1998,ppathol altho dumb dumber ufuf


# SINGLE step for all the above process 

In [72]:
elon1 = pd.read_csv("Elon_musk.csv",encoding='cp1252')

In [73]:
def preprocess(x):
    x=re.sub('[^a-zA-Z ]',"",x)
    x=x.lower()
    x=x.split()
    x=[word for word in x if word not in set(stopwords.words('english'))]
    x=[ps.stem(word) for word in x]
    x=" ".join(x)
    return x 

In [74]:
elon1.Text =elon1['Text'].apply(preprocess)

In [75]:
elon1

Unnamed: 0.1,Unnamed: 0,Text
0,1,kunalb im alien
1,2,idaacarmack ray trace cyberpunk hdr nextlevel tri
2,3,joerogan spotifi great interview
3,4,gtera doge underestim
4,5,teslacn congratul tesla china amaz execut last...
...,...,...
1994,1995,flcnhvi true sound surreal neg propaganda stil...
1995,1996,ppathol make sure read ur term amp condit clic...
1996,1997,teslagong ppathol samwis gamge
1997,1998,ppathol altho dumb dumber ufuf


In [76]:
elon

Unnamed: 0.1,Unnamed: 0,Text
0,1,kunalb im alien
1,2,idaacarmack ray trace cyberpunk hdr nextlevel tri
2,3,joerogan spotifi great interview
3,4,gtera doge underestim
4,5,teslacn congratul tesla china amaz execut last...
...,...,...
1994,1995,flcnhvi true sound surreal neg propaganda stil...
1995,1996,ppathol make sure read ur term amp condit clic...
1996,1997,teslagong ppathol samwis gamge
1997,1998,ppathol altho dumb dumber ufuf
