# Merging Datasets Together

In [143]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import nltk
from nltk.corpus import stopwords
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Import custom functions
from custom import removeRegex
from custom import getPatternCount
from sentenceprocess import getLemma
from sentenceprocess import posTag

In [3]:
# Import the initial dataset
df1 = pd.read_csv('../data/raw/labeled_data_raw.csv')

In [5]:
# Print df1
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24783 entries, 0 to 24782
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Unnamed: 0          24783 non-null  int64 
 1   count               24783 non-null  int64 
 2   hate_speech         24783 non-null  int64 
 3   offensive_language  24783 non-null  int64 
 4   neither             24783 non-null  int64 
 5   class               24783 non-null  int64 
 6   tweet               24783 non-null  object
dtypes: int64(6), object(1)
memory usage: 1.3+ MB


In [7]:
# Rename first column as ID
df1.rename(columns = {'Unnamed: 0': 'id'}, inplace = True)

# Increasing max length for all columns and number of columns
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_info_columns', 500)
pd.set_option('display.max_rows', 500)

In [8]:
# Set ID to index
df1.set_index('id').head(5)

Unnamed: 0_level_0,count,hate_speech,offensive_language,neither,class,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't complain about cleaning up your house. &amp; as a man you should always take the trash out...
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;


In [9]:
df1['class'].value_counts()

1    19190
2    4163 
0    1430 
Name: class, dtype: int64

In [10]:
# Racist or homophobic comments
hateSpeech = df1.loc[df1['class'] == 0]

In [11]:
hateSpeech.head(50)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
85,85,3,2,1,0,0,"""@Blackman38Tide: @WhaleLookyHere @HowdyDowdy11 queer"" gaywad"
89,90,3,3,0,0,0,"""@CB_Baby24: @white_thunduh alsarabsss"" hes a beaner smh you can tell hes a mexican"
110,111,3,3,0,0,0,"""@DevilGrimz: @VigxRArts you're fucking gay, blacklisted hoe"" Holding out for #TehGodClan anyway http://t.co/xUCcwoetmn"
184,186,3,3,0,0,0,"""@MarkRoundtreeJr: LMFAOOOO I HATE BLACK PEOPLE https://t.co/RNvD2nLCDR"" This is why there's black people and niggers"
202,204,3,2,1,0,0,"""@NoChillPaz: ""At least I'm not a nigger"" http://t.co/RGJa7CfoiT""\n\nLmfao"
204,206,3,2,1,0,0,"""@NotoriousBM95: @_WhitePonyJr_ Ariza is a snake and a coward"" but at least he isn't a cripple like your hero Roach lmaoo"
219,221,6,3,2,1,0,"""@RTNBA: Drakes new shoes that will be released by Nike/Jordan.... Yes, there's glitter on the shoes http://t.co/QCtPLxHEXM"" ....dudes a fag"
260,263,3,2,1,0,0,"""@TheoMaxximus: #GerrysHalloweenParty http://t.co/3ycrSrnjHc"" Halloween was yesterday stupid nigger"
312,317,3,2,1,0,0,"""@ashlingwilde: @ItsNotAdam is bored supposed to be cute, you faggot?"" Sometimes"
315,320,3,2,1,0,0,"""@bigbootybishopp: @white_thunduh lassen cc , the coach wants me"" i feel it youll get more my nig no doubt"


In [12]:
# Neither offesnive nor non-offensive speech
neutral = df1.loc[df1['class'] == 2]

In [13]:
neutral.head(50)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't complain about cleaning up your house. &amp; as a man you should always take the trash out...
40,40,3,0,1,2,2,""" momma said no pussy cats inside my doghouse """
63,63,3,0,0,3,2,"""@Addicted2Guys: -SimplyAddictedToGuys http://t.co/1jL4hi8ZMF"" woof woof hot scally lad"
66,66,3,0,1,2,2,"""@AllAboutManFeet: http://t.co/3gzUpfuMev"" woof woof and hot soles"
67,67,3,0,1,2,2,"""@Allyhaaaaa: Lemmie eat a Oreo &amp; do these dishes."" One oreo? Lol"
70,70,3,0,0,3,2,"""@ArizonasFinest6: Why the eggplant emoji doe?""y he say she looked like scream lmao"
75,75,3,0,1,2,2,"""@BabyAnimalPics: baby monkey bathtime http://t.co/7KPWAdLF0R""\nAwwwwe! This is soooo ADORABLE!"
115,116,3,0,0,3,2,"""@DomWorldPeace: Baseball season for the win. #Yankees"" This is where the love started"
118,119,3,0,0,3,2,"""@DunderbaIl: I'm an early bird and I'm a night owl, so I'm wise and have worms."""
119,120,3,0,1,2,2,"""@EdgarPixar: Overdosing on heavy drugs doesn't sound bad tonight."" I do that pussy shit every day."


In [14]:
neutral['count'].value_counts()

3    3919
6    184 
4    37  
9    19  
7    4   
Name: count, dtype: int64

In [15]:
# Offensive but not hate speech
offensiveLang = df1.loc[df1['class'] == 1]

In [16]:
offensiveLang.head(50)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,5,3,1,2,0,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""
6,6,3,0,3,0,1,"!!!!!!""@__BrighterDays: I can not just sit up and HATE on another bitch .. I got too much shit going on!"""
7,7,3,0,3,0,1,!!!!&#8220;@selfiequeenbri: cause I'm tired of you big bitches coming for us skinny girls!!&#8221;
8,8,3,0,3,0,1,""" &amp; you might not get ya bitch back &amp; thats that """
9,9,3,1,2,0,1,""" @rhythmixx_ :hobbies include: fighting Mariam""\n\nbitch"
10,10,3,0,3,0,1,""" Keeks is a bitch she curves everyone "" lol I walked into a conversation like this. Smh"


In [17]:
# Insert new column in our dataframe to account for positive tweets
df1.insert(5, 'positive', 0)

In [18]:
df1['positive'].value_counts()

0    24783
Name: positive, dtype: int64

In [19]:
df1.tail(5)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet
24778,25291,3,0,2,1,0,1,"you's a muthaf***in lie &#8220;@LifeAsKing: @20_Pearls @corey_emanuel right! His TL is trash &#8230;. Now, mine? Bible scriptures and hymns&#8221;"
24779,25292,3,0,1,2,0,2,"you've gone and broke the wrong heart baby, and drove me redneck crazy"
24780,25294,3,0,3,0,0,1,young buck wanna eat!!.. dat nigguh like I aint fuckin dis up again
24781,25295,6,0,6,0,0,1,youu got wild bitches tellin you lies
24782,25296,3,0,0,3,0,2,"~~Ruffled | Ntac Eileen Dahlia - Beautiful color combination of pink, orange, yellow &amp; white. A Coll http://t.co/H0dYEBvnZB"


In [24]:
# Import second dataset
df2 = pd.read_csv('../data/raw/train_E6oV3lV.csv')

In [25]:
# Get three columns
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31962 entries, 0 to 31961
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      31962 non-null  int64 
 1   label   31962 non-null  int64 
 2   tweet   31962 non-null  object
dtypes: int64(2), object(1)
memory usage: 749.2+ KB


In [26]:
# Given that there are 23,353 offensive tweets in the other dataset, select 23,000 positive tweets from df2
dfPos = df2.loc[df2['label'] == 0]

In [27]:
dfPos.head(50)

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,0,factsguide: society now #motivation
5,6,0,[2/2] huge fan fare and big talking before they leave. chaos and pay disputes when they get there. #allshowandnogo
6,7,0,@user camping tomorrow @user @user @user @user @user @user @user dannyâ¦
7,8,0,the next school year is the year for exams.ð¯ can't think about that ð­ #school #exams #hate #imagine #actorslife #revolutionschool #girl
8,9,0,we won!!! love the land!!! #allin #cavs #champions #cleveland #clevelandcavaliers â¦
9,10,0,@user @user welcome here ! i'm it's so #gr8 !


In [28]:
dfPos23 = dfPos[:23000]

In [29]:
len(dfPos23)

23000

In [30]:
# Setting the index as ID
dfPos23.set_index('id')

Unnamed: 0_level_0,label,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
2,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
3,0,bihday your majesty
4,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
5,0,factsguide: society now #motivation
...,...,...
24725,0,"yes, leeds!! #lesbiunion #girlsweekend"
24726,0,in other non tattoo related news my boy will be #crystalpalace mascot at the home game against liverpool next season ! ð´ðµ
24727,0,finally wo agaya :):)
24728,0,ðð ð #love #instagood #photooftheday top.tags #tbt #cute #me #beautiful #followme #followâ¦


In [31]:
# Recreate the same structure of the other df
dfPos23.insert(1, 'count', 3)
dfPos23.insert(2, 'hate_speech', 0)
dfPos23.insert(3, 'offensive_language', 0)
dfPos23.insert(4, 'neither', 0)
dfPos23.insert(5, 'positive', 3)

In [32]:
dfPos23

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,label,tweet
0,1,3,0,0,0,3,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,3,0,0,0,3,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,3,0,0,0,3,0,bihday your majesty
3,4,3,0,0,0,3,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,3,0,0,0,3,0,factsguide: society now #motivation
...,...,...,...,...,...,...,...,...
24724,24725,3,0,0,0,3,0,"yes, leeds!! #lesbiunion #girlsweekend"
24725,24726,3,0,0,0,3,0,in other non tattoo related news my boy will be #crystalpalace mascot at the home game against liverpool next season ! ð´ðµ
24726,24727,3,0,0,0,3,0,finally wo agaya :):)
24727,24728,3,0,0,0,3,0,ðð ð #love #instagood #photooftheday top.tags #tbt #cute #me #beautiful #followme #followâ¦


In [33]:
# Remove existing lable
dfPos23.drop(['label'], axis = 1, inplace = True)

In [34]:
# Insert label as 4 for positive tweets
dfPos23.insert(6, 'class', 3)

In [35]:
dfPos23.head(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet
0,1,3,0,0,0,3,3,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,3,0,0,0,3,3,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,3,0,0,0,3,3,bihday your majesty
3,4,3,0,0,0,3,3,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,3,0,0,0,3,3,factsguide: society now #motivation
5,6,3,0,0,0,3,3,[2/2] huge fan fare and big talking before they leave. chaos and pay disputes when they get there. #allshowandnogo
6,7,3,0,0,0,3,3,@user camping tomorrow @user @user @user @user @user @user @user dannyâ¦
7,8,3,0,0,0,3,3,the next school year is the year for exams.ð¯ can't think about that ð­ #school #exams #hate #imagine #actorslife #revolutionschool #girl
8,9,3,0,0,0,3,3,we won!!! love the land!!! #allin #cavs #champions #cleveland #clevelandcavaliers â¦
9,10,3,0,0,0,3,3,@user @user welcome here ! i'm it's so #gr8 !


In [36]:
# Checking to see if all IDs are unique
dfPos23['id'] = dfPos23['id'] + 25926

In [37]:
# Final df has 23000 positive tweets
dfPos23.head(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet
0,25927,3,0,0,0,3,3,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,25928,3,0,0,0,3,3,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,25929,3,0,0,0,3,3,bihday your majesty
3,25930,3,0,0,0,3,3,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,25931,3,0,0,0,3,3,factsguide: society now #motivation
5,25932,3,0,0,0,3,3,[2/2] huge fan fare and big talking before they leave. chaos and pay disputes when they get there. #allshowandnogo
6,25933,3,0,0,0,3,3,@user camping tomorrow @user @user @user @user @user @user @user dannyâ¦
7,25934,3,0,0,0,3,3,the next school year is the year for exams.ð¯ can't think about that ð­ #school #exams #hate #imagine #actorslife #revolutionschool #girl
8,25935,3,0,0,0,3,3,we won!!! love the land!!! #allin #cavs #champions #cleveland #clevelandcavaliers â¦
9,25936,3,0,0,0,3,3,@user @user welcome here ! i'm it's so #gr8 !


In [38]:
df = pd.concat([df1, dfPos23], join = 'inner')

In [39]:
# Final dataset has a good balance between positive and negative tweets
df['positive'].value_counts()

0    24783
3    23000
Name: positive, dtype: int64

In [40]:
# Reshuffle the rows
df = df.sample(frac = 1, random_state = 43).reset_index(drop = True)

In [41]:
len(df)

47783

In [42]:
df['positive'].value_counts()

0    24783
3    23000
Name: positive, dtype: int64

# Data Cleaning

In [46]:
df['tweet_low'] = df['tweet'].apply(lambda x: x if type(x) != str else x.lower())
df

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.
...,...,...,...,...,...,...,...,...,...
47778,26408,3,0,0,0,3,3,when quay collab with @user says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #,when quay collab with @user says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #
47779,18857,3,0,3,0,0,1,RT @_groovymovie: &#8220;@Shane_A1: Hmu talmbout match but when I pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me,rt @_groovymovie: &#8220;@shane_a1: hmu talmbout match but when i pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me
47780,43095,3,0,0,0,3,3,angry squeaking frog video: #frog #nature #animals #cute #adorable,angry squeaking frog video: #frog #nature #animals #cute #adorable
47781,20216,3,0,3,0,0,1,RT @obey_jrock__: This is a true ride or die bitch &#128175; http://t.co/y1t8CTQn4U,rt @obey_jrock__: this is a true ride or die bitch &#128175; http://t.co/y1t8ctqn4u


In [47]:
len(df)

47783

# Check for duplicates

In [48]:
dup = df[df.duplicated('tweet_low', keep = 'first')]
dup

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low
303,31205,3,0,0,0,3,3,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
544,50568,3,0,0,0,3,3,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
645,38535,3,0,0,0,3,3,i finally found a way how to delete old tweets! you might find it useful as well: #deletetweets,i finally found a way how to delete old tweets! you might find it useful as well: #deletetweets
1027,39543,3,0,0,0,3,3,i finally found a way how to delete old tweets! you might find it useful as well: #deletetweets,i finally found a way how to delete old tweets! you might find it useful as well: #deletetweets
1085,31253,3,0,0,0,3,3,can #lighttherapy help with or #depression? #altwaystoheal #healthy is #happy !!,can #lighttherapy help with or #depression? #altwaystoheal #healthy is #happy !!
...,...,...,...,...,...,...,...,...,...
47718,46972,3,0,0,0,3,3,@user bihday rg,@user bihday rg
47728,30566,3,0,0,0,3,3,get #up get get #enjoy #music #today #free #apps #free #music,get #up get get #enjoy #music #today #free #apps #free #music
47758,48509,3,0,0,0,3,3,@user the present moment is filled with joy and happiness @user #thinkbigsundaywithmarsha,@user the present moment is filled with joy and happiness @user #thinkbigsundaywithmarsha
47772,48055,3,0,0,0,3,3,ð #love #instagood #photooftheday top.tags #tbt #cute #me #beautiful #followme #followâ¦,ð #love #instagood #photooftheday top.tags #tbt #cute #me #beautiful #followme #followâ¦


In [49]:
# Number of duplicates
len(dup)

1589

In [51]:
# Drop duplicates
df = df.drop_duplicates(subset = 'tweet_low', keep = 'first')

In [52]:
len(df)

46194

In [53]:
dup = df[df.duplicated('tweet_low', keep = 'first')]
dup

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low


In [54]:
# Removed duplicates
singleModel = df[df['tweet_low'].str.contains(r'model')]
singleModel

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low
49,41804,3,0,0,0,3,3,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
368,45453,3,0,0,0,3,3,@user dance like no one is watching â¨ #modelling #fitnesslife #fit #strong #healthy #fitnessâ¦,@user dance like no one is watching â¨ #modelling #fitnesslife #fit #strong #healthy #fitnessâ¦
520,47365,3,0,0,0,3,3,#fathersday 2 all the positive male role model's celebrating today. gift our #spice #rubs &amp; #bbq #sauces @,#fathersday 2 all the positive male role model's celebrating today. gift our #spice #rubs &amp; #bbq #sauces @
575,33239,3,0,0,0,3,3,@user #model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦,@user #model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
1125,28992,3,0,0,0,3,3,@user @user fully agree! expect failure that's what i tell my ss. need to model that!,@user @user fully agree! expect failure that's what i tell my ss. need to model that!
1228,41343,3,0,0,0,3,3,@user dope tune #kilamiti by iam-bars #love #dj #spin #music #media #me #model #tbtâ¦,@user dope tune #kilamiti by iam-bars #love #dj #spin #music #media #me #model #tbtâ¦
2235,44459,3,0,0,0,3,3,finally made it to new blades! lets hope all the models are ok! #modelmaking #aub #nervous #newbladesshow,finally made it to new blades! lets hope all the models are ok! #modelmaking #aub #nervous #newbladesshow
2253,33987,3,0,0,0,3,3,"darling, just fucking own it. âð»ï¸ð¥ððâ¨ðð #photoshoot #photography #model #beautiful #ownitâ¦","darling, just fucking own it. âð»ï¸ð¥ððâ¨ðð #photoshoot #photography #model #beautiful #ownitâ¦"
3231,8137,3,0,3,0,0,1,"Bad bitches from Oakland, she could beeee a model","bad bitches from oakland, she could beeee a model"
3404,41801,3,0,0,0,3,3,çç¬ã #fashion #smiles #love #girl#instagood#follome #xoxo#japan #tokyo #model#jamp!,çç¬ã #fashion #smiles #love #girl#instagood#follome #xoxo#japan #tokyo #model#jamp!


# Remove retweets

In [58]:
retweet = df[df['tweet_low'].str.contains(r'http://t(?!$)')]
retweet

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low
11,3478,3,1,2,0,0,1,"@HopOnTheBeast i found her right here, good job unfollowing me btw faggot http://t.co/kD3iSeIoLd","@hoponthebeast i found her right here, good job unfollowing me btw faggot http://t.co/kd3iseiold"
45,18212,3,0,3,0,0,1,"RT @TooSexist: Women complain that chivalry is dead. Yes congratulations bitches, you killed it with feminism http://t.co/Mrc82ZUhOn","rt @toosexist: women complain that chivalry is dead. yes congratulations bitches, you killed it with feminism http://t.co/mrc82zuhon"
56,23582,3,0,0,3,0,2,Yankees great Joe DiMaggio reportedly used to beat wife Marilyn Monroe. Here she is in 1954 announcing divorce http://t.co/blS7dalMiF,yankees great joe dimaggio reportedly used to beat wife marilyn monroe. here she is in 1954 announcing divorce http://t.co/bls7dalmif
76,45,3,0,3,0,0,1,""" pussy is a powerful drug "" &#128517; #HappyHumpDay http://t.co/R8jsymiB5b",""" pussy is a powerful drug "" &#128517; #happyhumpday http://t.co/r8jsymib5b"
83,1194,3,0,0,3,0,2,&#8220;@CaptainYankee2: Two of the best Yankees Derek Jeter and Joe Torre #JoeTorreDay http://t.co/XMyxfDBKOX&#8221; @jordan_luree,&#8220;@captainyankee2: two of the best yankees derek jeter and joe torre #joetorreday http://t.co/xmyxfdbkox&#8221; @jordan_luree
...,...,...,...,...,...,...,...,...,...
47743,16945,3,0,3,0,0,1,RT @MyDickNeedsCPR: What lonely hoe made this? http://t.co/eEFThf0tvb,rt @mydickneedscpr: what lonely hoe made this? http://t.co/eefthf0tvb
47750,8358,3,0,3,0,0,1,Bored then a hoe! Listening to these fuck ass adults lecturing us with @__vercetti http://t.co/NyBO16RMsh,bored then a hoe! listening to these fuck ass adults lecturing us with @__vercetti http://t.co/nybo16rmsh
47756,25119,4,1,3,0,0,1,trash both ways lol RT @AgdaCoroner: Bitch Killed Herself....Look Like Bill Maher With Makeup on http://t.co/IWLAG2J5Sl,trash both ways lol rt @agdacoroner: bitch killed herself....look like bill maher with makeup on http://t.co/iwlag2j5sl
47781,20216,3,0,3,0,0,1,RT @obey_jrock__: This is a true ride or die bitch &#128175; http://t.co/y1t8CTQn4U,rt @obey_jrock__: this is a true ride or die bitch &#128175; http://t.co/y1t8ctqn4u


In [59]:
retweet['class'].value_counts()

1    1754
2    883 
0    112 
Name: class, dtype: int64

In [60]:
emoji = df[df['tweet'].str.contains(r'#[0-9]')]
emoji['class'].value_counts()

1    4852
2    994 
3    406 
0    213 
Name: class, dtype: int64

In [61]:
posWEmoji = emoji.loc[emoji['class'] == 3]
posWEmoji.head(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low
51,48434,3,0,0,0,3,3,@user just done my first #2minutebeachclean in ages. also got one wet foot putting a starfish back in the water â­ï¸ð #southwold ð #dâ¦,@user just done my first #2minutebeachclean in ages. also got one wet foot putting a starfish back in the water â­ï¸ð #southwold ð #dâ¦
192,26678,3,0,0,0,3,3,checked in #holiday #croatia #13daystogo,checked in #holiday #croatia #13daystogo
430,39963,3,0,0,0,3,3,"""no one is better than dad."" fathersday!!! #fathersday2016 #fatherslove #disney #disneyfana #101dalmatians","""no one is better than dad."" fathersday!!! #fathersday2016 #fatherslove #disney #disneyfana #101dalmatians"
538,32023,3,0,0,0,3,3,welcome to candjdays! :) #first #video #vlog #checkitout #youtube #couple #9videos #phoenix #az #florida,welcome to candjdays! :) #first #video #vlog #checkitout #youtube #couple #9videos #phoenix #az #florida
594,38174,3,0,0,0,3,3,@user d-7 opening soon #miami #restaurant #saltandsugarcafe #20flaglerstreet,@user d-7 opening soon #miami #restaurant #saltandsugarcafe #20flaglerstreet
610,46640,3,0,0,0,3,3,@user pheonix u10s new look team to play noh walkden tomorrow #1stgame,@user pheonix u10s new look team to play noh walkden tomorrow #1stgame
613,25939,3,0,0,0,3,3,i get to see my daddy today!! #80days #gettingfed,i get to see my daddy today!! #80days #gettingfed
954,35212,3,0,0,0,3,3,popsy &amp; little all ready for @user #10minutestogo,popsy &amp; little all ready for @user #10minutestogo
1200,30155,3,0,0,0,3,3,i'm off to #florida #usa in #july for #3 #weeks woop woop #holiday,i'm off to #florida #usa in #july for #3 #weeks woop woop #holiday
1226,32523,3,0,0,0,3,3,the new baby is on her way! xx #700d #cannon,the new baby is on her way! xx #700d #cannon


In [64]:
# Delete URLs
df['no_url'] = np.vectorize(removeRegex)(df['tweet_low'], "https?://[A-za-z0-9./]*")

In [66]:
df.head(15)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦


In [79]:
df['handle_count'] = np.vectorize(getPatternCount)(df['tweet_low'], "@[\w]*")

In [80]:
df['handle_count'].value_counts()

0     23968
1     15967
2     4176 
3     1344 
4     416  
5     158  
6     89   
8     31   
7     26   
9     12   
10    6    
11    1    
Name: handle_count, dtype: int64

In [81]:
df.head(5)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1


In [82]:
# Remove Twitter handles
df['no_handle'] = np.vectorize(removeRegex)(df['no_url'], "@[\w]*")

In [83]:
df.tail(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle
47771,30963,3,0,0,0,3,3,@user '' x'mas &amp; my bihday disney ! '' #love #thanks #karen #â¡,@user '' x'mas &amp; my bihday disney ! '' #love #thanks #karen #â¡,@user '' x'mas &amp; my bihday disney ! '' #love #thanks #karen #â¡,1,'' x'mas &amp; my bihday disney ! '' #love #thanks #karen #â¡
47773,42033,3,0,0,0,3,3,my soul is happiest on the water! #soul #happier #happiest #water #ocean #beach #caliâ¦,my soul is happiest on the water! #soul #happier #happiest #water #ocean #beach #caliâ¦,my soul is happiest on the water! #soul #happier #happiest #water #ocean #beach #caliâ¦,0,my soul is happiest on the water! #soul #happier #happiest #water #ocean #beach #caliâ¦
47774,40465,3,0,0,0,3,3,"â #nzd/usd post-rbnz rally almost reversed, 0.7000 closer #blog #silver #gold #forex","â #nzd/usd post-rbnz rally almost reversed, 0.7000 closer #blog #silver #gold #forex","â #nzd/usd post-rbnz rally almost reversed, 0.7000 closer #blog #silver #gold #forex",0,"â #nzd/usd post-rbnz rally almost reversed, 0.7000 closer #blog #silver #gold #forex"
47775,16694,3,0,3,0,0,1,"RT @MAKEUP_SEX: trash talked by many . hated by some . & guess how many fucks i give , its less than one .","rt @makeup_sex: trash talked by many . hated by some . & guess how many fucks i give , its less than one .","rt @makeup_sex: trash talked by many . hated by some . & guess how many fucks i give , its less than one .",1,"rt : trash talked by many . hated by some . & guess how many fucks i give , its less than one ."
47776,29931,3,0,0,0,3,3,"#bihday to leo's mom, #celia ..","#bihday to leo's mom, #celia ..","#bihday to leo's mom, #celia ..",0,"#bihday to leo's mom, #celia .."
47777,5463,3,0,3,0,0,1,@_ElenaRaquel_ its swag bitch aha,@_elenaraquel_ its swag bitch aha,@_elenaraquel_ its swag bitch aha,1,its swag bitch aha
47778,26408,3,0,0,0,3,3,when quay collab with @user says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #,when quay collab with @user says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #,when quay collab with @user says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #,1,when quay collab with says sold out!!!ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð©ð«ð© #noooo #whyyyy #loveyoudesi #
47779,18857,3,0,3,0,0,1,RT @_groovymovie: &#8220;@Shane_A1: Hmu talmbout match but when I pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me,rt @_groovymovie: &#8220;@shane_a1: hmu talmbout match but when i pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me,rt @_groovymovie: &#8220;@shane_a1: hmu talmbout match but when i pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me,2,rt : &#8220;: hmu talmbout match but when i pull up its 30 other niccas wit ya &#128530;&#8221; smfh shit like that kill me
47781,20216,3,0,3,0,0,1,RT @obey_jrock__: This is a true ride or die bitch &#128175; http://t.co/y1t8CTQn4U,rt @obey_jrock__: this is a true ride or die bitch &#128175; http://t.co/y1t8ctqn4u,rt @obey_jrock__: this is a true ride or die bitch &#128175;,1,rt : this is a true ride or die bitch &#128175;
47782,14488,3,0,3,0,0,1,RT @AllHailTaron_: I got the deals for the low. I know you hoes lonely so fuck with these cuffing season specials. &#128184;&#128175; http://t.co/YURpX99Hdb,rt @allhailtaron_: i got the deals for the low. i know you hoes lonely so fuck with these cuffing season specials. &#128184;&#128175; http://t.co/yurpx99hdb,rt @allhailtaron_: i got the deals for the low. i know you hoes lonely so fuck with these cuffing season specials. &#128184;&#128175;,1,rt : i got the deals for the low. i know you hoes lonely so fuck with these cuffing season specials. &#128184;&#128175;


In [84]:
# Remove special characters (except hashtags and apostrophes). Replace with a whitespace.
df['no_special'] = df['no_handle'].str.replace("[^a-zA-Z#']", " ")

In [85]:
df.head(15)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear


In [86]:
# Remove single hashtags with nothing following them
df['remove_empty_hashtag'] = np.vectorize(removeRegex)(df['no_special'], " # ")

In [87]:
# Counting length of tweets after URLs are removed. Use this to see if there is a correlation between length of a tweet and the sentiment
df['tweet_length'] = df['remove_empty_hashtag'].apply(lambda x: len(x))
df.head(50)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,62
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany,my final legislative session day has officially begun #albany,69
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots,faggots,9
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,96


In [88]:
# Check that there are no tweets that are greater than 280 characters
dfLen = df.loc[df['tweet_length'] > 280]

In [89]:
dfLen.sort_values(by=['tweet_length'], ascending = False)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length
29982,18267,3,0,3,0,0,1,RT @TrxllLegend: One good girl is worth a thousand bitches\n\n&#128112; = &#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#8230;,rt @trxlllegend: one good girl is worth a thousand bitches\n\n&#128112; = &#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#8230;,rt @trxlllegend: one good girl is worth a thousand bitches\n\n&#128112; = &#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#8230;,1,rt : one good girl is worth a thousand bitches\n\n&#128112; = &#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#128109;&#8230;,rt one good girl is worth a thousand bitches # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #,rt one good girl is worth a thousand bitches,511
28044,13733,3,0,3,0,0,1,No summer school? &#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515; eat a dick school. Im done with your bitch ass !!!!!!,no summer school? &#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515; eat a dick school. im done with your bitch ass !!!!!!,no summer school? &#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515; eat a dick school. im done with your bitch ass !!!!!!,0,no summer school? &#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515;&#128515; eat a dick school. im done with your bitch ass !!!!!!,no summer school # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # eat a dick school im done with your bitch ass,no summer school eat a dick school im done with your bitch ass,462
28352,1631,3,0,3,0,0,1,&#8220;@Untouchable_T: Never seen so many perfect bitches til I made a Twitter &#128564; but &#128056;&#9749;&#65039;&#8221;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;,&#8220;@untouchable_t: never seen so many perfect bitches til i made a twitter &#128564; but &#128056;&#9749;&#65039;&#8221;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;,&#8220;@untouchable_t: never seen so many perfect bitches til i made a twitter &#128564; but &#128056;&#9749;&#65039;&#8221;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;,1,&#8220;: never seen so many perfect bitches til i made a twitter &#128564; but &#128056;&#9749;&#65039;&#8221;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;&#128175;,# never seen so many perfect bitches til i made a twitter # but # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #,never seen so many perfect bitches til i made a twitter but,434
23454,7644,3,0,2,1,0,1,A guy on True Blood is getting his penis inspected and the doctor told him it look like an eggplant\n\n&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;,a guy on true blood is getting his penis inspected and the doctor told him it look like an eggplant\n\n&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;,a guy on true blood is getting his penis inspected and the doctor told him it look like an eggplant\n\n&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;,0,a guy on true blood is getting his penis inspected and the doctor told him it look like an eggplant\n\n&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;&#127814;,a guy on true blood is getting his penis inspected and the doctor told him it look like an eggplant # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #,a guy on true blood is getting his penis inspected and the doctor told him it look like an eggplant,335
6865,19305,3,0,3,0,0,1,RT @digiflorals: bitch do it look like I care \n\n&#12288; N\n&#12288;&#12288; O\n&#12288;&#12288;&#12288; O\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.,rt @digiflorals: bitch do it look like i care \n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.,rt @digiflorals: bitch do it look like i care \n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.,1,rt : bitch do it look like i care \n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.,rt bitch do it look like i care # n # # o # # # o # # # # o # # # # # o # # # # # o # # # # # o # # # # # # # # # # # # # # # # # # # # # #,rt bitch do it look like i care n o o o o o o,302
39283,19214,3,0,3,0,0,1,"RT @cotydankh: ""are these hoes loyal?""\n\n&#12288; N\n&#12288;&#12288; O\n&#12288;&#12288;&#12288; O\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.","rt @cotydankh: ""are these hoes loyal?""\n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.","rt @cotydankh: ""are these hoes loyal?""\n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.",1,"rt : ""are these hoes loyal?""\n\n&#12288; n\n&#12288;&#12288; o\n&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288;&#12288; o\n&#12288;&#12288;&#12288;&#12288;&#12288;o\n&#12288;&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288; &#12290;\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288;.\n&#12288;&#12288;&#12288; .\n&#12288;&#12288;&#12288;&#12288;.",rt are these hoes loyal # n # # o # # # o # # # # o # # # # # o # # # # # o # # # # # o # # # # # # # # # # # # # # # # # # # # # #,rt are these hoes loyal n o o o o o o,296
24023,16918,3,0,0,3,0,2,RT @Mr_MshkL: &#1589;&#1608;&#1585;&#1577; &#1604;&#1591;&#1575;&#1574;&#1585; &#1575;&#1604;&#1603;&#1575;&#1585;&#1583;&#1610;&#1606;&#1575;&#1604; &#1575;&#1604;&#1571;&#1581;&#1605;&#1585; &#1575;&#1604;&#1605;&#1605;&#1610;&#1586;&#1548; &#1571;&#1581;&#1583; &#1588;&#1582;&#1589;&#1610;&#1575;&#1578; &#1604;&#1593;&#1576;&#1577; &#1575;&#1604;&#1591;&#1610;&#1608;&#1585; &#1575;&#1604;&#1594;&#1575;&#1590;&#1576;&#1577; angry birds &#1575;&#1604;&#1588;&#1607;&#1610;&#1585;&#1577; !\n&#8226; http://t.co/0lowkClb,rt @mr_mshkl: &#1589;&#1608;&#1585;&#1577; &#1604;&#1591;&#1575;&#1574;&#1585; &#1575;&#1604;&#1603;&#1575;&#1585;&#1583;&#1610;&#1606;&#1575;&#1604; &#1575;&#1604;&#1571;&#1581;&#1605;&#1585; &#1575;&#1604;&#1605;&#1605;&#1610;&#1586;&#1548; &#1571;&#1581;&#1583; &#1588;&#1582;&#1589;&#1610;&#1575;&#1578; &#1604;&#1593;&#1576;&#1577; &#1575;&#1604;&#1591;&#1610;&#1608;&#1585; &#1575;&#1604;&#1594;&#1575;&#1590;&#1576;&#1577; angry birds &#1575;&#1604;&#1588;&#1607;&#1610;&#1585;&#1577; !\n&#8226; http://t.co/0lowkclb,rt @mr_mshkl: &#1589;&#1608;&#1585;&#1577; &#1604;&#1591;&#1575;&#1574;&#1585; &#1575;&#1604;&#1603;&#1575;&#1585;&#1583;&#1610;&#1606;&#1575;&#1604; &#1575;&#1604;&#1571;&#1581;&#1605;&#1585; &#1575;&#1604;&#1605;&#1605;&#1610;&#1586;&#1548; &#1571;&#1581;&#1583; &#1588;&#1582;&#1589;&#1610;&#1575;&#1578; &#1604;&#1593;&#1576;&#1577; &#1575;&#1604;&#1591;&#1610;&#1608;&#1585; &#1575;&#1604;&#1594;&#1575;&#1590;&#1576;&#1577; angry birds &#1575;&#1604;&#1588;&#1607;&#1610;&#1585;&#1577; !\n&#8226;,1,rt : &#1589;&#1608;&#1585;&#1577; &#1604;&#1591;&#1575;&#1574;&#1585; &#1575;&#1604;&#1603;&#1575;&#1585;&#1583;&#1610;&#1606;&#1575;&#1604; &#1575;&#1604;&#1571;&#1581;&#1605;&#1585; &#1575;&#1604;&#1605;&#1605;&#1610;&#1586;&#1548; &#1571;&#1581;&#1583; &#1588;&#1582;&#1589;&#1610;&#1575;&#1578; &#1604;&#1593;&#1576;&#1577; &#1575;&#1604;&#1591;&#1610;&#1608;&#1585; &#1575;&#1604;&#1594;&#1575;&#1590;&#1576;&#1577; angry birds &#1575;&#1604;&#1588;&#1607;&#1610;&#1585;&#1577; !\n&#8226;,rt # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # angry birds # # # # # # # #,rt angry birds,295


In [90]:
# Remove spaces in tweets to count only characters
df['nospaces'] = df['remove_empty_hashtag'].str.replace(" ", "")

In [91]:
df['character_count'] = df['nospaces'].apply(lambda x: len(x))
df.head(50)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117,i'venoticedalotof#icontfpresentationsmentionhappinesswonderifprofessionhasaboveaveragehappiness,95
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,62,gettingforthisweekendsshows#country#music#lylepierce,52
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany,my final legislative session day has officially begun #albany,69,myfinallegislativesessiondayhasofficiallybegun#albany,53
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots,faggots,9,faggots,7
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,96,a#bikinikindoflifesummer#palmtrees#breeze#place#cali#california#swimwear,72


In [92]:
df.isna().sum()

id                      0
count                   0
hate_speech             0
offensive_language      0
neither                 0
positive                0
class                   0
tweet                   0
tweet_low               0
no_url                  0
handle_count            0
no_handle               0
no_special              0
remove_empty_hashtag    0
tweet_length            0
nospaces                0
character_count         0
dtype: int64

# Lemmatization with Parts of Speech

In [145]:
df['lemmatized'] = df['no_special'].apply(lambda x: getLemma(x))

In [146]:
df.head(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count,lemmatized
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21,day to go # gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18,get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35,these hoe get more body than a cemetery # # #
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33,dubya hat today teabagger movement
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117,i'venoticedalotof#icontfpresentationsmentionhappinesswonderifprofessionhasaboveaveragehappiness,95,i 've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,62,gettingforthisweekendsshows#country#music#lylepierce,52,get for this weekend show # country # music # lylepierce
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany,my final legislative session day has officially begun #albany,69,myfinallegislativesessiondayhasofficiallybegun#albany,53,my final legislative session day have officially begin # albany
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots,faggots,9,faggots,7,faggot
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,96,a#bikinikindoflifesummer#palmtrees#breeze#place#cali#california#swimwear,72,a # bikini kind of life summer # palmtrees # breeze # place # cali # california # swimwear


In [147]:
df['lemma1'] = df['lemmatized'].str.replace('# ', '#')
df.head(5)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count,lemmatized,lemma1
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21,day to go # gettingthere,day to go #gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18,get off my twitter fag,get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35,these hoe get more body than a cemetery # # #,these hoe get more body than a cemetery ###
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white,a friend just tell me she 's afraid to go to dc rally amp be attack by #berniebros or the racist cuz she 's not white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33,dubya hat today teabagger movement,dubya hat today teabagger movement


In [148]:
df['lemma_final'] = df['lemmatized'].str.replace(" '", "'")
df.head(15)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count,lemmatized,lemma1,lemma_final
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21,day to go # gettingthere,day to go #gettingthere,day to go # gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18,get off my twitter fag,get off my twitter fag,get off my twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35,these hoe get more body than a cemetery # # #,these hoe get more body than a cemetery ###,these hoe get more body than a cemetery # # #
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white,a friend just tell me she 's afraid to go to dc rally amp be attack by #berniebros or the racist cuz she 's not white,a friend just tell me she's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she's not white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117,i'venoticedalotof#icontfpresentationsmentionhappinesswonderifprofessionhasaboveaveragehappiness,95,i 've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness,i 've notice a lot of #icontf presentation mention happiness wonder if profession have above average happiness,i've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,62,gettingforthisweekendsshows#country#music#lylepierce,52,get for this weekend show # country # music # lylepierce,get for this weekend show #country #music #lylepierce,get for this weekend show # country # music # lylepierce
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany,my final legislative session day has officially begun #albany,69,myfinallegislativesessiondayhasofficiallybegun#albany,53,my final legislative session day have officially begin # albany,my final legislative session day have officially begin #albany,my final legislative session day have officially begin # albany
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots,faggots,9,faggots,7,faggot,faggot,faggot
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,96,a#bikinikindoflifesummer#palmtrees#breeze#place#cali#california#swimwear,72,a # bikini kind of life summer # palmtrees # breeze # place # cali # california # swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a # bikini kind of life summer # palmtrees # breeze # place # cali # california # swimwear


# Remove StopWords and Words Less than Two Characters

In [149]:
# Import stopwords in English
stop = stopwords.words('english')
df['tweet_no_stopwords'] = df['lemmatized'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))

In [150]:
df.head(10)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count,lemmatized,lemma1,lemma_final,tweet_no_stopwords
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21,day to go # gettingthere,day to go #gettingthere,day to go # gettingthere,day go # gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18,get off my twitter fag,get off my twitter fag,get off my twitter fag,get twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35,these hoe get more body than a cemetery # # #,these hoe get more body than a cemetery ###,these hoe get more body than a cemetery # # #,hoe get body cemetery # # #
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white,a friend just tell me she 's afraid to go to dc rally amp be attack by #berniebros or the racist cuz she 's not white,a friend just tell me she's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she's not white,friend tell 's afraid go dc rally amp attack # berniebros racist cuz 's white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement
5,33371,3,0,0,0,3,3,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness?,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117,i'venoticedalotof#icontfpresentationsmentionhappinesswonderifprofessionhasaboveaveragehappiness,95,i 've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness,i 've notice a lot of #icontf presentation mention happiness wonder if profession have above average happiness,i've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness,'ve notice lot # icontf presentation mention happiness wonder profession average happiness
6,27911,3,0,0,0,3,3,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows! #country #music #lylepierce,0,getting for this weekends shows! #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,getting for this weekends shows #country #music #lylepierce,62,gettingforthisweekendsshows#country#music#lylepierce,52,get for this weekend show # country # music # lylepierce,get for this weekend show #country #music #lylepierce,get for this weekend show # country # music # lylepierce,get weekend show # country # music # lylepierce
7,42015,3,0,0,0,3,3,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,@user my final legislative session day has officially begun! @user @user #albany,3,my final legislative session day has officially begun! #albany,my final legislative session day has officially begun #albany,my final legislative session day has officially begun #albany,69,myfinallegislativesessiondayhasofficiallybegun#albany,53,my final legislative session day have officially begin # albany,my final legislative session day have officially begin #albany,my final legislative session day have officially begin # albany,final legislative session day officially begin # albany
8,6788,3,2,1,0,0,0,@lucas_wright955 @MichaelGT03 faggots,@lucas_wright955 @michaelgt03 faggots,@lucas_wright955 @michaelgt03 faggots,2,faggots,faggots,faggots,9,faggots,7,faggot,faggot,faggot,faggot
9,43002,3,0,0,0,3,3,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,0,a #bikini kind of life ð´ summer #palmtrees #breeze #place #cali #california #swimwearâ¦,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,96,a#bikinikindoflifesummer#palmtrees#breeze#place#cali#california#swimwear,72,a # bikini kind of life summer # palmtrees # breeze # place # cali # california # swimwear,a #bikini kind of life summer #palmtrees #breeze #place #cali #california #swimwear,a # bikini kind of life summer # palmtrees # breeze # place # cali # california # swimwear,# bikini kind life summer # palmtrees # breeze # place # cali # california # swimwear


In [151]:
# Removing words shorter than two characters because they will likely not be relevant
df['tweet_no_stopwords_no_short'] = df['tweet_no_stopwords'].apply(lambda x: ' '.join([word for word in x.split() if
                                                                                      len(word) > 2]))
df.head(5)

Unnamed: 0,id,count,hate_speech,offensive_language,neither,positive,class,tweet,tweet_low,no_url,handle_count,no_handle,no_special,remove_empty_hashtag,tweet_length,nospaces,character_count,lemmatized,lemma1,lemma_final,tweet_no_stopwords,tweet_no_stopwords_no_short
0,28842,3,0,0,0,3,3,13 days to go #gettingthere,13 days to go #gettingthere,13 days to go #gettingthere,0,13 days to go #gettingthere,days to go #gettingthere,days to go #gettingthere,30,daystogo#gettingthere,21,day to go # gettingthere,day to go #gettingthere,day to go # gettingthere,day go # gettingthere,day gettingthere
1,5684,3,1,2,0,0,1,@anggxo get off my twitter fag,@anggxo get off my twitter fag,@anggxo get off my twitter fag,1,get off my twitter fag,get off my twitter fag,get off my twitter fag,23,getoffmytwitterfag,18,get off my twitter fag,get off my twitter fag,get off my twitter fag,get twitter fag,get twitter fag
2,22263,3,1,2,0,0,1,These hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,these hoes got more bodies than a cemetery # # #,these hoes got more bodies than a cemetery,58,thesehoesgotmorebodiesthanacemetery,35,these hoe get more body than a cemetery # # #,these hoe get more body than a cemetery ###,these hoe get more body than a cemetery # # #,hoe get body cemetery # # #,hoe get body cemetery
3,40774,3,0,0,0,3,3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the racists cuz she's not white.,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,afriendjusttoldmeshe'safraidtogotodcrallyampbeattackedby#berniebrosortheracistscuzshe'snotwhite,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white,a friend just tell me she 's afraid to go to dc rally amp be attack by #berniebros or the racist cuz she 's not white,a friend just tell me she's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she's not white,friend tell 's afraid go dc rally amp attack # berniebros racist cuz 's white,friend tell afraid rally amp attack berniebros racist cuz white
4,7082,3,0,0,3,0,2,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,@ramaxe1965 dubya hates todays teabagger movement.,1,dubya hates todays teabagger movement.,dubya hates todays teabagger movement,dubya hates todays teabagger movement,39,dubyahatestodaysteabaggermovement,33,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement,dubya hat today teabagger movement


# Labels for Binary Classification

In [152]:
df['positive'].value_counts()

0    24773
3    21421
Name: positive, dtype: int64

In [153]:
# Exclude neutral tweets because we are building a binary model right now
df = df.loc[df['class'] != 2]

In [154]:
len(df)

42031

In [155]:
df['neg_label'] = df['class'].apply(lambda x: 0 if x == 3 else 1)

In [156]:
df['neg_label'].value_counts()

0    21421
1    20610
Name: neg_label, dtype: int64

# Dealing with Missing Values, Dropping Columns, Save for EDA

In [158]:
df.drop(['count', 'hate_speech', 'offensive_language', 'neither', 'positive', 'class', 'tweet', 'no_url', 'no_handle', 
        'no_special'], axis = 1, inplace = True)

In [160]:
df.drop(['id', 'nospaces'], axis = 1, inplace = True)

In [161]:
df.head(5)

Unnamed: 0,tweet_low,handle_count,remove_empty_hashtag,tweet_length,character_count,lemmatized,lemma1,lemma_final,tweet_no_stopwords,tweet_no_stopwords_no_short,neg_label
0,13 days to go #gettingthere,0,days to go #gettingthere,30,21,day to go # gettingthere,day to go #gettingthere,day to go # gettingthere,day go # gettingthere,day gettingthere,0
1,@anggxo get off my twitter fag,1,get off my twitter fag,23,18,get off my twitter fag,get off my twitter fag,get off my twitter fag,get twitter fag,get twitter fag,1
2,these hoes got more bodies than a cemetery&#128056;&#9749;&#65039;.,0,these hoes got more bodies than a cemetery,58,35,these hoe get more body than a cemetery # # #,these hoe get more body than a cemetery ###,these hoe get more body than a cemetery # # #,hoe get body cemetery # # #,hoe get body cemetery,1
3,a friend just told me she's afraid to go to dc rally &amp; be attacked by #berniebros or the @user racists cuz she's not white.,1,a friend just told me she's afraid to go to dc rally amp be attacked by #berniebros or the racists cuz she's not white,124,95,a friend just tell me she 's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she 's not white,a friend just tell me she 's afraid to go to dc rally amp be attack by #berniebros or the racist cuz she 's not white,a friend just tell me she's afraid to go to dc rally amp be attack by # berniebros or the racist cuz she's not white,friend tell 's afraid go dc rally amp attack # berniebros racist cuz 's white,friend tell afraid rally amp attack berniebros racist cuz white,0
5,i've noticed a lot of #icontf16 presentations mention happiness. wonder if profession has above average happiness? @user,1,i've noticed a lot of #icontf presentations mention happiness wonder if profession has above average happiness,117,95,i 've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness,i 've notice a lot of #icontf presentation mention happiness wonder if profession have above average happiness,i've notice a lot of # icontf presentation mention happiness wonder if profession have above average happiness,'ve notice lot # icontf presentation mention happiness wonder profession average happiness,'ve notice lot icontf presentation mention happiness wonder profession average happiness,0


In [162]:
df.isna().sum()

tweet_low                      0
handle_count                   0
remove_empty_hashtag           0
tweet_length                   0
character_count                0
lemmatized                     0
lemma1                         0
lemma_final                    0
tweet_no_stopwords             0
tweet_no_stopwords_no_short    0
neg_label                      0
dtype: int64

In [163]:
len(df)

42031

In [None]:
# Save to file
