# Hate-speech-Tweets-Data-Prep

## Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nltk
import re

from nltk.corpus import stopwords

from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
lemmatizer = nltk.stem.WordNetLemmatizer()

nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('stopwords')

import warnings
warnings.filterwarnings('ignore') 
%matplotlib inline

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Mounting Google Drive

The data is stored in Google Drive.

In [2]:
# Colab-only: mount the user's Google Drive so the CSV files can be read.
from google.colab import drive

drive.mount('/content/gdrive')
# NOTE(review): this path is relative while the mount point above is absolute,
# so it only resolves when the working directory is /content — confirm.
root_path = 'gdrive/My Drive/hate_speech/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


#### Reading data in csv format

In [3]:
# Load the first labelled dataset. The path is built from root_path (defined
# in the drive-mounting cell) instead of repeating the folder literal, so the
# data location only has to be changed in one place.
data_1 = pd.read_csv(root_path + 'labeled_data.csv')
data_1.head()

Unnamed: 0.1,Unnamed: 0,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't...
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn ba...
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby...
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she lo...
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you...


#### Inspecting data

In [4]:
# Column names, non-null counts and dtypes of the raw frame.
data_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24783 entries, 0 to 24782
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Unnamed: 0          24783 non-null  int64 
 1   count               24783 non-null  int64 
 2   hate_speech         24783 non-null  int64 
 3   offensive_language  24783 non-null  int64 
 4   neither             24783 non-null  int64 
 5   class               24783 non-null  int64 
 6   tweet               24783 non-null  object
dtypes: int64(6), object(1)
memory usage: 1.3+ MB


#### Indexing first column as "id"

In [5]:
# renaming the first column as id
data_1.rename(columns={'Unnamed: 0':'id'}, inplace = True)

# Show full cell contents and more rows/columns when frames are displayed.
# None means "do not truncate" (pandas >= 1.0); the old -1 sentinel is
# deprecated and is no longer accepted by newer pandas releases.
pd.set_option('display.max_colwidth', None)
pd.set_option("display.max_columns", 50)

pd.set_option('display.max_info_columns', 500)
pd.set_option('display.max_rows', 500)

In [6]:
# preview the frame with id as the index; the result is not assigned, so data_1 itself is unchanged
data_1.set_index('id').head(5)

Unnamed: 0_level_0,count,hate_speech,offensive_language,neither,class,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't complain about cleaning up your house. &amp; as a man you should always take the trash out...
1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;


#### Checking class value counts

In [7]:
# Rows per class label (the following sections inspect 0 = hate speech,
# 1 = offensive language, 2 = neutral).
data_1['class'].value_counts()

1    19190
2    4163 
0    1430 
Name: class, dtype: int64

#### Inspecting Hate speech class

In [8]:
# Rows of the first dataset whose class is 0.
is_hate_speech = data_1['class'].eq(0)
hate_speech_df = data_1.loc[is_hate_speech]
hate_speech_df.head()

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
85,85,3,2,1,0,0,"""@Blackman38Tide: @WhaleLookyHere @HowdyDowdy11 queer"" gaywad"
89,90,3,3,0,0,0,"""@CB_Baby24: @white_thunduh alsarabsss"" hes a beaner smh you can tell hes a mexican"
110,111,3,3,0,0,0,"""@DevilGrimz: @VigxRArts you're fucking gay, blacklisted hoe"" Holding out for #TehGodClan anyway http://t.co/xUCcwoetmn"
184,186,3,3,0,0,0,"""@MarkRoundtreeJr: LMFAOOOO I HATE BLACK PEOPLE https://t.co/RNvD2nLCDR"" This is why there's black people and niggers"
202,204,3,2,1,0,0,"""@NoChillPaz: ""At least I'm not a nigger"" http://t.co/RGJa7CfoiT""\r\n\r\nLmfao"


#### Inspecting Offensive language class

In [9]:
# Rows of the first dataset whose class is 1.
is_offensive = data_1['class'].eq(1)
offensive_lang_df = data_1.loc[is_offensive]
offensive_lang_df.head()

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,5,3,1,2,0,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""


#### Inspecting Neutral language class

In [10]:
# Rows of the first dataset whose class is 2.
is_neutral = data_1['class'].eq(2)
neutral_df = data_1.loc[is_neutral]
neutral_df.head()

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
0,0,3,0,0,3,2,!!! RT @mayasolovely: As a woman you shouldn't complain about cleaning up your house. &amp; as a man you should always take the trash out...
40,40,3,0,1,2,2,""" momma said no pussy cats inside my doghouse """
63,63,3,0,0,3,2,"""@Addicted2Guys: -SimplyAddictedToGuys http://t.co/1jL4hi8ZMF"" woof woof hot scally lad"
66,66,3,0,1,2,2,"""@AllAboutManFeet: http://t.co/3gzUpfuMev"" woof woof and hot soles"
67,67,3,0,1,2,2,"""@Allyhaaaaa: Lemmie eat a Oreo &amp; do these dishes."" One oreo? Lol"


## Reading 2nd dataset 

In [11]:
# Load the second dataset. The path is built from root_path (defined in the
# drive-mounting cell) instead of repeating the folder literal.
data_2 = pd.read_csv(root_path + 'train_label.csv')
data_2.head()

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,0,factsguide: society now #motivation


In [12]:
# Preview with id as the index; the result is not assigned, so data_2 is unchanged.
data_2.set_index('id').head(5)

Unnamed: 0_level_0,label,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
2,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
3,0,bihday your majesty
4,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
5,0,factsguide: society now #motivation


#### Inspecting Neutral language class from 2nd dataset

In [13]:
# Rows of the second dataset whose label is 0.
label_is_zero = data_2['label'].eq(0)
neutral_2 = data_2.loc[label_is_zero]
neutral_2.head()

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,0,factsguide: society now #motivation


#### Inspecting Hate speech class from 2nd dataset

In [14]:
# Rows of the second dataset whose label is 1.
label_is_one = data_2['label'].eq(1)
hate_speech_2 = data_2.loc[label_is_one]
hate_speech_2.head()

Unnamed: 0,id,label,tweet
13,14,1,@user #cnn calls #michigan middle school 'build the wall' chant '' #tcot
14,15,1,no comment! in #australia #opkillingbay #seashepherd #helpcovedolphins #thecove #helpcovedolphins
17,18,1,retweet if you agree!
23,24,1,@user @user lumpy says i am a . prove it lumpy.
34,35,1,it's unbelievable that in the 21st century we'd need something like this. again. #neverump #xenophobia


#### Number of records per class in the 1st dataset

In [15]:
# Row counts of the three class subsets taken from the first dataset.
print(" Number of records for hate speech (1st Dataframe) : ", hate_speech_df.shape[0])
print("\n Number of records for offensive language (1st Dataframe) : ", offensive_lang_df.shape[0])
print("\n Number of records for neutral language (1st Dataframe) : ", neutral_df.shape[0])

 Number of records for hate speech (1st Dataframe) :  1430

 Number of records for offensive language (1st Dataframe) :  19190

 Number of records for neutral language (1st Dataframe) :  4163


#### Number of records per class in the 2nd dataset

In [16]:
# Row counts of the two label subsets taken from the second dataset.
print(" Number of records for hate speech (2nd Dataframe) : ", hate_speech_2.shape[0])
print("\n Number of records for neutral language (2nd Dataframe) : ", neutral_2.shape[0])

 Number of records for hate speech (2nd Dataframe) :  2242

 Number of records for neutral language (2nd Dataframe) :  29720


In [17]:
# Remove every row labelled class 2 from the first dataset (in place).
class_2_index = data_1.index[data_1['class'] == 2]
data_1.drop(index=class_2_index, inplace=True)
data_1.head()

Unnamed: 0,id,count,hate_speech,offensive_language,neither,class,tweet
1,1,3,0,3,0,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,2,3,0,3,0,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,3,0,2,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,4,6,0,6,0,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,5,3,1,2,0,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""


#### Class counts after dropping the neutral class from the 1st dataset

In [18]:
# Remaining rows per class label in data_1.
data_1['class'].value_counts()

1    19190
0    1430 
Name: class, dtype: int64

In [19]:
# Drop the columns that are not used further on, then preview with id as index.
unused_columns = ['count', 'hate_speech', 'offensive_language', 'neither']
data_1.drop(columns=unused_columns, inplace=True)
data_1.set_index('id').head()

Unnamed: 0_level_0,class,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""


In [20]:
# Keep only the first 21000 rows (by position) of the label-0 subset.
neutral_2 = neutral_2.iloc[:21000]
print(neutral_2.shape[0])

21000


In [21]:
# Add a constant target column (0) at position 2.
# The guard keeps the cell safe to re-run: the former positional `True` was
# `allow_duplicates`, so running the cell twice silently created a second
# "target" column.
if "target" not in neutral_2.columns:
    neutral_2.insert(2, "target", 0)
neutral_2

Unnamed: 0,id,label,target,tweet
0,1,0,0,@user when a father is dysfunctional and is so selfish he drags his kids into his dysfunction. #run
1,2,0,0,@user @user thanks for #lyft credit i can't use cause they don't offer wheelchair vans in pdx. #disapointed #getthanked
2,3,0,0,bihday your majesty
3,4,0,0,#model i love u take with u all the time in urð±!!! ððððð¦ð¦ð¦
4,5,0,0,factsguide: society now #motivation
...,...,...,...,...
22574,22575,0,0,coldâï¸âï¸âï¸ #like4like #likeforlikealways #hours#tagsforlikesâ¦
22575,22576,0,0,@user 3 more sleeps until @user ð´ð´ð´ #hurryup @user @user @user @user #conquercancer
22576,22577,0,0,can't wait to sta gettin my shit together. #future #goals
22579,22580,0,0,can #lighttherapy help with #sad or #depression? #altwaystoheal #healthy is !!


In [22]:
# Add a constant target column (1) at position 2.
# The guard keeps the cell safe to re-run: the former positional `True` was
# `allow_duplicates`, so running the cell twice silently created a second
# "target" column.
if "target" not in data_1.columns:
    data_1.insert(2, "target", 1)
data_1

Unnamed: 0,id,class,target,tweet
1,1,1,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,2,1,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,3,1,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,4,1,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,5,1,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""
...,...,...,...,...
24776,25289,0,1,you're all niggers
24777,25290,0,1,you're such a retard i hope you get type 2 diabetes and die from a sugar rush you fucking faggot @Dare_ILK
24778,25291,1,1,"you's a muthaf***in lie &#8220;@LifeAsKing: @20_Pearls @corey_emanuel right! His TL is trash &#8230;. Now, mine? Bible scriptures and hymns&#8221;"
24780,25294,1,1,young buck wanna eat!!.. dat nigguh like I aint fuckin dis up again


In [23]:
# Remove the original label column from each frame (in place).
data_1.drop(columns='class', inplace=True)

neutral_2.drop(columns='label', inplace=True)

In [24]:
# Rows per target value in data_1.
data_1['target'].value_counts()

1    20620
Name: target, dtype: int64

In [25]:
# Rows per target value in neutral_2.
neutral_2['target'].value_counts()

0    21000
Name: target, dtype: int64

### Merging both dataframes

In [26]:
# Stack both frames, keeping only the columns they share, and index the result by id.
stacked = pd.concat([data_1, neutral_2], join = 'inner')
df_final = stacked.set_index('id')
df_final.head()

Unnamed: 0_level_0,target,tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,!!!!! RT @mleew17: boy dats cold...tyga dwn bad for cuffin dat hoe in the 1st place!!
2,1,!!!!!!! RT @UrKindOfBrand Dawg!!!! RT @80sbaby4life: You ever fuck a bitch and she start to cry? You be confused as shit
3,1,!!!!!!!!! RT @C_G_Anderson: @viva_based she look like a tranny
4,1,!!!!!!!!!!!!! RT @ShenikaRoberts: The shit you hear about me might be true or it might be faker than the bitch who told it to ya &#57361;
5,1,"!!!!!!!!!!!!!!!!!!""@T_Madison_x: The shit just blows me..claim you so faithful and down for somebody but still fucking with hoes! &#128514;&#128514;&#128514;"""


In [27]:
# Checking final count after merging both dataframes
df_final['target'].value_counts()

0    21000
1    20620
Name: target, dtype: int64

In [28]:
# Shuffle all rows (frac=1) with a fixed seed, then replace the index with a
# fresh 0..n-1 range (the id index is discarded by drop=True).
final_df = df_final.sample(frac=1, random_state=43).reset_index(drop=True)
final_df

Unnamed: 0,target,tweet
0,0,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine
4,1,@munafal777 karma is a bitch
...,...,...
41615,0,all dressed up and ready for @user
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;


In [29]:
# total number of rows in the final dataset

final_df.shape[0]

41620

#### Converting tweets to lowercase

In [30]:
# Lower-case every tweet; values that are not plain str are passed through unchanged.
final_df['tweet_lower'] = final_df['tweet'].apply(
    lambda text: text.lower() if type(text) == str else text
)
final_df.head()

Unnamed: 0,target,tweet,tweet_lower
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch


In [31]:
# Dropping all duplicates (the first occurrence of each lower-cased tweet is kept).
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not operate on a filtered selection of the previous frame
# (which can raise SettingWithCopyWarning).
final_df = final_df.drop_duplicates(subset ='tweet_lower', keep = 'first').copy()
print(len(final_df))

40222


In [32]:
# Sanity check: the row count after de-duplication must match the value printed above.
assert len(final_df) == 40222

In [33]:
# Select (not remove) the tweets containing 'http://t' followed by at least one
# more character, so they can be inspected. Only plain http links are matched;
# 'https://' links are not selected by this pattern.
retweets =  final_df[final_df['tweet_lower'].str.contains(r'http://t(?!$)')]
retweets

Unnamed: 0,target,tweet,tweet_lower
17,1,Young Gem &amp; Don Chief been killing it in the yo...going in on that real music..no bubble gum trash in the booth... http://t.co/lkjNb7uZmt,young gem &amp; don chief been killing it in the yo...going in on that real music..no bubble gum trash in the booth... http://t.co/lkjnb7uzmt
19,1,RT @urbandictionary: @The2kGod nigger: A fully grown niglet http://t.co/HJKlNHDrWT http://t.co/x6y5XKKPMq,rt @urbandictionary: @the2kgod nigger: a fully grown niglet http://t.co/hjklnhdrwt http://t.co/x6y5xkkpmq
22,1,RT @Bayonettes: When you tweet som'n just for laughs &amp; a bitch wanna spoil the fun by getting Serious.. &#128530;&#128530; http://t.co/9LD1HWz1BU,rt @bayonettes: when you tweet som'n just for laughs &amp; a bitch wanna spoil the fun by getting serious.. &#128530;&#128530; http://t.co/9ld1hwz1bu
96,1,RT @PRAYINGFORHEAD: ya pussy stank @_ANGELSAMUELS http://t.co/i9tXjSDOZZ,rt @prayingforhead: ya pussy stank @_angelsamuels http://t.co/i9txjsdozz
100,1,ALL HE NEEDS IS A SKIRT!!!\r\nSure looks like a bitch to me.\r\n http://t.co/89HH52pCUe&#8221;,all he needs is a skirt!!!\r\nsure looks like a bitch to me.\r\n http://t.co/89hh52pcue&#8221;
...,...,...,...
41486,1,RT @LiIuglymane: When your side bitch tries to hug you in public http://t.co/T5uRIsXNdF,rt @liiuglymane: when your side bitch tries to hug you in public http://t.co/t5urisxndf
41520,1,&#8220;@JalapenoBright: This hoe waited until she got 45 to make a sextape chile.. http://t.co/r9gMmImakO&#8221; OH MY LORD&#128561;,&#8220;@jalapenobright: this hoe waited until she got 45 to make a sextape chile.. http://t.co/r9gmmimako&#8221; oh my lord&#128561;
41524,1,"#longhair don't care got my #gayboyproblems everywhere, I'm a #bitch I'm a #champ, I'm totally full of&#8230; http://t.co/Qia88qGmWh","#longhair don't care got my #gayboyproblems everywhere, i'm a #bitch i'm a #champ, i'm totally full of&#8230; http://t.co/qia88qgmwh"
41605,1,"#porn,#android,#iphone,#ipad,#sex,#xxx, | #CloseUp | pussy fuck close up http://t.co/0dvaZWLq2q","#porn,#android,#iphone,#ipad,#sex,#xxx, | #closeup | pussy fuck close up http://t.co/0dvazwlq2q"


In [34]:
# Tweets containing '#' directly followed by a digit (as in numeric HTML
# entities such as '&#128514;'), and how they split across the two targets.
tweets_emoji =  final_df[final_df['tweet_lower'].str.contains(r'#[0-9]')]
tweets_emoji['target'].value_counts()

1    5065
0    379 
Name: target, dtype: int64

In [35]:
def pattern_remover(input_txt, pattern):
    """Return ``input_txt`` with every match of the regex ``pattern`` deleted.

    The pattern is substituted directly in a single pass. The earlier
    implementation collected the matched strings and re-used each one as a
    new regular expression, which misbehaved whenever a match contained regex
    metacharacters (``.``, ``+``, ``(`` ...) and left residue when one match
    was a prefix of another (removing ``@a`` first turned ``@ab`` into ``b``).

    Parameters
    ----------
    input_txt : str
        Text to clean.
    pattern : str
        Regular expression describing the spans to delete.

    Returns
    -------
    str
        The text with all non-overlapping matches replaced by the empty
        string (nothing is inserted in their place).
    """
    return re.sub(pattern, '', input_txt)

In [36]:
def count(input_txt, pattern):
    """Count the non-overlapping matches of the regex ``pattern`` in ``input_txt``."""
    return sum(1 for _ in re.finditer(pattern, input_txt))


In [37]:
# Number of "@handle" mentions in each tweet.
# The regex is written as a raw string so that "\w" is not parsed as an
# (invalid) Python string escape, which newer interpreters warn about.
final_df['handle_count'] = np.vectorize(count)(final_df['tweet_lower'], r"@[\w]*")
final_df['handle_count'].value_counts()

0     21227
1     13623
2     3596 
3     1152 
4     351  
5     143  
6     72   
7     24   
8     20   
9     9    
10    4    
11    1    
Name: handle_count, dtype: int64

In [38]:
# remove twitter handles (@user)
# The regex is written as a raw string so that "\w" is not parsed as an
# (invalid) Python string escape, which newer interpreters warn about.
final_df['handle_removed'] = np.vectorize(pattern_remover)(final_df['tweet_lower'], r"@[\w]*")

In [39]:
# Preview the frame with the new handle_count and handle_removed columns.
final_df.head()

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch


In [40]:
# Delete http/https URLs from every tweet (not only retweets).
# The character class stops at the first character outside [A-Za-z0-9./],
# so any URL text following e.g. '-', '_' or '?' is left in place.
final_df['url_removed'] = np.vectorize(pattern_remover)(final_df['handle_removed'], "https?://[A-Za-z0-9./]*")
final_df.head()

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch


In [41]:
# Replace every character that is not a letter, '#' or apostrophe with a space.
# regex=True is passed explicitly: from pandas 2.0 the default is regex=False,
# under which the character class would be searched for as literal text and
# nothing would be replaced.
final_df['special_char_removed'] = final_df['url_removed'].str.replace("[^a-zA-Z#']", " ", regex=True)
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch
...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #


In [42]:
# Remove '#' characters that stand alone between two spaces (" # "), e.g. the
# leftovers of numeric HTML entities; hashtags attached to a word are kept.
final_df['single_hashtag_removed'] = np.vectorize(pattern_remover)(final_df['special_char_removed'], " # ")
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch
...,...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch


In [43]:
# Character length of each cleaned tweet.
final_df['tweets_length'] = final_df['single_hashtag_removed'].apply(len)
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17
...,...,...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,32
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,84
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,91
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch,54


In [44]:
# Rows whose tweets_length exceeds 280 are treated as outliers;
# count how many of them there are.

int((final_df['tweets_length'] > 280).sum())

6

In [45]:
# Remove the rows whose tweets_length exceeds the limit: there are only a
# handful of them, so they are insignificant for the model-building process.
MAX_TWEET_LENGTH = 280

too_long = final_df['tweets_length'] > MAX_TWEET_LENGTH

# Index labels of the rows being removed (kept available for inspection).
idx = final_df.index[too_long]

# Filter with the boolean mask instead of drop(idx): dropping by label would
# also remove any other row that shares an index label with an over-length
# tweet, whereas the mask removes exactly the flagged rows. copy() yields an
# independent frame, so the column assignments in later cells act on it directly.
final_df = final_df.loc[~too_long].copy()

assert not (final_df['tweets_length'] > MAX_TWEET_LENGTH).any(), (
    "rows longer than MAX_TWEET_LENGTH survived the filter"
)

In [46]:
# Per-column count of missing values
final_df.isnull().sum()

target                    0
tweet                     0
tweet_lower               0
handle_count              0
handle_removed            0
url_removed               0
special_char_removed      0
single_hashtag_removed    0
tweets_length             0
dtype: int64

In [47]:
# Display the current state of the frame
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17
...,...,...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,32
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,84
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,91
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch,54


In [48]:
def nltk_tag_to_wordnet_tag(nltk_tag):
    """Translate an NLTK part-of-speech tag into a WordNet POS constant.

    The decision is made on the leading letter of the tag:
    'J' -> adjective, 'V' -> verb, 'N' -> noun, 'R' -> adverb.

    Parameters
    ----------
    nltk_tag : str
        Part-of-speech tag, e.g. the second item of a pair returned by
        ``nltk.pos_tag``.

    Returns
    -------
    The matching ``wordnet`` constant (ADJ, VERB, NOUN or ADV), or ``None``
    when the tag starts with none of the four letters.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, pos in prefix_to_pos:
        if nltk_tag.startswith(prefix):
            return pos
    # Any other tag has no WordNet counterpart here.
    return None

In [49]:
def lemmatize_sentence(sentence, preserve=('ass',)):
    """Lemmatize every token of a tweet using its part-of-speech tag.

    The sentence is tokenized with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``; each tag is mapped through ``nltk_tag_to_wordnet_tag``
    and the token is lemmatized with that WordNet POS. The resulting tokens
    are joined back with single spaces.

    Parameters
    ----------
    sentence : str
        Text to lemmatize.
    preserve : collection of str, optional
        Tokens that must be emitted unchanged even when they have a usable
        POS tag. Defaults to ``('ass',)`` because the lemmatizer was observed
        to reduce 'ass' to 'as'.

    Returns
    -------
    str
        The space-joined tokens, lemmatized where a WordNet POS is available.
    """
    # Tokenize the sentence and find the POS tag for each token.
    nltk_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))

    lemmatized_sentence = []
    for word, nltk_tag in nltk_tagged:
        tag = nltk_tag_to_wordnet_tag(nltk_tag)
        if word in preserve or tag is None:
            # Protected token, or no WordNet POS available: keep the token as is.
            lemmatized_sentence.append(word)
        else:
            # Otherwise use the tag to lemmatize the token.
            lemmatized_sentence.append(lemmatizer.lemmatize(word, tag))
    return " ".join(lemmatized_sentence)


In [50]:
# Lemmatize each cleaned tweet and store the result in a new 'lemmatized' column.
lemmatized_tweets = final_df['single_hashtag_removed'].apply(lemmatize_sentence)
final_df['lemmatized'] = lemmatized_tweets
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length,lemmatized
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49,brand new big flowdan in the email # horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23,fuck bitch get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95,rt exactly do n't be gettin no additional pussy for them soft ass tweet jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77,niccas use to be like you always tryn pull a finesse duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17,karma be a bitch
...,...,...,...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,32,all dress up and ready for
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,84,too many bitch get rabies and i hate a ho hoppin ' woman # stank pussy poppin ' woman
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,91,the # lesmiserables gang ready for westendlive # lesmiserables # lesmis # westend # love
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch,54,dumb ugly stupid bullshit ass bitch


In [51]:
# removing space after hashtags
final_df['lemmatized'] = final_df['lemmatized'].str.replace('# ', '#')
final_df

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length,lemmatized
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49,brand new big flowdan in the email #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23,fuck bitch get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95,rt exactly do n't be gettin no additional pussy for them soft ass tweet jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77,niccas use to be like you always tryn pull a finesse duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17,karma be a bitch
...,...,...,...,...,...,...,...,...,...,...
41615,0,all dressed up and ready for @user,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,32,all dress up and ready for
41616,1,Too many bitches got rabies And I hate a ho hoppin' woman #Stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,84,too many bitch get rabies and i hate a ho hoppin ' woman #stank pussy poppin ' woman
41617,0,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,91,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love
41618,1,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch,54,dumb ugly stupid bullshit ass bitch


In [52]:
# replacing space before "'"
final_df['lemmatized'] = final_df['lemmatized'].str.replace(" '" ,"'")
final_df.head()

Unnamed: 0,target,tweet,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length,lemmatized
0,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49,brand new big flowdan in the email #horrorshow
1,1,@mckinley719 fuck bitches get money,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23,fuck bitch get money
2,1,RT @_____AL: Exactly. Don't be gettin no additional pussy for them soft ass tweets. Jus be you dude lol,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95,rt exactly do n't be gettin no additional pussy for them soft ass tweet jus be you dude lol
3,1,Niccas use to be like you always Tryn pull a finesse....Duh nicca I need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77,niccas use to be like you always tryn pull a finesse duh nicca i need mine
4,1,@munafal777 karma is a bitch,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17,karma be a bitch


In [53]:
# removing stopwords
stop = stopwords.words('english')
final_df['tweet_stopwords_removed'] = final_df['lemmatized'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))

In [54]:
# Keep only tokens that are at least three characters long.
final_df['tweet_stopwords_removed'] = final_df['tweet_stopwords_removed'].apply(
    lambda text: ' '.join(token for token in text.split() if len(token) >= 3)
)

In [55]:
# Drop the raw 'tweet' column. Rebind the result instead of mutating in place;
# `columns=` already identifies the axis, so no `axis` argument is needed.
final_df = final_df.drop(columns='tweet')

In [56]:
# Renumber the rows from 0 and discard the old index labels.
# Rebind the result instead of mutating the frame in place.
final_df = final_df.reset_index(drop=True)

In [57]:
# Put the columns in their final order, with the 'target' label last.
column_order = [
    "tweet_lower",
    "handle_count",
    "handle_removed",
    "url_removed",
    "special_char_removed",
    "single_hashtag_removed",
    "tweets_length",
    "lemmatized",
    "tweet_stopwords_removed",
    "target",
]
final_df = final_df[column_order]

In [58]:
# Display the final, reordered frame
final_df

Unnamed: 0,tweet_lower,handle_count,handle_removed,url_removed,special_char_removed,single_hashtag_removed,tweets_length,lemmatized,tweet_stopwords_removed,target
0,brand new big flowdan in the emails #horrorshow,0,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,brand new big flowdan in the emails #horrorshow,49,brand new big flowdan in the email #horrorshow,brand new big flowdan email #horrorshow,0
1,@mckinley719 fuck bitches get money,1,fuck bitches get money,fuck bitches get money,fuck bitches get money,fuck bitches get money,23,fuck bitch get money,fuck bitch get money,1
2,rt @_____al: exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,1,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt : exactly. don't be gettin no additional pussy for them soft ass tweets. jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,rt exactly don't be gettin no additional pussy for them soft ass tweets jus be you dude lol,95,rt exactly do n't be gettin no additional pussy for them soft ass tweet jus be you dude lol,exactly n't gettin additional pussy soft ass tweet jus dude lol,1
3,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,0,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse....duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use to be like you always tryn pull a finesse duh nicca i need mine,77,niccas use to be like you always tryn pull a finesse duh nicca i need mine,niccas use like always tryn pull finesse duh nicca need mine,1
4,@munafal777 karma is a bitch,1,karma is a bitch,karma is a bitch,karma is a bitch,karma is a bitch,17,karma be a bitch,karma bitch,1
...,...,...,...,...,...,...,...,...,...,...
40211,all dressed up and ready for @user,1,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,all dressed up and ready for,32,all dress up and ready for,dress ready,0
40212,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,0,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy-poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,too many bitches got rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,84,too many bitch get rabies and i hate a ho hoppin' woman #stank pussy poppin' woman,many bitch get rabies hate hoppin' woman #stank pussy poppin' woman,1
40213,@user the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,1,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #loveâ¦,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,91,the #lesmiserables gang ready for westendlive #lesmiserables #lesmis #westend #love,#lesmiserables gang ready westendlive #lesmiserables #lesmis #westend #love,0
40214,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,0,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch &#128074;&#128074;&#128074;,dumb ugly stupid bullshit ass bitch # # #,dumb ugly stupid bullshit ass bitch,54,dumb ugly stupid bullshit ass bitch,dumb ugly stupid bullshit ass bitch,1


In [59]:
# Saving final data to google drive in order to build model
final_df.to_csv('final_data.csv')
!cp final_data.csv "gdrive/My Drive/hate_speech/"