# <center>  Trump Reddit Comments
Source:  https://www.kaggle.com/amalinow/reddit-comments-on-presidential-inauguration
### Import Preliminaries

In [35]:
%matplotlib inline

import numpy as np
import nltk
import matplotlib.pyplot as plt
import pandas as pd
import warnings

# Default size (width, height) applied to every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (30,10)
# Ask the inline backend to render figures in the high-resolution 'retina' format.
%config InlineBackend.figure_format = 'retina'
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides pandas warnings (e.g.
# SettingWithCopyWarning) that could flag problems in the cleaning cells;
# consider narrowing the filter.
warnings.filterwarnings('ignore')

### Import Data

In [36]:
# Importing the data via the Dropbox link. `pd.read_csv` already returns a
# DataFrame, so the result is used directly without re-wrapping it.
comments = pd.read_csv('https://www.dropbox.com/s/wuunsztqf5viw8k/reddit_trump.csv?dl=1')
# (rows, columns) of the loaded frame
comments.shape

(272, 8)

In [37]:
# viewing the dataframe structure: column names, non-null counts, dtypes and memory usage
comments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 272 entries, 0 to 271
Data columns (total 8 columns):
Title            272 non-null object
Post             272 non-null object
PostDate         272 non-null object
Metadata         272 non-null object
Comments         272 non-null object
PostLocation     272 non-null object
PostLatLon       272 non-null object
CommentLatLon    272 non-null object
dtypes: object(8)
memory usage: 17.1+ KB


In [38]:
# viewing the first 15 rows of the dataframe (the top of the frame, not a random sample)
comments.head(15)

Unnamed: 0,Title,Post,PostDate,Metadata,Comments,PostLocation,PostLatLon,CommentLatLon
0,Americans seem more interested in inauguration...,201020112012Americans seem more interested in ...,2017-01-19T22:39:25+00:00,"1,987 points 208 comments submitted 15 hours a...",top 200 commentsshow all 210sorted by: besttop...,[],"[29.8645414, 67.3294806]","[29.8645414, 67.3294806]"
1,People are being told not to watch Inauguration,208209210People are being told not to watch In...,2017-01-19T04:17:02+00:00,209 points 37 comments submitted 1 day ago by ...,all 37 commentssorted by: besttopnewcontrovers...,[],"[29.8645414, 67.3294806]","[29.8645414, 67.3294806]"
2,Michael Moore reveals plans for major protest ...,10.0k10.0k10.0kMichael Moore reveals plans for...,2016-12-11T11:43:16+00:00,"10,057 points 4,014 comments submitted 1 month...",top 200 commentsshow 500sorted by: besttopnewc...,[],"[35.3395079, -97.48670279999999]","[35.3395079, -97.48670279999999]"
3,PROJECT VERITAS - Part I: Undercover investiga...,19.4k19.4k19.4kPROJECT VERITAS - Part I: Under...,2017-01-16T17:27:45+00:00,"19,400 points 1,822 comments submitted 3 days ...",top 200 commentsshow 500sorted by: besttopnewc...,[],"[47.7510741, -120.7401386]","[47.7510741, -120.7401386]"
4,It's official: Trump will become the first U.S...,33.5k33.5k33.5kIt's official: Trump will becom...,2016-11-14T19:32:21+00:00,"33,529 points 1,135 comments submitted 2 month...",top 200 commentsshow 500sorted by: besttopnewc...,[],[],[]
5,Anyone going to Washington to watch the inaugu...,136137138Anyone going to Washington to watch t...,2016-11-20T15:12:01+00:00,137 points 26 comments submitted 2 months ago ...,all 26 commentssorted by: besttopnewcontrovers...,[u'Washington'],[],[]
6,WE MUST STOP TRUMP'S INAUGURATION,858385848585WE MUST STOP TRUMP'S INAUGURATION ...,2016-12-09T10:57:08+00:00,"8,594 points 166 comments submitted 1 month ag...",all 166 commentssorted by: besttopnewcontrover...,[],[],[]
7,"Discussion Thread for Veep S05E10 - ""Inaugurat...","868788Discussion Thread for Veep S05E10 - ""Ina...",2016-06-27T01:56:52+00:00,90 points 412 comments submitted 6 months ago ...,top 200 commentsshow all 412sorted by: besttop...,[],[],[]
8,PROJECT VERITAS - Part II: NEW Investigation U...,14.4k14.4k14.4kPROJECT VERITAS - Part II: NEW ...,2017-01-17T19:17:55+00:00,"14,446 points 1,601 comments submitted 2 days ...",top 200 commentsshow 500sorted by: besttopnewc...,[],[],[]
9,"""Taylor Swift and Kanye to perform at Trump in...","14.1k14.1k14.1k""Taylor Swift and Kanye to perf...",2016-12-13T19:52:07+00:00,"14,068 points 409 comments submitted 1 month a...",top 200 commentsshow all 409sorted by: besttop...,[u'Kanye'],[],[]


# <center> Exploratory Analysis

In [39]:
# print the text of one randomly drawn post; no random_state is passed, so a
# re-run may show a different row
print(comments['Post'].sample(1).values)

[ "796079617962Let's show those Libtards that we appreciate 3 Doors Down performing at inauguration even if they don't! (i.redd.it)submitted 5 days ago by Diotima245USAF209 commentsshare"]


In [40]:
# print one randomly drawn title; no random_state is passed, so a re-run may
# show a different row
print(comments['Title'].sample(1).values)

[ 'The first-known photograph of a presidential inauguration, March 1857. [789 x 683]']


# <center>  Cleaning Data

##### Reformatting Parent ID Feature

In [41]:
# Removing the leading numeric ID from Post and storing it in a new
# 'Parent ID' feature when one is present.

# A leading run of digits must be longer than this to be treated as an ID;
# shorter runs are kept as ordinary post text that merely starts with a number.
MAX_NON_ID_DIGITS = 5


def _split_leading_id(post, max_non_id_digits=MAX_NON_ID_DIGITS):
    """Separate a leading run of digits from the rest of a post.

    Parameters
    ----------
    post : str
        Post text, possibly prefixed with a run of digits.
    max_non_id_digits : int
        Longest leading digit run that is still considered plain text.

    Returns
    -------
    tuple
        ``(parent_id, text)``. ``parent_id`` is the digit prefix as a string,
        or ``np.nan`` when the prefix is missing or too short; ``text`` is the
        post without the extracted prefix. Non-string values are returned
        unchanged with ``np.nan`` as the ID.
    """
    if not isinstance(post, str):
        return np.nan, post

    # Count the leading digits. Counting (rather than remembering the index of
    # the first non-digit) also gives the right length for an all-digit post
    # and works for an empty string.
    digit_count = 0
    for character in post:
        if not character.isdigit():
            break
        digit_count += 1

    if digit_count > max_non_id_digits:
        return post[:digit_count], post[digit_count:]
    return np.nan, post


# NOTE(review): prefixes such as "10.0k10.0k10.0k" start with only two digits
# before the '.', so they are left in Post by this rule - confirm whether they
# should be stripped as well.
split_posts = [_split_leading_id(post) for post in comments['Post']]

# Assign whole columns at once; chained `comments[col].iloc[i] = value`
# assignment is not guaranteed to write back to the frame.
comments['Parent ID'] = [parent_id for parent_id, _ in split_posts]
comments['Post'] = [text for _, text in split_posts]

# viewing a sample of the changes
comments[['Post','Parent ID']].sample(5)

Unnamed: 0,Post,Parent ID
233,WOLF BLITZER CNN WAS JUST THREATENING TRUMP AT...,164016411642
200,One of my ASL interpreting professors was work...,149814991500
182,"Kellyanne Conways birthday is January 20th, he...",358535863587
114,Trump inauguration boycott escalates (bbc.com)...,212621272128
197,Trump ‘aggressively’ pursuing Netanyahu to att...,128112821283


##### Removing Submission Information

In [42]:
# removing submission information from post comment
SUBMISSION_MARKER = 'submitted'


def _strip_submission_info(post, marker=SUBMISSION_MARKER):
    """Return `post` cut off just before the first occurrence of `marker`.

    Posts that do not contain `marker` (and non-string values) are returned
    unchanged. `str.find` returns -1 when the marker is missing, and slicing
    with that value would silently drop the last character of the post.
    """
    if not isinstance(post, str):
        return post
    post_endpoint = post.find(marker)
    if post_endpoint == -1:
        return post
    return post[:post_endpoint]


# NOTE(review): the cut happens at the FIRST 'submitted', so a post whose own
# text contains that word is truncated early - confirm this is acceptable.
comments['Post'] = [_strip_submission_info(post) for post in comments['Post']]

##### Reformatting Source Feature

In [43]:
# Move the last parenthesised group of each post, e.g. "(vox.com)", into a
# separate 'Source' feature.
def _split_source(post):
    """Split the last "(...)" group off a post.

    Returns
    -------
    tuple
        ``(text, source)``. ``source`` is the span from the last '(' that
        precedes the final ')' up to and including that ')'; ``text`` is
        everything before that '(' (anything after the ')' is discarded).
        When no such pair exists, or the value is not a string, the post is
        returned unchanged with an empty source.
    """
    if not isinstance(post, str):
        return post, ''

    source_end = post.rfind(')')
    if source_end == -1:
        return post, ''

    # Search only to the left of the closing parenthesis. A manual backward
    # scan would wrap around through negative indexes when no '(' exists.
    source_start = post.rfind('(', 0, source_end)
    if source_start == -1:
        return post, ''

    return post[:source_start], post[source_start:source_end + 1]


post_and_source = [_split_source(post) for post in comments['Post']]

# input source value in Source feature
comments['Source'] = [source for _, source in post_and_source]
# remove the source from the post text
comments['Post'] = [text for text, _ in post_and_source]
            

##### Creating Points and Comments feature from Metdata

In [44]:
# inspect one Metadata string (row label 4) to see the layout that has to be parsed
comments['Metadata'][4]

'33,529 points 1,135 comments submitted 2 months ago by moonlightsugar to /r/The_Donald'

In [52]:
# Move the leading "<n> points" part of Metadata into a separate 'Points'
# feature (kept as text, e.g. "1,987 points").
POINTS_MARKER = 'points'


def _split_points(metadata, marker=POINTS_MARKER):
    """Split a metadata string right after the first occurrence of `marker`.

    Returns
    -------
    tuple
        ``(points, rest)``. ``points`` is the text up to and including
        `marker`; ``rest`` is the remainder. When `marker` is absent (or the
        value is not a string) ``points`` is ``None`` and the metadata is
        returned unchanged, instead of being cut at a position derived from
        `str.find`'s -1 "not found" result.
    """
    if not isinstance(metadata, str):
        return None, metadata
    point_ref = metadata.find(marker)
    if point_ref == -1:
        return None, metadata
    points_end = point_ref + len(marker)
    return metadata[:points_end], metadata[points_end:]


metadata_split = [_split_points(metadata) for metadata in comments['Metadata']]

# When the marker is missing, keep a value extracted by an earlier run of this
# cell if there is one; otherwise fall back to the '0' default. This makes
# re-running the cell harmless.
# NOTE(review): a singular "1 point" would not match the marker - confirm
# whether such rows exist in the data.
if 'Points' in comments.columns:
    previous_points = list(comments['Points'])
else:
    previous_points = ['0'] * len(comments)

comments['Points'] = [
    points if points is not None else previous
    for (points, _), previous in zip(metadata_split, previous_points)
]
comments['Metadata'] = [rest for _, rest in metadata_split]

In [53]:
# display the frame to check the new 'Parent ID', 'Source' and 'Points' columns
comments

Unnamed: 0,Title,Post,PostDate,Metadata,Comments,PostLocation,PostLatLon,CommentLatLon,Parent ID,Source,Points
0,Americans seem more interested in inauguration...,Americans seem more interested in inauguration...,2017-01-19T22:39:25+00:00,208 comments submitted 15 hours ago by kajana...,top 200 commentsshow all 210sorted by: besttop...,[],"[29.8645414, 67.3294806]","[29.8645414, 67.3294806]",201020112012,(vox.com),"1,987 points"
1,People are being told not to watch Inauguration,People are being told not to watch Inauguration,2017-01-19T04:17:02+00:00,37 comments submitted 1 day ago by this-is-my...,all 37 commentssorted by: besttopnewcontrovers...,[],"[29.8645414, 67.3294806]","[29.8645414, 67.3294806]",208209210,(self.The_Donald),209 points
2,Michael Moore reveals plans for major protest ...,10.0k10.0k10.0kMichael Moore reveals plans for...,2016-12-11T11:43:16+00:00,"4,014 comments submitted 1 month ago by god_i...",top 200 commentsshow 500sorted by: besttopnewc...,[],"[35.3395079, -97.48670279999999]","[35.3395079, -97.48670279999999]",,(independent.co.uk),"10,057 points"
3,PROJECT VERITAS - Part I: Undercover investiga...,19.4k19.4k19.4kPROJECT VERITAS - Part I: Under...,2017-01-16T17:27:45+00:00,"1,822 comments submitted 3 days ago by Simi51...",top 200 commentsshow 500sorted by: besttopnewc...,[],"[47.7510741, -120.7401386]","[47.7510741, -120.7401386]",,(youtube.com),"19,400 points"
4,It's official: Trump will become the first U.S...,33.5k33.5k33.5kIt's official: Trump will becom...,2016-11-14T19:32:21+00:00,"1,135 comments submitted 2 months ago by moon...",top 200 commentsshow 500sorted by: besttopnewc...,[],[],[],,(i.sli.mg),"33,529 points"
5,Anyone going to Washington to watch the inaugu...,Anyone going to Washington to watch the inaugu...,2016-11-20T15:12:01+00:00,26 comments submitted 2 months ago by bieberg...,all 26 commentssorted by: besttopnewcontrovers...,[u'Washington'],[],[],136137138,(self.AskThe_Donald),137 points
6,WE MUST STOP TRUMP'S INAUGURATION,WE MUST STOP TRUMP'S INAUGURATION,2016-12-09T10:57:08+00:00,166 comments submitted 1 month ago by rex_fur...,all 166 commentssorted by: besttopnewcontrover...,[],[],[],858385848585,(i.redd.it),"8,594 points"
7,"Discussion Thread for Veep S05E10 - ""Inaugurat...","Discussion Thread for Veep S05E10 - ""Inaugurat...",2016-06-27T01:56:52+00:00,412 comments submitted 6 months ago by exitst...,top 200 commentsshow all 412sorted by: besttop...,[],[],[],868788,(self.Veep),90 points
8,PROJECT VERITAS - Part II: NEW Investigation U...,14.4k14.4k14.4kPROJECT VERITAS - Part II: NEW ...,2017-01-17T19:17:55+00:00,"1,601 comments submitted 2 days ago by YESCAN...",top 200 commentsshow 500sorted by: besttopnewc...,[],[],[],,(youtube.com),"14,446 points"
9,"""Taylor Swift and Kanye to perform at Trump in...","14.1k14.1k14.1k""Taylor Swift and Kanye to perf...",2016-12-13T19:52:07+00:00,409 comments submitted 1 month ago by G3roni ...,top 200 commentsshow all 409sorted by: besttop...,[u'Kanye'],[],[],,(i.redd.it),"14,068 points"


In [None]:
# viewing a random sample of 5 rows to spot-check the changes
comments.sample(5)