In [1]:
# Import Dependencies
import pandas as pd
import re

In [2]:
# Grabbing stored Reddit Data files
date = '2021-06-10'
subreddit_list = ['askmen', 'askscience', 'askwomen', 'confession', 
                  'fitness', 'food', 'gaming', 'jokes', 'letstalkmusic', 
                  'lifeprotips', 'machinelearning', 'math', 
                  'relationships', 'roastme', 'science', 'sex', 
                  'therapy', 'todayilearned', 'totallynotrobots', 
                  'travel', 'wouldyourather', 'writingprompts', 'zen']

file_name_list = []

for subreddit in subreddit_list:
    file_name = date + '_' + subreddit + '.csv'
    file_name_list.append(file_name)

In [3]:
# Loading stored Reddit Data files
list_df = []
for file_name in file_name_list:
    df = pd.read_csv(file_name, index_col=None, header=0)
    list_df.append(df)

    
raw_reddit_data = pd.concat(list_df, axis=0, ignore_index=True)

In [4]:
raw_reddit_data

Unnamed: 0,Post_ID,Post,Comment,Score
0,4biemc,Basically it goes like this. Somebody posts a...,"[Today, our commenters were pretty cool guys](...",16629
1,4r1sqx,"She claims she thought her stuff was ""too girl...","Read the bottle - if it says ""shower gel"" or ""...",6458
2,5bvh8n,"Thanks, and have a good day.",[The mods' feelings about any posts/comments r...,5744
3,6a0lns,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...,1875
4,6kfzpq,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...,25849
...,...,...,...,...
5503,nj95cc,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...,46
5504,xy322,http://i.imgur.com/jqyhw.jpg It's more about t...,zen is the most simple belief set out there: i...,16
5505,y8325,"We have very few submissions, but quite a lot ...",deleted ^^^^^^^^^^^^^^^^0.6321gone94407),21
5506,y9swt,"Posters to r/Zen, please - terse Zen-like sayi...","Definitely agree with this. A lot of the ""ridd...",31


In [5]:
# Selecting Post and Comment into (comment/reply) pair
reddit_data = raw_reddit_data[['Post', 'Comment']]

In [6]:
# Renaming columns
old_columns = ['Post', 'Comment']
new_columns = ['Comment', 'Reply']
reddit_data = reddit_data.rename(dict(zip(old_columns, new_columns)), 
                                 axis='columns')

In [7]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a...,"[Today, our commenters were pretty cool guys](..."
1,"She claims she thought her stuff was ""too girl...","Read the bottle - if it says ""shower gel"" or ""..."
2,"Thanks, and have a good day.",[The mods' feelings about any posts/comments r...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5504,http://i.imgur.com/jqyhw.jpg It's more about t...,zen is the most simple belief set out there: i...
5505,"We have very few submissions, but quite a lot ...",deleted ^^^^^^^^^^^^^^^^0.6321gone94407)
5506,"Posters to r/Zen, please - terse Zen-like sayi...","Definitely agree with this. A lot of the ""ridd..."


In [8]:
# Cleaning text in reddit data
reddit_data['Comment'] = reddit_data['Comment'].apply(lambda x: ' '.join(str(x).split()))
reddit_data['Reply'] = reddit_data['Reply'].apply(lambda x: ' '.join(str(x).split()))

In [9]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"[Today, our commenters were pretty cool guys](..."
1,"She claims she thought her stuff was ""too girl...","Read the bottle - if it says ""shower gel"" or ""..."
2,"Thanks, and have a good day.",[The mods' feelings about any posts/comments r...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5504,http://i.imgur.com/jqyhw.jpg It's more about t...,zen is the most simple belief set out there: i...
5505,"We have very few submissions, but quite a lot ...",deleted ^^^^^^^^^^^^^^^^0.6321gone94407)
5506,"Posters to r/Zen, please - terse Zen-like sayi...","Definitely agree with this. A lot of the ""ridd..."


In [10]:
# Retaining only alphabet, numbers and /.':?!,
reddit_data['Comment'] = reddit_data['Comment'].str.replace("[^a-zA-Z0-9/.':?!,]", ' ')
reddit_data['Reply'] = reddit_data['Reply'].str.replace("[^a-zA-Z0-9/.':?!,]", ' ')

In [11]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"Today, our commenters were pretty cool guys ..."
1,She claims she thought her stuff was too girl...,Read the bottle if it says shower gel or ...
2,"Thanks, and have a good day.",The mods' feelings about any posts/comments r...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5504,http://i.imgur.com/jqyhw.jpg It's more about t...,zen is the most simple belief set out there: i...
5505,"We have very few submissions, but quite a lot ...",deleted 0.6321gone94407
5506,"Posters to r/Zen, please terse Zen like sayi...",Definitely agree with this. A lot of the ridd...


In [12]:
reddit_data['Reply'][0]

' Today, our commenters were pretty cool guys  http://imgur.com/HcR1Opd '

In [13]:
# Recleaning text in reddit data
reddit_data['Comment'] = reddit_data['Comment'].apply(lambda x: ' '.join(str(x).split()))
reddit_data['Reply'] = reddit_data['Reply'].apply(lambda x: ' '.join(str(x).split()))

In [14]:
reddit_data['Reply'][0]

'Today, our commenters were pretty cool guys http://imgur.com/HcR1Opd'

In [15]:
# Removing links
reddit_data['Comment'] = reddit_data['Comment'].str.replace('https:\/\/.*', '')
reddit_data['Comment'] = reddit_data['Comment'].str.replace('https:\/\/*.*/*', '')
reddit_data['Comment'] = reddit_data['Comment'].str.replace('http:\/\/.*', '')
reddit_data['Comment'] = reddit_data['Comment'].str.replace('http:\/\/*.*/*', '')
reddit_data['Reply'] = reddit_data['Reply'].str.replace('https:\/\/.*', '')
reddit_data['Reply'] = reddit_data['Reply'].str.replace('https:\/\/*.*/*', '')
reddit_data['Reply'] = reddit_data['Reply'].str.replace('http:\/\/.*', '')
reddit_data['Reply'] = reddit_data['Reply'].str.replace('http:\/\/*.*/*', '')

In [16]:
reddit_data['Reply'][0]

'Today, our commenters were pretty cool guys '

In [17]:
# Recleaning text in reddit data
reddit_data['Comment'] = reddit_data['Comment'].apply(lambda x: ' '.join(str(x).split()))
reddit_data['Reply'] = reddit_data['Reply'].apply(lambda x: ' '.join(str(x).split()))

In [18]:
reddit_data['Reply'][0]

'Today, our commenters were pretty cool guys'

In [19]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"Today, our commenters were pretty cool guys"
1,She claims she thought her stuff was too girly...,Read the bottle if it says shower gel or body ...
2,"Thanks, and have a good day.",The mods' feelings about any posts/comments re...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5504,,zen is the most simple belief set out there: i...
5505,"We have very few submissions, but quite a lot ...",deleted 0.6321gone94407
5506,"Posters to r/Zen, please terse Zen like saying...",Definitely agree with this. A lot of the riddl...


In [20]:
reddit_data['Reply'][5505]

'deleted 0.6321gone94407'

In [21]:
# Filtering alphanumeric words
reddit_data['Comment'] = reddit_data['Comment'].str.replace("[a-zA-Z]*\d+[a-zA-Z]+|[a-zA-Z]+\d+[a-zA-Z]*", '')
reddit_data['Reply'] = reddit_data['Reply'].str.replace("[a-zA-Z]*\d+[a-zA-Z]+|[a-zA-Z]+\d+[a-zA-Z]*", '')

In [22]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"Today, our commenters were pretty cool guys"
1,She claims she thought her stuff was too girly...,Read the bottle if it says shower gel or body ...
2,"Thanks, and have a good day.",The mods' feelings about any posts/comments re...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5504,,zen is the most simple belief set out there: i...
5505,"We have very few submissions, but quite a lot ...",deleted 0.94407
5506,"Posters to r/Zen, please terse Zen like saying...",Definitely agree with this. A lot of the riddl...


In [23]:
# Removing cleaned empty posts and comments
empty_comments = reddit_data['Comment'] != ""
empty_replies = reddit_data['Reply'] != ""
reddit_data = reddit_data[empty_comments & empty_replies]

In [24]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"Today, our commenters were pretty cool guys"
1,She claims she thought her stuff was too girly...,Read the bottle if it says shower gel or body ...
2,"Thanks, and have a good day.",The mods' feelings about any posts/comments re...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5502,I never posted here and mostly just lurk occas...,This is how the place has always been. There a...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5505,"We have very few submissions, but quite a lot ...",deleted 0.94407
5506,"Posters to r/Zen, please terse Zen like saying...",Definitely agree with this. A lot of the riddl...


In [25]:
reddit_data['Comment'][7]

'This is more for those who built their own wealth instead of those who were born into it. I m wondering as your wealth grew, or you became rich , what changes occurred that you did not expect? EDIT: I m getting a lot of what about wealthy women? questions. I just want to say, I am asking males because I m a male myself so I feel their experiences to be more relatable, this is the r/askmen sub and you are very free to ask women the same question on the r/askwomen sub. No sexist intentions, just find males more relatable in this context.'

In [26]:
reddit_data['Reply'][7]

'Things haven t changed much for me. Neither friends nor family hit me up for money. I dress like a slob. I only work when I want to but that has gotten me complacent. Women don t throw themselves at me nor do I think they re after my money. I travel a lot. I almost never cook for myself. I default to ordering in or picking up fast food. I love fancy restaurants. I still struggle to spend money on some things. And yet on other things, I ll drop thousands of dollars without blinking. The biggest change for me has been motivation. I used to love working and now I m struggling to be excited about anything. The biggest challenge is teaching my kids how to be self sufficient. I had house cleaners and cancelled the service. We do our own house chores. I do my own yard work. I still want them to have pride in ownership and recognize that there isn t a magical money tree.'

In [27]:
len(reddit_data['Comment'][0].split())

34

In [35]:
small_comments = reddit_data[reddit_data['Comment'].map(len) <= 8]

In [40]:
small_comments

Unnamed: 0,Comment,Reply
192,Title,My college crush asked me out on a date and I ...
225,Thanks!,"Show, don't tell. Show them a lot of attention..."
342,Source:,This is an open question! Sort of! It has a un...
343,This one,"As an actual expert in visual perception, allo..."
432,Title!,"Yes, there is a more or less standard way of s..."
841,Light,The ice cream machine was working?!?
1360,Video,Vertical Video. The lift doesn't count.
1390,Article:,Drinking more water makes me more awake becaus...
1444,Video:,My ankles tend to be fairly prone to twisting ...
1492,Video:,Visiting from /r/all. Does this workout allow ...


In [44]:
reddit_data = reddit_data.drop(list(small_comments.index))

In [45]:
reddit_data

Unnamed: 0,Comment,Reply
0,Basically it goes like this. Somebody posts a ...,"Today, our commenters were pretty cool guys"
1,She claims she thought her stuff was too girly...,Read the bottle if it says shower gel or body ...
2,"Thanks, and have a good day.",The mods' feelings about any posts/comments re...
3,"Kidding. Relax. It's next Sunday, don't forget.",HAHAHA PSYCH YOU TOTALLY GOT US ALL OP YOU'RE ...
4,Background story: I've been dating my current ...,The moral of the story is: Work up an appetite...
...,...,...
5502,I never posted here and mostly just lurk occas...,This is how the place has always been. There a...
5503,It feels like a good percentage of this sub is...,right now the only thing i see on this subredd...
5505,"We have very few submissions, but quite a lot ...",deleted 0.94407
5506,"Posters to r/Zen, please terse Zen like saying...",Definitely agree with this. A lot of the riddl...
