In [1]:
import requests
import pandas as pd
import time
import random

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# url_1 = 'https://www.reddit.com/r/boardgames/new.json'
# url_2 = 'https://www.reddit.com/r/AskMen/new.json'

# legaladvice_url = 'https://www.reddit.com/r/legaladvice/new.json'
# relationshipadvice_url = 'https://www.reddit.com/r/relationship_advice/new.json'

# showerthoughts_url = 'https://www.reddit.com/r/Showerthoughts/new.json'
# stonerphilosophy_url = 'https://www.reddit.com/r/StonerPhilosophy/new.json'

twosentencehorror_url = 'https://www.reddit.com/r/TwoSentenceHorror/new.json'
twosentencecomedy_url = 'https://www.reddit.com/r/TwoSentenceComedy/new.json'

In [3]:
# base_urls = {'boardgames' : url_1, 'askmen' : url_2}

# base_urls = {'legaladvice' : legaladvice_url, 'relationshipadvice' : relationshipadvice_url}

# base_urls = {'showerthoughts' : showerthoughts_url, 'stonerphilosophy' : stonerphilosophy_url}

base_urls = {'twosentencehorror' : twosentencehorror_url, 'twosentencecomedy' : twosentencecomedy_url}

In [4]:
params = {'limit' : 100, 'after' : None}

In [5]:
headers = {'User-agent': 'agent k. bourne'}

In [6]:
all_posts = {}

# Looping through dictionary of subreddit names and URLs.
for subred, url in base_urls.items():
    
    # Printing current subreddit being scraped.
    print(f'\nSCRAPING SUBREDDIT: {subred}')
    
    # Reseting 'after' parameter to be None before first scraping attempt.
    # This parameter helps to keep track of last post from each successful scrape attempt,
    # so that next scrape is from the previous scrape's last post onwards.
    params['after'] = None
    
    # Initiating a new list for the current subreddit being scraped.
    all_posts[subred] = []
    
    # Counter to keep track of no. of total posts scraped so far. This will be used in the while loop below.
    posts_scraped = 0
    
    # While loop to scrape posts from current subreddit.
    while posts_scraped < 1000:
        
        # Requesting data from reddit API with custom user-agent in headers, and 'limit'=100 and 'after' as parameters.
        response = requests.get(url, headers=headers, params=params)
        
        # Checking for successful response from API.
        if response.status_code == 200:
            print("\nSuccessful scraping attempt.")
        else:
            print(f"\nUnsuccessful scraping attempt. Error status code: {response.status_code}")
            break
        
        response_dict = response.json()
        
        # Preparing list of all posts scraped in this attempt using list comprehension.
        current_posts = [posts['data'] for posts in response_dict['data']['children']]
        
        # Updating counter of no. of total posts scraped so far.
        # Printing relevant info messages about no. of posts scraped.
        posts_scraped += len(current_posts)
        print(f'Posts scraped in this attempt = {len(current_posts)}')
        print(f'Total posts scraped so far    = {posts_scraped}')
        
        # Updating 'after' parameter with the tag of last post scraped in this attempt,
        # so that next scrape attempt is from this post onwards.
        params['after'] = response_dict['data']['after']
        
        # Extending the list of posts scraped so far from current subreddit by adding posts scraped in this attempt.
        all_posts[subred].extend(current_posts)
        
        # Saving posts scraped so far as a CSV file (overwriting any previously written CSV file).
        pd.DataFrame(all_posts[subred]).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        print(f'Saved posts to CSV: {subred}_raw_posts.csv')
        
        
        # Using try-except loop.
        # Trying to read an existing CSV file with posts scraped from current subreddit, if any.
        # If CSV file exists and is read successfully, then just append (mode='a') posts scraped in this attempt ('current_posts') into the existing CSV file (with header=False).
        # If any error occurs during reading existing CSV file (most likely FileNotFoundError), then write (mode='w') posts scraped in this attempt into new file (with header=True).
        #try:
        #    saved_posts = pd.read_csv(f'../data/{subred}_raw_posts.csv')
        #    print(f'Existing CSV found: {subred}_raw_posts.csv')
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='a', header=False, index=False)
        #    print(f'Updated existing CSV.')
        #    
        #except:
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        #    print(f'New CSV created: {subred}_raw_posts.csv')
            
        
        # Setting a random sleep timer of 5-15 seconds so as to not bombard reddit server with multiple requests within seconds.
        sleep_duration = random.randint(5,15)
        print(f'Sleeping for {sleep_duration} seconds...')
        time.sleep(sleep_duration)
    
print('\nDone.')


SCRAPING SUBREDDIT: twosentencehorror

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 100
Saved posts to CSV: twosentencehorror_raw_posts.csv
Sleeping for 9 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 200
Saved posts to CSV: twosentencehorror_raw_posts.csv
Sleeping for 15 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 300
Saved posts to CSV: twosentencehorror_raw_posts.csv
Sleeping for 14 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 400
Saved posts to CSV: twosentencehorror_raw_posts.csv
Sleeping for 14 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 500
Saved posts to CSV: twosentencehorror_raw_posts.csv
Sleeping for 12 seconds...

Successful scraping attempt.
Posts scraped in this attem

In [7]:
# Checking 'all_posts' dictionary to ensure the desired subreddits and posts are correctly scraped.

all_posts.keys()

dict_keys(['twosentencehorror', 'twosentencecomedy'])

**Checking posts scraped from subreddit *'twosentencehorror'***

Number of posts in *all_posts* dictionary must be same as in *twosentencehorror_raw_posts.csv*

In [8]:
len(all_posts['twosentencehorror'])

1097

In [9]:
# Converting to dataframe

twosentencehorror_df = pd.DataFrame(all_posts['twosentencehorror'])

In [10]:
twosentencehorror_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,author_cakeday,link_flair_template_id
0,,TwoSentenceHorror,"As I was about to give into the darkness, I he...",t2_5oncx5gq,False,,0,False,"Helplessly, I collapsed from the pain and exha...",[],r/TwoSentenceHorror,False,6,,0,True,t3_ggk0bw,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggk0bw,True,,Luminoustygian,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggk0bw/helplessl...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589047000.0,0,,False,,
1,,TwoSentenceHorror,Local spa remains closed following float tank ...,t2_hv907,False,,0,False,As my sleep paralysis demon applied some light...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjzxf,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjzxf,True,,Jimmy_Two_Shoes,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjzxf/as_my_sle...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589047000.0,0,,False,,
2,,TwoSentenceHorror,Those were the last words I heard crackling fr...,t2_3ulsvgjr,False,,0,False,It’s a beautiful day.,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjtxj,False,dark,0.99,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjtxj,True,,Thin-White-Duke97,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjtxj/its_a_bea...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,
3,,TwoSentenceHorror,They should have considered our feelings when ...,t2_ixik8,False,,0,False,I had to console my deeply sobbing wife after ...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjtjc,False,dark,1.0,,public,4,0,{},,False,[],,False,False,,{},,False,4,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjtjc,True,,h70541,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjtjc/i_had_to_...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,
4,,TwoSentenceHorror,"Instead, I'm going to do what's called a pro g...",t2_26c9nu68,False,,0,False,I'm a pilot and my passenger plane has been hi...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjr2i,False,dark,0.67,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjr2i,True,,Jzard,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjr2i/im_a_pilo...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,


In [11]:
twosentencehorror_df.shape

(1097, 103)

In [12]:
# Checking for unique posts in total scraped posts.

len(twosentencehorror_df['name'].unique())

997

In [13]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

twosentencehorror_from_csv = pd.read_csv('../data/twosentencehorror_raw_posts.csv')

In [14]:
twosentencehorror_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,author_cakeday,link_flair_template_id
0,,TwoSentenceHorror,"As I was about to give into the darkness, I he...",t2_5oncx5gq,False,,0,False,"Helplessly, I collapsed from the pain and exha...",[],r/TwoSentenceHorror,False,6,,0,True,t3_ggk0bw,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggk0bw,True,,Luminoustygian,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggk0bw/helplessl...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589047000.0,0,,False,,
1,,TwoSentenceHorror,Local spa remains closed following float tank ...,t2_hv907,False,,0,False,As my sleep paralysis demon applied some light...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjzxf,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjzxf,True,,Jimmy_Two_Shoes,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjzxf/as_my_sle...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589047000.0,0,,False,,
2,,TwoSentenceHorror,Those were the last words I heard crackling fr...,t2_3ulsvgjr,False,,0,False,It’s a beautiful day.,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjtxj,False,dark,0.99,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjtxj,True,,Thin-White-Duke97,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjtxj/its_a_bea...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,
3,,TwoSentenceHorror,They should have considered our feelings when ...,t2_ixik8,False,,0,False,I had to console my deeply sobbing wife after ...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjtjc,False,dark,1.0,,public,4,0,{},,False,[],,False,False,,{},,False,4,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjtjc,True,,h70541,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjtjc/i_had_to_...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,
4,,TwoSentenceHorror,"Instead, I'm going to do what's called a pro g...",t2_26c9nu68,False,,0,False,I'm a pilot and my passenger plane has been hi...,[],r/TwoSentenceHorror,False,6,,0,True,t3_ggjr2i,False,dark,0.67,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589075000.0,text,6,,,text,self.TwoSentenceHorror,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_30tmh,,,,ggjr2i,True,,Jzard,,0,True,all_ads,False,[],False,,/r/TwoSentenceHorror/comments/ggjr2i/im_a_pilo...,all_ads,False,https://www.reddit.com/r/TwoSentenceHorror/com...,376029,1589046000.0,0,,False,,


In [15]:
# Ensuring shape of df from CSV is same as above.

twosentencehorror_from_csv.shape

(1097, 103)

In [16]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(twosentencehorror_from_csv['name'].unique())

997

**Checking posts scraped from subreddit *'twosentencecomedy'***

Number of posts in *all_posts* dictionary must be same as in *twosentencecomedy_raw_posts.csv*

In [17]:
len(all_posts['twosentencecomedy'])

1072

In [18]:
# Converting to dataframe

twosentencecomedy_df = pd.DataFrame(all_posts['twosentencecomedy'])

In [19]:
twosentencecomedy_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,thumbnail_width,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,post_hint,preview
0,,TwoSentenceComedy,It was at that point I knew I had won the limb...,t2_63v2i7nd,False,,0,False,"The Judge looked down upon me and bellowed, “M...",[],r/TwoSentenceComedy,False,,,0,,False,t3_gggjzn,False,dark,1.0,,public,3,0,{},,,False,[],,False,False,,{},,False,3,,False,self,False,,[],{},,True,,1589064000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,gggjzn,True,,plenty0fme,,0,True,,False,[],False,,/r/TwoSentenceComedy/comments/gggjzn/the_judge...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589035000.0,0,,False,,
1,,TwoSentenceComedy,I wouldn't have done it if no one was around,t2_5hdakljr,False,,0,False,I looked around to see if there was anyone aro...,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggfoui,False,dark,1.0,,public,2,0,{},,,False,[],,False,False,,{},,False,2,,False,self,False,,[],{},,True,,1589061000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggfoui,True,,Passionate_Writing_,,3,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggfoui/i_looked_...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589032000.0,0,,False,,
2,,TwoSentenceComedy,But we ran out of milk and she turns her nose ...,t2_21k04zhn,False,,0,False,I was gonna give her breakfast in bed....,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggfns0,False,dark,1.0,,public,1,0,{},,,False,[],,False,False,,{},,False,1,,True,self,False,,[],{},,True,,1589061000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggfns0,True,,mydadsnameisharold,,0,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggfns0/i_was_gon...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589032000.0,0,,False,,
3,,TwoSentenceComedy,Sign language.,t2_3xc99ad8,False,,0,False,What is the least spoken language om earth?,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggf89y,False,dark,0.98,,public,113,0,{},,,False,[],,False,False,,{},,False,113,,False,self,False,,[],{},,True,,1589059000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggf89y,True,,schepschutter,,3,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggf89y/what_is_t...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589030000.0,0,,False,,
4,,TwoSentenceComedy,My eyes darted to the empty hook by the toilet...,t2_21k04zhn,False,,0,False,I pulled open the shower curtain and saw the m...,[],r/TwoSentenceComedy,False,,,0,,False,t3_gfyuh7,False,dark,0.97,,public,208,0,{},,,False,[],,False,False,,{},,False,208,,True,self,False,,[],{},,True,,1588992000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,gfyuh7,True,,mydadsnameisharold,,9,True,,False,[],False,,/r/TwoSentenceComedy/comments/gfyuh7/i_pulled_...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1588963000.0,0,,False,,


In [20]:
twosentencecomedy_df.shape

(1072, 105)

In [21]:
# Checking for unique posts in total scraped posts.

len(twosentencecomedy_df['name'].unique())

672

In [22]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

twosentencecomedy_from_csv = pd.read_csv('../data/twosentencecomedy_raw_posts.csv')

In [23]:
twosentencecomedy_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,thumbnail_width,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,post_hint,preview
0,,TwoSentenceComedy,It was at that point I knew I had won the limb...,t2_63v2i7nd,False,,0,False,"The Judge looked down upon me and bellowed, “M...",[],r/TwoSentenceComedy,False,,,0,,False,t3_gggjzn,False,dark,1.0,,public,3,0,{},,,False,[],,False,False,,{},,False,3,,False,self,False,,[],{},,True,,1589064000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,gggjzn,True,,plenty0fme,,0,True,,False,[],False,,/r/TwoSentenceComedy/comments/gggjzn/the_judge...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589035000.0,0,,False,,
1,,TwoSentenceComedy,I wouldn't have done it if no one was around,t2_5hdakljr,False,,0,False,I looked around to see if there was anyone aro...,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggfoui,False,dark,1.0,,public,2,0,{},,,False,[],,False,False,,{},,False,2,,False,self,False,,[],{},,True,,1589061000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggfoui,True,,Passionate_Writing_,,3,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggfoui/i_looked_...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589032000.0,0,,False,,
2,,TwoSentenceComedy,But we ran out of milk and she turns her nose ...,t2_21k04zhn,False,,0,False,I was gonna give her breakfast in bed....,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggfns0,False,dark,1.0,,public,1,0,{},,,False,[],,False,False,,{},,False,1,,True,self,False,,[],{},,True,,1589061000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggfns0,True,,mydadsnameisharold,,0,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggfns0/i_was_gon...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589032000.0,0,,False,,
3,,TwoSentenceComedy,Sign language.,t2_3xc99ad8,False,,0,False,What is the least spoken language om earth?,[],r/TwoSentenceComedy,False,,,0,,False,t3_ggf89y,False,dark,0.98,,public,113,0,{},,,False,[],,False,False,,{},,False,113,,False,self,False,,[],{},,True,,1589059000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,ggf89y,True,,schepschutter,,3,True,,False,[],False,,/r/TwoSentenceComedy/comments/ggf89y/what_is_t...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1589030000.0,0,,False,,
4,,TwoSentenceComedy,My eyes darted to the empty hook by the toilet...,t2_21k04zhn,False,,0,False,I pulled open the shower curtain and saw the m...,[],r/TwoSentenceComedy,False,,,0,,False,t3_gfyuh7,False,dark,0.97,,public,208,0,{},,,False,[],,False,False,,{},,False,208,,True,self,False,,[],{},,True,,1588992000.0,text,,,,text,self.TwoSentenceComedy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_ldsx1,,,,gfyuh7,True,,mydadsnameisharold,,9,True,,False,[],False,,/r/TwoSentenceComedy/comments/gfyuh7/i_pulled_...,,False,https://www.reddit.com/r/TwoSentenceComedy/com...,9880,1588963000.0,0,,False,,


In [24]:
# Ensuring shape of df from CSV is same as above.

twosentencecomedy_from_csv.shape

(1072, 105)

In [25]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(twosentencecomedy_from_csv['name'].unique())

672