In [1]:
import requests
import pandas as pd
import time
import random

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# url_1 = 'https://www.reddit.com/r/boardgames/new.json'
# url_2 = 'https://www.reddit.com/r/AskMen/new.json'

# legaladvice_url = 'https://www.reddit.com/r/legaladvice/new.json'
# relationshipadvice_url = 'https://www.reddit.com/r/relationship_advice/new.json'

showerthoughts_url = 'https://www.reddit.com/r/Showerthoughts/new.json'
stonerphilosophy_url = 'https://www.reddit.com/r/StonerPhilosophy/new.json'

# twosentencehorror_url = 'https://www.reddit.com/r/TwoSentenceHorror/new.json'
# twosentencecomedy_url = 'https://www.reddit.com/r/TwoSentenceComedy/new.json'

In [3]:
# base_urls = {'boardgames' : url_1, 'askmen' : url_2}

# base_urls = {'legaladvice' : legaladvice_url, 'relationshipadvice' : relationshipadvice_url}

base_urls = {'showerthoughts' : showerthoughts_url, 'stonerphilosophy' : stonerphilosophy_url}

# base_urls = {'twosentencehorror' : twosentencehorror_url, 'twosentencecomedy' : twosentencecomedy_url}

In [4]:
params = {'limit' : 100, 'after' : None}

In [5]:
headers = {'User-agent': 'agent j. bourne'}

In [6]:
all_posts = {}

# Looping through dictionary of subreddit names and URLs.
for subred, url in base_urls.items():
    
    # Printing current subreddit being scraped.
    print(f'\nSCRAPING SUBREDDIT: {subred}')
    
    # Reseting 'after' parameter to be None before first scraping attempt.
    # This parameter helps to keep track of last post from each successful scrape attempt,
    # so that next scrape is from the previous scrape's last post onwards.
    params['after'] = None
    
    # Initiating a new list for the current subreddit being scraped.
    all_posts[subred] = []
    
    # Counter to keep track of no. of total posts scraped so far. This will be used in the while loop below.
    posts_scraped = 0
    
    # While loop to scrape posts from current subreddit.
    while posts_scraped < 1000:
        
        # Requesting data from reddit API with custom user-agent in headers, and 'limit'=100 and 'after' as parameters.
        response = requests.get(url, headers=headers, params=params)
        
        # Checking for successful response from API.
        if response.status_code == 200:
            print("\nSuccessful scraping attempt.")
        else:
            print(f"\nUnsuccessful scraping attempt. Error status code: {response.status_code}")
            break
        
        response_dict = response.json()
        
        # Preparing list of all posts scraped in this attempt using list comprehension.
        current_posts = [posts['data'] for posts in response_dict['data']['children']]
        
        # Updating counter of no. of total posts scraped so far.
        # Printing relevant info messages about no. of posts scraped.
        posts_scraped += len(current_posts)
        print(f'Posts scraped in this attempt = {len(current_posts)}')
        print(f'Total posts scraped so far    = {posts_scraped}')
        
        # Updating 'after' parameter with the tag of last post scraped in this attempt,
        # so that next scrape attempt is from this post onwards.
        params['after'] = response_dict['data']['after']
        
        # Extending the list of posts scraped so far from current subreddit by adding posts scraped in this attempt.
        all_posts[subred].extend(current_posts)
        
        # Saving posts scraped so far as a CSV file (overwriting any previously written CSV file).
        pd.DataFrame(all_posts[subred]).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        print(f'Saved posts to CSV: {subred}_raw_posts.csv')
        
        
        # Using try-except loop.
        # Trying to read an existing CSV file with posts scraped from current subreddit, if any.
        # If CSV file exists and is read successfully, then just append (mode='a') posts scraped in this attempt ('current_posts') into the existing CSV file (with header=False).
        # If any error occurs during reading existing CSV file (most likely FileNotFoundError), then write (mode='w') posts scraped in this attempt into new file (with header=True).
        #try:
        #    saved_posts = pd.read_csv(f'../data/{subred}_raw_posts.csv')
        #    print(f'Existing CSV found: {subred}_raw_posts.csv')
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='a', header=False, index=False)
        #    print(f'Updated existing CSV.')
        #    
        #except:
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        #    print(f'New CSV created: {subred}_raw_posts.csv')
            
        
        # Setting a random sleep timer of 5-15 seconds so as to not bombard reddit server with multiple requests within seconds.
        sleep_duration = random.randint(5,15)
        print(f'Sleeping for {sleep_duration} seconds...')
        time.sleep(sleep_duration)
    
print('\nDone.')


SCRAPING SUBREDDIT: showerthoughts

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 100
Saved posts to CSV: showerthoughts_raw_posts.csv
Sleeping for 10 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 200
Saved posts to CSV: showerthoughts_raw_posts.csv
Sleeping for 6 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 300
Saved posts to CSV: showerthoughts_raw_posts.csv
Sleeping for 13 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 400
Saved posts to CSV: showerthoughts_raw_posts.csv
Sleeping for 5 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 500
Saved posts to CSV: showerthoughts_raw_posts.csv
Sleeping for 6 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts

In [7]:
# Checking 'all_posts' dictionary to ensure the desired subreddits and posts are correctly scraped.

all_posts.keys()

dict_keys(['showerthoughts', 'stonerphilosophy'])

**Checking posts scraped from subreddit *'showerthoughts'***

Number of posts in *all_posts* dictionary must be same as in *showerthoughts_raw_posts.csv*

In [8]:
len(all_posts['showerthoughts'])

1097

In [9]:
# Converting to dataframe

showerthoughts_df = pd.DataFrame(all_posts['showerthoughts'])

In [10]:
showerthoughts_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,thumbnail_width,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,post_hint,preview,author_cakeday
0,,Showerthoughts,,t2_2f4h60od,False,,0,False,"Most of these posts are high thoughts, chances...",[],r/Showerthoughts,False,6,,0,,True,t3_ggjveu,False,dark,0.8,,public,3,0,{},,,False,[],,False,False,,{},,False,3,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjveu,True,,mboutot,,1,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjveu/most_of_thes...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
1,,Showerthoughts,,t2_5z0vahke,False,,0,False,Animations might become good enough in the fut...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjt3u,False,dark,1.0,,public,2,0,{},,,False,[],,False,False,,{},,False,2,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjt3u,True,,DankTandon,,0,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjt3u/animations_m...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
2,,Showerthoughts,,t2_igdi1mn,False,,0,False,"If you've ever eaten a burger, you've essentia...",[],r/Showerthoughts,False,6,,0,,True,t3_ggjssk,False,dark,0.83,,public,4,0,{},,,False,[],,False,False,,{},,False,4,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjssk,True,,Ak171,,2,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjssk/if_youve_eve...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
3,,Showerthoughts,,t2_nvay03e,False,,0,False,If civilization ends and the next people find ...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjsbn,False,dark,1.0,,public,10,0,{},,,False,[],,False,False,,{},,False,10,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjsbn,True,,gypsypunkk,,2,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjsbn/if_civilizat...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
4,,Showerthoughts,,t2_chxn7,False,,0,False,People who complain about how ordering stuff o...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjsaa,False,dark,0.83,,public,4,0,{},,,False,[],,False,False,,{},,False,4,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjsaa,True,,parascrat,,0,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjsaa/people_who_c...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,


In [11]:
showerthoughts_df.shape

(1097, 106)

In [12]:
# Checking for unique posts in total scraped posts.

len(showerthoughts_df['name'].unique())

998

In [13]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

showerthoughts_from_csv = pd.read_csv('../data/showerthoughts_raw_posts.csv')

In [14]:
showerthoughts_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,thumbnail_width,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,post_hint,preview,author_cakeday
0,,Showerthoughts,,t2_2f4h60od,False,,0,False,"Most of these posts are high thoughts, chances...",[],r/Showerthoughts,False,6,,0,,True,t3_ggjveu,False,dark,0.8,,public,3,0,{},,,False,[],,False,False,,{},,False,3,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjveu,True,,mboutot,,1,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjveu/most_of_thes...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
1,,Showerthoughts,,t2_5z0vahke,False,,0,False,Animations might become good enough in the fut...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjt3u,False,dark,1.0,,public,2,0,{},,,False,[],,False,False,,{},,False,2,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjt3u,True,,DankTandon,,0,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjt3u/animations_m...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
2,,Showerthoughts,,t2_igdi1mn,False,,0,False,"If you've ever eaten a burger, you've essentia...",[],r/Showerthoughts,False,6,,0,,True,t3_ggjssk,False,dark,0.83,,public,4,0,{},,,False,[],,False,False,,{},,False,4,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjssk,True,,Ak171,,2,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjssk/if_youve_eve...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
3,,Showerthoughts,,t2_nvay03e,False,,0,False,If civilization ends and the next people find ...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjsbn,False,dark,1.0,,public,10,0,{},,,False,[],,False,False,,{},,False,10,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjsbn,True,,gypsypunkk,,2,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjsbn/if_civilizat...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,
4,,Showerthoughts,,t2_chxn7,False,,0,False,People who complain about how ordering stuff o...,[],r/Showerthoughts,False,6,,0,,True,t3_ggjsaa,False,dark,0.83,,public,4,0,{},,,False,[],,False,False,,{},,False,4,,False,self,False,,[],{},,True,,1589075000.0,text,6,,,text,self.Showerthoughts,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2szyo,,,,ggjsaa,True,,parascrat,,0,True,all_ads,False,[],False,,/r/Showerthoughts/comments/ggjsaa/people_who_c...,all_ads,False,https://www.reddit.com/r/Showerthoughts/commen...,19969042,1589046000.0,0,,False,,,


In [15]:
# Ensuring shape of df from CSV is same as above.

showerthoughts_from_csv.shape

(1097, 106)

In [16]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(showerthoughts_from_csv['name'].unique())

998

**Checking posts scraped from subreddit *'stonerphilosophy'***

Number of posts in *all_posts* dictionary must be same as in *stonerphilosophy_raw_posts.csv*

In [17]:
len(all_posts['stonerphilosophy'])

1000

In [18]:
# Converting to dataframe

stonerphilosophy_df = pd.DataFrame(all_posts['stonerphilosophy'])

In [19]:
stonerphilosophy_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,crosspost_parent_list,crosspost_parent,author_cakeday
0,,StonerPhilosophy,,t2_3amujz7c,False,,0,False,"Maybe you are actually born on earth, after yo...",[],r/StonerPhilosophy,False,,,0,True,t3_ggiz7t,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589072000.0,text,,,,text,self.StonerPhilosophy,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggiz7t,True,,nikintp,,0,True,,False,[],False,,/r/StonerPhilosophy/comments/ggiz7t/maybe_you_...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589043000.0,0,,False,,,
1,,StonerPhilosophy,,t2_5e0qtwon,False,,0,False,Why does each particle in the universe interac...,[],r/StonerPhilosophy,False,,,0,True,t3_ggicp1,False,dark,1.0,,public,3,0,{},,False,[],,False,False,,{},,False,3,,False,,False,,[],{},,True,,1589070000.0,text,,,,text,self.StonerPhilosophy,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggicp1,True,,universalprogenote,,3,True,,False,[],False,,/r/StonerPhilosophy/comments/ggicp1/why_does_e...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589041000.0,0,,False,,,
2,,StonerPhilosophy,Some kid:\nI'm 13 and i have premium\n\n\n\nCh...,t2_5gobw8sy,False,,0,False,"Even it's pornhub, that's some really wise advice",[],r/StonerPhilosophy,False,,,0,True,t3_ggi4b9,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589069000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggi4b9,True,,Nirvaner,,0,True,,False,[],False,,/r/StonerPhilosophy/comments/ggi4b9/even_its_p...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589041000.0,0,,False,,,
3,,StonerPhilosophy,It’s really annoying.,t2_3amujz7c,False,,0,False,Anyone’s heart beat rises while smoking up?,[],r/StonerPhilosophy,False,,,0,False,t3_ggftj4,False,dark,0.88,,public,6,0,{},,False,[],,False,False,,{},,False,6,,False,,False,,[],{},,True,,1589061000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggftj4,True,,nikintp,,9,True,,False,[],False,,/r/StonerPhilosophy/comments/ggftj4/anyones_he...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589033000.0,0,,False,,,
4,,StonerPhilosophy,"Sometimes when I smoke, I get anxious and a bi...",t2_7bejfvb,False,,0,False,Weed anxiety solution,[],r/StonerPhilosophy,False,,,0,False,t3_ggcbqc,False,dark,0.99,,public,279,0,{},,False,[],,False,False,,{},,False,279,,False,,False,,[],{},,True,,1589045000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggcbqc,True,,murrayjtm,,52,True,,False,[],False,,/r/StonerPhilosophy/comments/ggcbqc/weed_anxie...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589016000.0,0,,False,,,


In [20]:
stonerphilosophy_df.shape

(1000, 104)

In [21]:
# Checking for unique posts in total scraped posts.

len(stonerphilosophy_df['name'].unique())

1000

In [22]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

stonerphilosophy_from_csv = pd.read_csv('../data/stonerphilosophy_raw_posts.csv')

In [23]:
stonerphilosophy_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,crosspost_parent_list,crosspost_parent,author_cakeday
0,,StonerPhilosophy,,t2_3amujz7c,False,,0,False,"Maybe you are actually born on earth, after yo...",[],r/StonerPhilosophy,False,,,0,True,t3_ggiz7t,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589072000.0,text,,,,text,self.StonerPhilosophy,False,,,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggiz7t,True,,nikintp,,0,True,,False,[],False,,/r/StonerPhilosophy/comments/ggiz7t/maybe_you_...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589043000.0,0,,False,,,
1,,StonerPhilosophy,,t2_5e0qtwon,False,,0,False,Why does each particle in the universe interac...,[],r/StonerPhilosophy,False,,,0,True,t3_ggicp1,False,dark,1.0,,public,3,0,{},,False,[],,False,False,,{},,False,3,,False,,False,,[],{},,True,,1589070000.0,text,,,,text,self.StonerPhilosophy,False,,,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggicp1,True,,universalprogenote,,3,True,,False,[],False,,/r/StonerPhilosophy/comments/ggicp1/why_does_e...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589041000.0,0,,False,,,
2,,StonerPhilosophy,Some kid:\nI'm 13 and i have premium\n\n\n\nCh...,t2_5gobw8sy,False,,0,False,"Even it's pornhub, that's some really wise advice",[],r/StonerPhilosophy,False,,,0,True,t3_ggi4b9,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589069000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggi4b9,True,,Nirvaner,,0,True,,False,[],False,,/r/StonerPhilosophy/comments/ggi4b9/even_its_p...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589041000.0,0,,False,,,
3,,StonerPhilosophy,It’s really annoying.,t2_3amujz7c,False,,0,False,Anyone’s heart beat rises while smoking up?,[],r/StonerPhilosophy,False,,,0,False,t3_ggftj4,False,dark,0.88,,public,6,0,{},,False,[],,False,False,,{},,False,6,,False,,False,,[],{},,True,,1589061000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggftj4,True,,nikintp,,9,True,,False,[],False,,/r/StonerPhilosophy/comments/ggftj4/anyones_he...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589033000.0,0,,False,,,
4,,StonerPhilosophy,"Sometimes when I smoke, I get anxious and a bi...",t2_7bejfvb,False,,0,False,Weed anxiety solution,[],r/StonerPhilosophy,False,,,0,False,t3_ggcbqc,False,dark,0.99,,public,279,0,{},,False,[],,False,False,,{},,False,279,,False,,False,,[],{},,True,,1589045000.0,text,,,,text,self.StonerPhilosophy,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,False,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2s5se,,,,ggcbqc,True,,murrayjtm,,52,True,,False,[],False,,/r/StonerPhilosophy/comments/ggcbqc/weed_anxie...,,False,https://www.reddit.com/r/StonerPhilosophy/comm...,102623,1589016000.0,0,,False,,,


In [24]:
# Ensuring shape of df from CSV is same as above.

stonerphilosophy_from_csv.shape

(1000, 104)

In [25]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(stonerphilosophy_from_csv['name'].unique())

1000