In [1]:
import requests
import pandas as pd
import time
import random

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# url_1 = 'https://www.reddit.com/r/boardgames/new.json'
# url_2 = 'https://www.reddit.com/r/AskMen/new.json'

legaladvice_url = 'https://www.reddit.com/r/legaladvice/new.json'
relationshipadvice_url = 'https://www.reddit.com/r/relationship_advice/new.json'

# showerthoughts_url = 'https://www.reddit.com/r/Showerthoughts/new.json'
# stonerphilosophy_url = 'https://www.reddit.com/r/StonerPhilosophy/new.json'

# twosentencehorror_url = 'https://www.reddit.com/r/TwoSentenceHorror/new.json'
# twosentencecomedy_url = 'https://www.reddit.com/r/TwoSentenceComedy/new.json'

In [3]:
# base_urls = {'boardgames' : url_1, 'askmen' : url_2}

base_urls = {'legaladvice' : legaladvice_url, 'relationshipadvice' : relationshipadvice_url}

# base_urls = {'showerthoughts' : showerthoughts_url, 'stonerphilosophy' : stonerphilosophy_url}

# base_urls = {'twosentencehorror' : twosentencehorror_url, 'twosentencecomedy' : twosentencecomedy_url}

In [4]:
params = {'limit' : 100, 'after' : None}

In [5]:
headers = {'User-agent': 'agent j. bourne'}

In [6]:
all_posts = {}

# Looping through dictionary of subreddit names and URLs.
for subred, url in base_urls.items():
    
    # Printing current subreddit being scraped.
    print(f'\nSCRAPING SUBREDDIT: {subred}')
    
    # Reseting 'after' parameter to be None before first scraping attempt.
    # This parameter helps to keep track of last post from each successful scrape attempt,
    # so that next scrape is from the previous scrape's last post onwards.
    params['after'] = None
    
    # Initiating a new list for the current subreddit being scraped.
    all_posts[subred] = []
    
    # Counter to keep track of no. of total posts scraped so far. This will be used in the while loop below.
    posts_scraped = 0
    
    # While loop to scrape posts from current subreddit.
    while posts_scraped < 1000:
        
        # Requesting data from reddit API with custom user-agent in headers, and 'limit'=100 and 'after' as parameters.
        response = requests.get(url, headers=headers, params=params)
        
        # Checking for successful response from API.
        if response.status_code == 200:
            print("\nSuccessful scraping attempt.")
        else:
            print(f"\nUnsuccessful scraping attempt. Error status code: {response.status_code}")
            break
        
        response_dict = response.json()
        
        # Preparing list of all posts scraped in this attempt using list comprehension.
        current_posts = [posts['data'] for posts in response_dict['data']['children']]
        
        # Updating counter of no. of total posts scraped so far.
        # Printing relevant info messages about no. of posts scraped.
        posts_scraped += len(current_posts)
        print(f'Posts scraped in this attempt = {len(current_posts)}')
        print(f'Total posts scraped so far    = {posts_scraped}')
        
        # Updating 'after' parameter with the tag of last post scraped in this attempt,
        # so that next scrape attempt is from this post onwards.
        params['after'] = response_dict['data']['after']
        
        # Extending the list of posts scraped so far from current subreddit by adding posts scraped in this attempt.
        all_posts[subred].extend(current_posts)
        
        # Saving posts scraped so far as a CSV file (overwriting any previously written CSV file).
        pd.DataFrame(all_posts[subred]).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        print(f'Saved posts to CSV: {subred}_raw_posts.csv')
        
        
        # Using try-except loop.
        # Trying to read an existing CSV file with posts scraped from current subreddit, if any.
        # If CSV file exists and is read successfully, then just append (mode='a') posts scraped in this attempt ('current_posts') into the existing CSV file (with header=False).
        # If any error occurs during reading existing CSV file (most likely FileNotFoundError), then write (mode='w') posts scraped in this attempt into new file (with header=True).
        #try:
        #    saved_posts = pd.read_csv(f'../data/{subred}_raw_posts.csv')
        #    print(f'Existing CSV found: {subred}_raw_posts.csv')
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='a', header=False, index=False)
        #    print(f'Updated existing CSV.')
        #    
        #except:
        #    pd.DataFrame(current_posts).to_csv(f'../data/{subred}_raw_posts.csv', mode='w', header=True, index=False)
        #    print(f'New CSV created: {subred}_raw_posts.csv')
            
        
        # Setting a random sleep timer of 5-15 seconds so as to not bombard reddit server with multiple requests within seconds.
        sleep_duration = random.randint(5,15)
        print(f'Sleeping for {sleep_duration} seconds...')
        time.sleep(sleep_duration)
    
print('\nDone.')


SCRAPING SUBREDDIT: legaladvice

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 100
Saved posts to CSV: legaladvice_raw_posts.csv
Sleeping for 11 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 200
Saved posts to CSV: legaladvice_raw_posts.csv
Sleeping for 11 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 300
Saved posts to CSV: legaladvice_raw_posts.csv
Sleeping for 5 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 400
Saved posts to CSV: legaladvice_raw_posts.csv
Sleeping for 6 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far    = 500
Saved posts to CSV: legaladvice_raw_posts.csv
Sleeping for 15 seconds...

Successful scraping attempt.
Posts scraped in this attempt = 100
Total posts scraped so far  

In [7]:
# Checking 'all_posts' dictionary to ensure the desired subreddits and posts are correctly scraped.

all_posts.keys()

dict_keys(['legaladvice', 'relationshipadvice'])

**Checking posts scraped from subreddit *'legaladvice'***

Number of posts in *all_posts* dictionary must be same as in *legaladvice_raw_posts.csv*

In [8]:
len(all_posts['legaladvice'])

1097

In [13]:
# Converting to dataframe

legaladvice_df = pd.DataFrame(all_posts['legaladvice'])

In [14]:
legaladvice_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,link_flair_template_id
0,,legaladvice,I (19M) was with my abusive ex for about 5 mon...,t2_66b703t5,False,,0,False,"(AZ, USA) My abusive ex destroyed my stuff",[],r/legaladvice,False,6,,0,True,t3_ggjbe9,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggjbe9,True,,Independent_Scale,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggjbe9/az_usa_my_abusi...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
1,,legaladvice,Our backyard is small only 20 ft wide. Our nei...,t2_5z5khlwm,False,,0,False,Neighbors encroached 1.5ft with new femce and ...,[],r/legaladvice,False,6,,0,True,t3_ggj9vg,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj9vg,True,,1he_8igger_1h3_Beter,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj9vg/neighbors_encro...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
2,,legaladvice,"Hi, I am a worker who was temporarily furlough...",t2_55j1c6ak,False,,0,False,[MN] Would I still receive unemployment benefi...,[],r/legaladvice,False,6,,0,True,t3_ggj838,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj838,True,,AlternatorDrounds,,1,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj838/mn_would_i_stil...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
3,,legaladvice,I live in Belgium with my 2 daughters and my p...,t2_5ug9l5mp,False,,0,False,My baby’s dad is kicking me out of the house b...,[],r/legaladvice,False,6,,0,True,t3_ggj52z,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj52z,True,,butterlight,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj52z/my_babys_dad_is...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
4,,legaladvice,I got divorced in 2017 in Vermont. The custod...,t2_5qxtj,False,,0,False,What are my options if my ex husband takes my ...,[],r/legaladvice,False,6,,0,True,t3_ggj3sy,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589072000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj3sy,True,,katrilli,,5,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj3sy/what_are_my_opt...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,


In [15]:
legaladvice_df.shape

(1097, 102)

In [18]:
# Checking for unique posts in total scraped posts.

len(legaladvice_df['name'].unique())

998

In [24]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

legaladvice_from_csv = pd.read_csv('../data/legaladvice_raw_posts.csv')

In [25]:
legaladvice_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,link_flair_template_id
0,,legaladvice,I (19M) was with my abusive ex for about 5 mon...,t2_66b703t5,False,,0,False,"(AZ, USA) My abusive ex destroyed my stuff",[],r/legaladvice,False,6,,0,True,t3_ggjbe9,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggjbe9,True,,Independent_Scale,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggjbe9/az_usa_my_abusi...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
1,,legaladvice,Our backyard is small only 20 ft wide. Our nei...,t2_5z5khlwm,False,,0,False,Neighbors encroached 1.5ft with new femce and ...,[],r/legaladvice,False,6,,0,True,t3_ggj9vg,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj9vg,True,,1he_8igger_1h3_Beter,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj9vg/neighbors_encro...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
2,,legaladvice,"Hi, I am a worker who was temporarily furlough...",t2_55j1c6ak,False,,0,False,[MN] Would I still receive unemployment benefi...,[],r/legaladvice,False,6,,0,True,t3_ggj838,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj838,True,,AlternatorDrounds,,1,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj838/mn_would_i_stil...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
3,,legaladvice,I live in Belgium with my 2 daughters and my p...,t2_5ug9l5mp,False,,0,False,My baby’s dad is kicking me out of the house b...,[],r/legaladvice,False,6,,0,True,t3_ggj52z,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj52z,True,,butterlight,,0,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj52z/my_babys_dad_is...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,
4,,legaladvice,I got divorced in 2017 in Vermont. The custod...,t2_5qxtj,False,,0,False,What are my options if my ex husband takes my ...,[],r/legaladvice,False,6,,0,True,t3_ggj3sy,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589072000.0,text,6,,,text,self.legaladvice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2rawz,,,,ggj3sy,True,,katrilli,,5,True,all_ads,False,[],False,,/r/legaladvice/comments/ggj3sy/what_are_my_opt...,all_ads,False,https://www.reddit.com/r/legaladvice/comments/...,1237033,1589044000.0,0,,False,


In [26]:
# Ensuring shape of df from CSV is same as above.

legaladvice_from_csv.shape

(1097, 102)

In [27]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(legaladvice_from_csv['name'].unique())

998

**Checking posts scraped from subreddit *'relationshipadvice'***

Number of posts in *all_posts* dictionary must be same as in *relationshipadvice_raw_posts.csv*

In [19]:
len(all_posts['relationshipadvice'])

1098

In [20]:
# Converting to dataframe

relationshipadvice_df = pd.DataFrame(all_posts['relationshipadvice'])

In [21]:
relationshipadvice_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
0,,relationship_advice,We been dating for almost 2 years now and he s...,t2_54fr010z,False,,0,False,Should I be upset if my boyfriend doesn’t reme...,[],r/relationship_advice,False,6,,0,True,t3_ggjd4u,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjd4u,True,,lalaxx3,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjd4u/should_...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
1,,relationship_advice,I'll split my situation into parts so its easi...,t2_k6tv9,False,,0,False,What advice could you all give me ?,[],r/relationship_advice,False,6,,0,True,t3_ggjc7u,False,dark,0.99,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjc7u,True,,nicomoli12,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjc7u/what_ad...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
2,,relationship_advice,So this is going to be long. I'm not sure if t...,t2_1aasy0iv,False,,0,False,Confused. Posting here for advice on my relati...,[],r/relationship_advice,False,6,,0,True,t3_ggjc0o,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjc0o,True,,SuKh22,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjc0o/confuse...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
3,,relationship_advice,Background: I’m usually not one to look for ad...,t2_5kww2hvk,False,,0,False,My (26F) Father Sold My Boyfriend (31M) a Brok...,[],r/relationship_advice,False,6,,0,True,t3_ggjbue,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjbue,True,,refreshmysoul,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjbue/my_26f_...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
4,,relationship_advice,Originally the plan was that my sister was goi...,t2_654bl0xt,False,,0,False,"My mother keeps changing plans for moving, tel...",[],r/relationship_advice,False,6,,0,True,t3_ggjbt1,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjbt1,True,,ParanoiaFear,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjbt1/my_moth...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False


In [22]:
relationshipadvice_df.shape

(1098, 101)

In [32]:
# Checking for unique posts in total scraped posts.

len(relationshipadvice_df['name'].unique())

1002

In [28]:
# Reading the CSV file as dataframe to ensure all data got saved correctly.

relationshipadvice_from_csv = pd.read_csv('../data/relationshipadvice_raw_posts.csv')

In [29]:
relationshipadvice_from_csv.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,author_flair_background_color,subreddit_type,ups,total_awards_received,media_embed,author_flair_template_id,is_original_content,user_reports,secure_media,is_reddit_media_domain,is_meta,category,secure_media_embed,link_flair_text,can_mod_post,score,approved_by,author_premium,thumbnail,edited,author_flair_css_class,author_flair_richtext,gildings,content_categories,is_self,mod_note,created,link_flair_type,wls,removed_by_category,banned_by,author_flair_type,domain,allow_live_comments,selftext_html,likes,suggested_sort,banned_at_utc,view_count,archived,no_follow,is_crosspostable,pinned,over_18,all_awardings,awarders,media_only,can_gild,spoiler,locked,author_flair_text,treatment_tags,visited,removed_by,num_reports,distinguished,subreddit_id,mod_reason_by,removal_reason,link_flair_background_color,id,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
0,,relationship_advice,We been dating for almost 2 years now and he s...,t2_54fr010z,False,,0,False,Should I be upset if my boyfriend doesn’t reme...,[],r/relationship_advice,False,6,,0,True,t3_ggjd4u,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjd4u,True,,lalaxx3,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjd4u/should_...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
1,,relationship_advice,I'll split my situation into parts so its easi...,t2_k6tv9,False,,0,False,What advice could you all give me ?,[],r/relationship_advice,False,6,,0,True,t3_ggjc7u,False,dark,0.99,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjc7u,True,,nicomoli12,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjc7u/what_ad...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
2,,relationship_advice,So this is going to be long. I'm not sure if t...,t2_1aasy0iv,False,,0,False,Confused. Posting here for advice on my relati...,[],r/relationship_advice,False,6,,0,True,t3_ggjc0o,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjc0o,True,,SuKh22,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjc0o/confuse...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
3,,relationship_advice,Background: I’m usually not one to look for ad...,t2_5kww2hvk,False,,0,False,My (26F) Father Sold My Boyfriend (31M) a Brok...,[],r/relationship_advice,False,6,,0,True,t3_ggjbue,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjbue,True,,refreshmysoul,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjbue/my_26f_...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False
4,,relationship_advice,Originally the plan was that my sister was goi...,t2_654bl0xt,False,,0,False,"My mother keeps changing plans for moving, tel...",[],r/relationship_advice,False,6,,0,True,t3_ggjbt1,False,dark,1.0,,public,1,0,{},,False,[],,False,False,,{},,False,1,,False,,False,,[],{},,True,,1589073000.0,text,6,,,text,self.relationship_advice,False,"&lt;!-- SC_OFF --&gt;&lt;div class=""md""&gt;&lt...",,,,,False,True,False,False,False,[],[],False,False,False,False,,[],False,,,,t5_2r0cn,,,,ggjbt1,True,,ParanoiaFear,,0,True,all_ads,False,[],False,,/r/relationship_advice/comments/ggjbt1/my_moth...,all_ads,False,https://www.reddit.com/r/relationship_advice/c...,3026114,1589044000.0,0,,False


In [30]:
# Ensuring shape of df from CSV is same as above.

relationshipadvice_from_csv.shape

(1098, 101)

In [31]:
# Checking to ensure no. of unique posts in CSV is same as above.

len(relationshipadvice_from_csv['name'].unique())

1002