# Social Influence Study - Reddit Posts Popularity


## Table of contents
1. Data gathering setup and experiment
2. Data Analysis

### Imports

In [185]:
import os
import random
from datetime import datetime, timezone

import dotenv
import pandas as pd
import praw

### Setup env
Create a `.env` file in the same directory as this notebook and put all the secrets in it, in the form:

```
CLIENT_ID="your_client_id"
CLIENT_SECRET="your_client_secret"
USERNAME="your_username"
PASSWORD="your_password"
```

In [186]:
# Load the python-dotenv IPython extension, then read the `.env` file into the
# process environment so the os.getenv() calls in the next cell can see the secrets.
# Keep comments on their own lines: anything after a line magic is parsed as its arguments.
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [187]:
# Reddit API credentials, looked up in the process environment.
# A variable that is not set yields None here rather than raising.
# NOTE(review): USERNAME is often already defined by the operating system or
# shell (e.g. on Windows). If the dotenv loader does not override existing
# variables, the value from `.env` would be ignored -- confirm on your machine.
CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")
USERNAME = os.environ.get("USERNAME")
PASSWORD = os.environ.get("PASSWORD")

### Setup the reddit instance

In [188]:
def user_login(client_id, client_secret, username, password, user_agent):
    """Build a `praw.Reddit` client from the given credentials.

    Args:
        client_id: Reddit application client id.
        client_secret: Reddit application client secret.
        username: Reddit account user name.
        password: Reddit account password.
        user_agent: User-agent string passed on to PRAW.

    Returns:
        The `praw.Reddit` instance constructed from these values.
    """
    credentials = {
        "client_id": client_id,
        "client_secret": client_secret,
        "username": username,
        "password": password,
        "user_agent": user_agent,
    }
    return praw.Reddit(**credentials)

### Get post metadata

In [189]:
def get_post_metadata(post):
    """Flatten a Reddit submission into a plain dict (one row of the results CSV).

    Both timestamps are rendered in UTC. `post.created_utc` is a UTC epoch
    value, and converting it without a timezone would silently shift it into
    the local timezone of whatever machine runs the notebook, making rows
    collected on different machines incomparable.

    Args:
        post: Submission-like object exposing `id`, `title`, `created_utc`,
            `subreddit.display_name`, `ups`, `downs`, `score`,
            `num_comments` and `url`.

    Returns:
        dict with keys `id`, `type`, `title`, `created`, `collected`,
        `subreddit`, `upvotes`, `downvotes`, `score`, `n_comments`, `url`.
        `type` is always None here; the caller fills in the experiment arm.
    """
    created_at = datetime.fromtimestamp(post.created_utc, tz=timezone.utc)
    collected_on = datetime.now(timezone.utc).date()

    metadata = {
        "id": post.id,
        "type": None,  # set by the caller to "CONTROL" or "TREATMENT"
        "title": post.title,
        "created": created_at.strftime("%Y-%m-%d %H:%M:%S"),  # UTC
        "collected": collected_on.isoformat(),  # UTC calendar date
        "subreddit": post.subreddit.display_name,
        # NOTE(review): Reddit is understood to obfuscate raw vote counts
        # (`downs` typically 0, `ups` mirroring `score`) -- confirm before
        # relying on these two columns in the analysis.
        "upvotes": post.ups,
        "downvotes": post.downs,
        "score": post.score,
        "n_comments": post.num_comments,
        "url": post.url,
    }
    return metadata

### Get batch of new posts

In [190]:
def get_posts(
    reddit_instance,
    subreddit = "all",
    score = 1,
    n_comments = 0,
    batch_size = 200,
):
    """Fetch the newest posts of a subreddit and keep only the untouched ones.

    Args:
        reddit_instance: Client exposing `.subreddit(name).new(limit=...)`.
        subreddit: Name of the subreddit to read from.
        score: Maximum score (inclusive) a post may have to be kept.
        n_comments: Maximum number of comments (inclusive) a post may have.
        batch_size: How many newest posts to request *before* filtering, so
            the returned list can be shorter than this.

    Returns:
        list of the posts that passed both thresholds, in listing order.
    """
    print("=== Getting Posts ===")

    def is_untouched(candidate):
        # Same short-circuit order as a plain `and`: score first, then comments.
        return candidate.score <= score and candidate.num_comments <= n_comments

    newest = reddit_instance.subreddit(subreddit).new(limit=batch_size)
    return list(filter(is_untouched, newest))

### Assign posts to either Treatment or Control for the sake of the experiment

In [199]:
def run_experiment(posts):
    """Randomly split posts into control/treatment and upvote the treatment arm.

    The posts are shuffled and split into two halves of equal size (with an
    odd count, one post is left out). Control posts are only recorded;
    treatment posts are recorded and upvoted via `post.upvote()`, which is a
    real, externally visible action. Whenever a treatment post cannot be
    processed, one control record is discarded too, so both arms always end
    up with the same number of records.

    Args:
        posts: list of submission-like objects (need the attributes read by
            `get_post_metadata` and an `upvote()` method). The list itself is
            not modified; shuffling happens on a copy.

    Returns:
        list of metadata dicts, all "CONTROL" records first, then all
        "TREATMENT" records, with equally many of each.
    """
    print("=== Running the experiment ===")

    # Shuffle a copy so the caller's list keeps its order and length.
    shuffled = list(posts)
    random.shuffle(shuffled)

    # Equal-sized arms; with an odd count the last shuffled post is unused.
    half = len(shuffled) // 2
    treatment = shuffled[:half]
    control = shuffled[half:2 * half]

    control_rows = []
    for post in control:
        try:
            row = get_post_metadata(post)
        except Exception:
            # `post` is an object, not a dict: read the id as an attribute.
            print(f'Could not collect metadata of submission: {getattr(post, "id", "<unknown>")}')
            continue
        row["type"] = "CONTROL"
        control_rows.append(row)

    treatment_rows = []
    # Never attempt more treatments than there are recorded controls.
    for post in treatment[:len(control_rows)]:
        post_id = getattr(post, "id", "<unknown>")

        # Collect the metadata first so a post is never upvoted unless it can
        # also be recorded.
        try:
            row = get_post_metadata(post)
            row["type"] = "TREATMENT"
        except Exception:
            print(f'Could not collect metadata of submission: {post_id}')
            row = None

        if row is not None:
            try:
                post.upvote()
            except Exception:
                print(f'Could not upvote submission: {post_id}')
                row = None

        if row is None:
            # Keep the arms balanced: drop one control for the lost treatment.
            # The order is already random, so which control goes is arbitrary.
            if control_rows:
                control_rows.pop()
            continue

        treatment_rows.append(row)

    return control_rows + treatment_rows

### Save posts to a CSV file

In [200]:
def posts_to_csv(posts_done, file_name='posts_done.csv'):
    """Append collected post records to a CSV file, creating it if needed.

    Existing rows are read back, the new rows are concatenated after them and
    the whole table is rewritten without the index column.

    Args:
        posts_done: list of metadata dicts, one per post.
        file_name: Path of the CSV file to create or extend. Defaults to
            'posts_done.csv' in the current working directory.

    Returns:
        None. An empty `posts_done` is a no-op: writing an empty frame would
        create a header-less file that `pd.read_csv` cannot parse next time.
    """
    print("=== Saving to CSV ===")

    if len(posts_done) == 0:
        print("=== Nothing to save ===")
        return

    frames = [pd.DataFrame(posts_done)]
    if os.path.isfile(file_name):
        try:
            # Previously collected rows go first so the file stays chronological.
            frames.insert(0, pd.read_csv(file_name))
        except pd.errors.EmptyDataError:
            # An empty / header-less file carries no rows; just overwrite it.
            pass

    # ignore_index gives the combined frame a clean 0..n-1 index.
    df = pd.concat(frames, ignore_index=True)

    # Save the dataframe to csv
    df.to_csv(file_name, index=False)
    print("=== Done ===")

### Main entrypoint

In [201]:
def main():
    """Run one full round: log in, fetch posts, run the experiment, save the rows.

    Uses the module-level CLIENT_ID, CLIENT_SECRET, USERNAME and PASSWORD
    values together with a placeholder user-agent string, then chains
    get_posts -> run_experiment -> posts_to_csv with default arguments.
    """
    credentials = (CLIENT_ID, CLIENT_SECRET, USERNAME, PASSWORD)
    session = user_login(*credentials, 'put here your user agent')

    candidates = get_posts(session)
    posts_to_csv(run_experiment(candidates))

In [209]:
# Run one full round: fetch new posts, upvote the treatment half, append to the CSV.
# Every execution of this cell casts real upvotes from the configured account
# and adds rows to posts_done.csv -- do not re-run it casually.
main()
# Last expression of the cell: display everything accumulated in the CSV so far.
pd.read_csv("posts_done.csv")

=== Getting Posts ===
=== Running the experiment ===
Could not upvote submission: 12crnxf
=== Saving to CSV ===
=== Done ===


Unnamed: 0,id,type,title,created,collected,subreddit,upvotes,downvotes,score,n_comments,url
0,12cro57,CONTROL,"will cum trib any islander rn, dm me",2023-04-05 19:29:19,2023-04-05,loveislandhotties,1,0,1,0,https://www.reddit.com/r/loveislandhotties/com...
1,12crnx6,CONTROL,Calgary RE agent's licenses revoked for mislea...,2023-04-05 19:29:06,2023-04-05,CanadaHousing2,1,0,1,0,https://calgary.ctvnews.ca/calgary-real-estate...
2,12cro6o,CONTROL,Do I have to take Eliquis loading dose?,2023-04-05 19:29:22,2023-04-05,ClotSurvivors,1,0,1,0,https://www.reddit.com/r/ClotSurvivors/comment...
3,12cro08,CONTROL,📷 GTA 5 Modded accounts for sale $5 Billion 32...,2023-04-05 19:29:10,2023-04-05,gta5moddedoutfits_,1,0,1,0,https://www.reddit.com/r/gta5moddedoutfits_/co...
4,12cro41,CONTROL,If I plan on ending my first book on a cliffha...,2023-04-05 19:29:17,2023-04-05,writing,1,0,1,0,https://www.reddit.com/r/writing/comments/12cr...
...,...,...,...,...,...,...,...,...,...,...,...
169,12crnxw,TREATMENT,Amateur fisting party at the doctors,2023-04-05 19:29:07,2023-04-05,Rt5634r,1,0,1,0,https://www.google.com/amp/s/juant.online/42270
170,12cro5n,TREATMENT,🙂 MedArrive is hiring a remote Field Provider ...,2023-04-05 19:29:20,2023-04-05,himalayasapp,1,0,1,0,https://himalayas.app/companies/medarrive/jobs...
171,12cro26,TREATMENT,Finding The laughter In The Madness!#shorts,2023-04-05 19:29:14,2023-04-05,CoolCharacters,1,0,1,0,https://youtube.com/watch?v=cb57KX93Hm0&featur...
172,12crnzg,TREATMENT,"Thank You, Post",2023-04-05 19:29:09,2023-04-05,TetoKasane,1,0,1,0,https://www.reddit.com/r/TetoKasane/comments/1...
