## Data Collection

### Imports

In [23]:
# !pip install praw
import os
import time

import pandas as pd
import praw
from sklearn.preprocessing import LabelEncoder

### Initializing Praw

In [2]:
# Initializing PRAW.
# Credentials are read from environment variables so that no secret is ever
# saved in (or committed with) the notebook. Each value is passed as a plain
# string; the previous list-wrapped placeholders were not valid credentials.
# Set REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT,
# REDDIT_USERNAME and REDDIT_PASSWORD before running this cell -- a missing
# variable raises KeyError here instead of failing later on the first request.
reddit = praw.Reddit(
    client_id=os.environ['REDDIT_CLIENT_ID'],
    client_secret=os.environ['REDDIT_CLIENT_SECRET'],
    user_agent=os.environ['REDDIT_USER_AGENT'],
    username=os.environ['REDDIT_USERNAME'],
    password=os.environ['REDDIT_PASSWORD']
)

### Function to Get Subreddit Posts

In [3]:
#making function to grab each post type
def post_datafier(subreddit, sort_type, pause_seconds=10):
    """Collect one sorted listing of a subreddit into a cleaned DataFrame.

    Parameters
    ----------
    subreddit : object
        A subreddit handle exposing ``new``, ``top``, ``hot``, ``rising`` and
        ``controversial`` listing methods that accept a ``limit`` keyword
        (e.g. the object returned by ``reddit.subreddit(...)``).
    sort_type : str
        Which listing to pull: "new", "top", "hot", "rising" or
        "controversial".
    pause_seconds : float, optional
        How long to pause after the listing has been consumed, as a courtesy
        delay between successive calls. Defaults to 10; pass 0 to skip.

    Returns
    -------
    pandas.DataFrame
        Columns ``created_utc``, ``title``, ``self_text``, ``subreddit`` and
        ``sort``. Posts with an empty self text and exact duplicate rows are
        removed; the index is the one left over after filtering (not reset).

    Raises
    ------
    ValueError
        If ``sort_type`` is not one of the supported listing names.
    """
    # Maximum number of posts requested per listing (rising asks for fewer).
    listing_limits = {
        "new": 1000,
        "top": 1000,
        "hot": 1000,
        "rising": 100,
        "controversial": 1000,
    }

    if not isinstance(sort_type, str) or sort_type not in listing_limits:
        raise ValueError("OOPS! Sort type not in subreddit sort options.")

    # PRAW listings are lazy generators, so nothing has been fetched yet at
    # this point. Sleeping here (as the "new"/"top" branches used to) only
    # wasted time, so the only pause left is the one after the fetch below.
    posts = getattr(subreddit, sort_type)(limit=listing_limits[sort_type])

    # Iterating the listing is what actually pulls the posts.
    data = [
        [post.created_utc, post.title, post.selftext, post.subreddit, sort_type]
        for post in posts
    ]

    df = pd.DataFrame(data, columns=['created_utc', 'title', 'self_text', 'subreddit', 'sort'])
    df_non_empty_txt = df[df['self_text'] != ""]
    df_cleaned = df_non_empty_txt.drop_duplicates()

    # Single courtesy pause between consecutive listing pulls.
    if pause_seconds:
        time.sleep(pause_seconds)

    return df_cleaned

### Getting QDOBA Posts

In [4]:
# Handle for the 'qdoba' subreddit, obtained from the `reddit` client created
# above; it is what gets passed to post_datafier in the next cell.
qdoba_subreddit = reddit.subreddit('qdoba')

In [5]:
# Every listing order pulled from a subreddit (the Chipotle section reuses this list).
sorts = ["new", "top", "hot", "rising", "controversial"]

# One cleaned DataFrame per listing order, each fetched through post_datafier.
qdoba_sorted = [post_datafier(qdoba_subreddit, listing) for listing in sorts]

# Stack the per-listing frames into a single frame for the whole subreddit.
qdoba = pd.concat(qdoba_sorted, ignore_index=True)

qdoba.head()

Unnamed: 0,created_utc,title,self_text,subreddit,sort
0,1713651000.0,Can a qdoba employee please shed some light on...,This is a throwaway account.\n\nSo sometimes l...,qdoba,new
1,1713560000.0,Found an artery in my birria Qdoba bowl last w...,"Yes, I'm aware that meat comes from animals an...",qdoba,new
2,1713396000.0,A la carte still available?,I used to always get an a la carte bowl + addi...,qdoba,new
3,1712707000.0,Elote,Any store start getting the elote ingredients?...,qdoba,new
4,1712293000.0,in defense of national burrito day,"absolutely joking, it sucks for both customers...",qdoba,new


In [6]:
# (rows, columns) of the combined qdoba frame.
qdoba.shape

(1833, 5)

In [7]:
# Total number of missing values across every column of the qdoba frame.
qdoba.isna().sum().sum()

0

### Getting Chipotle Posts

In [8]:
# Handle for the 'Chipotle' subreddit, obtained from the same `reddit` client;
# it is what gets passed to post_datafier in the next cell.
chipotle_subreddit = reddit.subreddit('Chipotle')

In [9]:
# Same collection steps as the qdoba section: one cleaned DataFrame per listing
# order in `sorts` (defined in the qdoba section), fetched through post_datafier.
chipotle_sorted = [post_datafier(chipotle_subreddit, listing) for listing in sorts]

# Stack the per-listing frames into a single frame for the whole subreddit.
chipotle = pd.concat(chipotle_sorted, ignore_index=True)

chipotle.head()

Unnamed: 0,created_utc,title,self_text,subreddit,sort
0,1713893000.0,Free BOGO 9/22/24 Hockey Deal,I had seen yesterday that there was a bogo dea...,Chipotle,new
1,1713893000.0,where did my points go?!?@?@?@,i had roughly 1500 points and when i went to u...,Chipotle,new
2,1713890000.0,Pepper color…,Sitting here chomping down on my delicious bow...,Chipotle,new
3,1713889000.0,Here’s why I don’t go to chipotle anymore,$13 with the drink from a small local business...,Chipotle,new
4,1713888000.0,Fajita veggies,It’s absolutely insane you guys can’t have eno...,Chipotle,new


In [10]:
# (rows, columns) of the combined Chipotle frame.
chipotle.shape

(2860, 5)

In [11]:
# Total number of missing values across every column of the Chipotle frame.
chipotle.isna().sum().sum()

0

## Creating the Final DataFrame

In [46]:
# Stack the Chipotle rows on top of the qdoba rows; ignore_index=True gives the
# merged frame a fresh 0..n-1 index instead of keeping each frame's own index.
df = pd.concat([chipotle, qdoba], ignore_index=True)
df.head()

Unnamed: 0,created_utc,title,self_text,subreddit,sort
0,1713893000.0,Free BOGO 9/22/24 Hockey Deal,I had seen yesterday that there was a bogo dea...,Chipotle,new
1,1713893000.0,where did my points go?!?@?@?@,i had roughly 1500 points and when i went to u...,Chipotle,new
2,1713890000.0,Pepper color…,Sitting here chomping down on my delicious bow...,Chipotle,new
3,1713889000.0,Here’s why I don’t go to chipotle anymore,$13 with the drink from a small local business...,Chipotle,new
4,1713888000.0,Fajita veggies,It’s absolutely insane you guys can’t have eno...,Chipotle,new


In [47]:
# (rows, columns) of the merged frame.
df.shape

(4693, 5)

In [48]:
# Total number of missing values across every column of the merged frame.
df.isna().sum().sum()

0

In [49]:
# Distinct values of the subreddit column. As the output shows, these are
# Subreddit objects rather than strings, which is why the encoding step
# further down reads each object's display_name.
df['subreddit'].unique()

array([Subreddit(display_name='Chipotle'),
       Subreddit(display_name='qdoba')], dtype=object)

### Binarizing the subreddit column so I can build my model

In [50]:
# The subreddit column holds Subreddit objects; pull out each one's plain-text
# display name so there is something string-valued to encode.
subreddit_names = df['subreddit'].map(lambda sub: sub.display_name)

# Fit the encoder on those names and overwrite the column with the integer
# labels (in the frame displayed below, Chipotle rows are 0 and qdoba rows are 1).
# NOTE: this replaces df['subreddit'] in place, so re-running this cell without
# first re-running the cell that builds df fails (integers have no display_name).
le = LabelEncoder()
df['subreddit'] = le.fit_transform(subreddit_names)

In [51]:
# Show the merged frame to confirm the subreddit column now holds integer labels.
df

Unnamed: 0,created_utc,title,self_text,subreddit,sort
0,1.713893e+09,Free BOGO 9/22/24 Hockey Deal,I had seen yesterday that there was a bogo dea...,0,new
1,1.713893e+09,where did my points go?!?@?@?@,i had roughly 1500 points and when i went to u...,0,new
2,1.713890e+09,Pepper color…,Sitting here chomping down on my delicious bow...,0,new
3,1.713889e+09,Here’s why I don’t go to chipotle anymore,$13 with the drink from a small local business...,0,new
4,1.713888e+09,Fajita veggies,It’s absolutely insane you guys can’t have eno...,0,new
...,...,...,...,...,...
4688,1.363154e+09,Free Burritos?,"If you come in to the Issaquah, WA store next ...",1,controversial
4689,1.360524e+09,Is there any way to find out what e-mail is as...,Totally forgot what e-mail I used to log-in at...,1,controversial
4690,1.345751e+09,Qdoba logic,Sign says guacamole is free for all veggie bow...,1,controversial
4691,1.344575e+09,It's been a little slow!,"The subreddits been a little slow! I mean, the...",1,controversial


### Exporting Dataframes to CSV

In [42]:
# Final df with the Chipotle and qdoba data concatenated and the subreddit
# column label-encoded; written without the index column.
# Assumes the ./data directory already exists.
# NOTE(review): this cell's execution count (42) is lower than those of the
# cells that build and encode df (46-50) -- re-run it after them so the CSV on
# disk matches the frame shown above.
df.to_csv('./data/merged_dfs.csv', index=False)

In [43]:
# Chipotle-only frame as collected (its subreddit column is not label-encoded;
# only df was encoded). Written without the index column.
# Assumes the ./data directory already exists.
chipotle.to_csv('./data/chipotle_raw.csv', index = False)

In [44]:
# qdoba-only frame as collected (its subreddit column is not label-encoded;
# only df was encoded). Written without the index column.
# Assumes the ./data directory already exists.
qdoba.to_csv('./data/qdoba_raw.csv', index = False)