In [150]:
# import reddit api wrapper
import praw
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup

from textblob import TextBlob
import altair as alt

In [151]:
# Load the Reddit account password and the app's client secret from local
# files so that neither value is hardcoded in the notebook.
# Surrounding whitespace is stripped because text files commonly end with a
# trailing newline, which would otherwise become part of the credential.
with open('pw.txt', 'r') as pw_file:
    pw = pw_file.read().strip()

with open('client_secret.txt', 'r') as secret_file:
    cs = secret_file.read().strip()

In [152]:


# gather the app credentials and account login details for the Reddit API
reddit_credentials = dict(
    client_id="XbesrQBvKymjgLdgg_D6lA",
    client_secret=cs,
    user_agent="NFLTextAnalysis/0.0.1",
    username="ta_api",
    password=pw,
)

# build the praw Reddit instance from those credentials
reddit = praw.Reddit(**reddit_credentials)

# look up the Redditor object for the gamethread account
nfl_bot = reddit.redditor("nfl_gamethread")

In [153]:
# collect (id, title, created timestamp) for the account's 20 newest submissions
gamethread_list = [
    (str(post.id), str(post.title), post.created_utc)
    for post in nfl_bot.submissions.new(limit=20)
]

# tabulate the collected threads, one row per submission
gamethread_df = pd.DataFrame(gamethread_list, columns=['id', 'title', 'date'])

In [154]:
# Transform the 'date' column from a UTC timestamp (seconds since the epoch)
# to only the calendar date.
# pd.to_datetime converts the whole column at once and replaces the row-wise
# datetime.utcfromtimestamp call, which is deprecated since Python 3.12.
gamethread_df['date'] = pd.to_datetime(gamethread_df['date'], unit='s').dt.date

In [155]:
# drop every thread whose title mentions pre/post game, halftime, the Pro Bowl,
# or the Super Bowl; rows with a missing title are dropped as well
excluded_title_pattern = "Pre|Post|Halftime|Pro Bowl|Super Bowl"
is_excluded = gamethread_df["title"].str.contains(excluded_title_pattern, na=True)
gamethread_df = gamethread_df[~is_excluded].copy()

In [156]:
# positions of the rows of interest among all <tr> elements of a thread body
HOME_ROW = 2
AWAY_ROW = 3
ODDS_ROW = 6


def _parse_odds(odds_text):
    """Split an odds string into (pred_winner, pred_diff, pred_ou).

    The string is parsed from the right: the last token is taken as the
    over/under, the third-from-last token as the spread with its leading
    character (presumably the sign) removed, and every token before that is
    joined into the predicted winner. One-word and two-word team cities are
    therefore handled by the same code path.

    Returns (None, None, None) when the string has fewer than four tokens, so
    an unexpected format never reuses values parsed for a previous thread.
    """
    tokens = odds_text.split()
    if len(tokens) < 4:
        return None, None, None
    return " ".join(tokens[:-3]), tokens[-3][1:], tokens[-1]


gamethread_text_list = []

# for each thread, get the body to extract score and other details
for thread_id in gamethread_df['id']:
    submission = reddit.submission(thread_id)

    soup = BeautifulSoup(submission.selftext_html, 'html.parser')
    # find the table rows once instead of re-scanning the document per field
    rows = soup.find_all('tr')

    # home team and score: first cell holds the team, last cell the score
    home_cells = rows[HOME_ROW].find_all('td')
    home_team = home_cells[0].text
    home_score = int(home_cells[-1].text)

    # away team and score, laid out the same way
    away_cells = rows[AWAY_ROW].find_all('td')
    away_team = away_cells[0].text
    away_score = int(away_cells[-1].text)

    # combined points and margin between the two scores
    combined_score = home_score + away_score
    diff = abs(home_score - away_score)
    # NOTE: equal scores are credited to the home team (unchanged behavior)
    winner = home_team if home_score >= away_score else away_team

    # the odds text sits in the last cell of the odds row
    odds = str(rows[ODDS_ROW].find_all('td')[-1].text)
    pred_winner, pred_diff, pred_ou = _parse_odds(odds)

    gamethread_text_list.append((
        thread_id, home_team, home_score, away_team, away_score,
        winner, combined_score, diff, pred_winner, pred_diff, pred_ou,
    ))

gamethread_text = pd.DataFrame(
    gamethread_text_list,
    columns=['id', 'home_team', 'home_score', 'away_team', 'away_score', 'winner',
             'combined_score', 'diff', 'pred_winner', 'pred_diff', 'pred_ou'],
)

In [157]:
# display the score and odds details parsed for each game thread
gamethread_text

Unnamed: 0,id,home_team,home_score,away_team,away_score,winner,combined_score,diff,pred_winner,pred_diff,pred_ou
0,sglhph,Rams,20,49ers,17,Rams,37,3,Los Angeles,3.5,45.5
1,sggrq9,Chiefs,24,Bengals,27,Bengals,51,3,Kansas City,7.0,54.5
2,sb6jfz,Chiefs,42,Bills,36,Chiefs,78,6,Kansas City,2.5,54.0
3,sb1o4b,Buccaneers,27,Rams,30,Rams,57,3,Tampa Bay,3.0,48.0


In [158]:
# attach the parsed score and odds columns to each thread row, matching on
# 'id' and keeping every row of gamethread_df (left join)
gamethread_df = gamethread_df.merge(right=gamethread_text, on='id', how='left')

# show the combined table
gamethread_df

Unnamed: 0,id,title,date,home_team,home_score,away_team,away_score,winner,combined_score,diff,pred_winner,pred_diff,pred_ou
0,sglhph,Game Thread: San Francisco 49ers (10-7) at Los...,2022-01-30,Rams,20,49ers,17,Rams,37,3,Los Angeles,3.5,45.5
1,sggrq9,Game Thread: Cincinnati Bengals (10-7) at Kans...,2022-01-30,Chiefs,24,Bengals,27,Bengals,51,3,Kansas City,7.0,54.5
2,sb6jfz,Game Thread: Buffalo Bills (11-6) at Kansas Ci...,2022-01-23,Chiefs,42,Bills,36,Chiefs,78,6,Kansas City,2.5,54.0
3,sb1o4b,Game Thread: Los Angeles Rams (12-5) at Tampa ...,2022-01-23,Buccaneers,27,Rams,30,Rams,57,3,Tampa Bay,3.0,48.0


In [159]:

# for each thread, get the comments
# TODO: write a function that returns details about the comments (polarity, subjectivity, fanbase aggregate details); open question: merge the result onto the gamethread id?