In [1]:
# import reddit api wrapper
import praw
import pandas as pd
from textblob import TextBlob
import altair as alt

### Reddit API Instance

In [2]:
# Read the Reddit account password and the app's client secret from local text
# files so that neither value is written into the notebook itself.
# Text files saved from an editor usually end with a newline; read() would keep
# it and it would then be sent as part of the credential, so the line
# terminators are removed (other characters are left untouched).
with open('pw.txt', 'r') as pw_file:
    pw = pw_file.read().rstrip('\r\n')

with open('client_secret.txt', 'r') as secret_file:
    cs = secret_file.read().rstrip('\r\n')

In [3]:
# Build the praw Reddit client for this app.
# Account login (username / password) is listed first, then the app registration
# (client id / client secret), then the user agent string sent with requests.
# NOTE(review): client_id and username are written inline here, whereas the
# client secret and password come from the cs / pw variables read from local files.
reddit = praw.Reddit(
    username="ta_api",
    password=pw,
    client_id="XbesrQBvKymjgLdgg_D6lA",
    client_secret=cs,
    user_agent="NFLTextAnalysis/0.0.1",
)

In [4]:
# Look up the NO-TN game thread by its submission id, then echo the title
# as the cell output to verify that it is the intended thread.
submission = reddit.submission(id="qtuqqs")
submission.title

'Game Thread: New Orleans Saints (5-3) at Tennessee Titans (7-2)'

### Create Comments Dataframe

### Test Function Implementation at the individual gamethread level

In [5]:
# load IPython's autoreload extension; note that this %load_ext line only needs to be run once
%load_ext autoreload

In [6]:
# %autoreload reloads the already-imported modules when this cell runs, so after making/saving
# changes to the nfl_gamethreads py file we only need to re-run this cell to pick them up
%autoreload
from nfl_gamethreads import nfl_gamethreads

In [7]:
# fetch the comment records for the NO-TN game thread (submission id 'qtuqqs') via the project helper
# NOTE(review): comments_only=False presumably returns the extra per-comment fields (author, upvotes,
# time, flair) that the dataframe built from this list expects - confirm against nfl_gamethreads.get_comments
NO_TN_list = nfl_gamethreads.get_comments(reddit, 'qtuqqs', comments_only=False)
# show how many records came back
len(NO_TN_list)

2165

In [8]:
# fetch the game-level summary for the same thread; the printed result is a single flat tuple
# NOTE(review): the meaning of each tuple position (team names, records, scores, betting line?) is not
# documented in this notebook - confirm against nfl_gamethreads.get_game_data
NO_TN_data = nfl_gamethreads.get_game_data(reddit, 'qtuqqs')
print(NO_TN_data)

('qtuqqs', 'New Orleans Saints (5-3) at Tennessee Titans (7-2)', 'Tennessee Titans', '7', '2', 0, 'New Orleans Saints', '5', '3', 0, 23, 21, 'Titans', 44, 2, 'Tennessee', '3.0', '42.5')


In [9]:
# turn the list of comment records into a dataframe, naming the six fields of each record in order
comments_df = pd.DataFrame(
    NO_TN_list,
    columns=[
        'submission_id',
        'author',
        'body',
        'upvotes',
        'utc_time',
        'author_flair',
    ],
)

In [10]:
# score the text in the 'body' column of every comment; the preview shows the polarity and
# subjectivity columns that come back alongside the original fields
# NOTE(review): this rebinds comments_df to the helper's return value, so re-running the cell feeds
# already-scored data back in - confirm analyze_text_df tolerates that
comments_df = nfl_gamethreads.analyze_text_df(comments_df, text_column='body')
# fixed random_state so the 10-row preview is identical on every Restart & Run All
comments_df.sample(10, random_state=42).style.background_gradient()

Unnamed: 0,submission_id,author,body,upvotes,utc_time,author_flair,polarity,subjectivity
1299,qtuqqs,SCsoccer,A disastrous end to that drive.,1,1636923068.0,:Bears: Bears,-0.7,0.8
1655,qtuqqs,BlueThumbtack08,If that was the case then he lost his nickname 4 minutes into our season opener,5,1636915437.0,:Titans: Titans,0.0,0.0
2121,qtuqqs,Paranatural,No,1,1636914770.0,:Saints: Saints,0.0,0.0
1216,qtuqqs,,LOL,1,1636917118.0,,0.8,0.7
1493,qtuqqs,LaneKiffinsAlterEgo,❤️ your secondary,2,1636913047.0,:Titans: Titans,-0.3,0.3
1056,qtuqqs,ChiaTiger,"Trautman will be addition by subtraction this offseason. Terrible player, sloppy on the field, can’t catch, fumbles when he does catch it, I can’t name one thing he does well",3,1636923787.0,:Saints: Saints,-0.708333,0.875
472,qtuqqs,thediesel26,Just give the QBs red jerseys and don’t let the defense touch them if we’re gonna call this bs. Jesus fucking Christ.,4,1636917158.0,:Dolphins: Dolphins,-0.3,0.4
2163,qtuqqs,No-Can946,You know what’s great for chemistry? Winning.,1,1636925836.0,:Saints: Saints,0.65,0.75
1889,qtuqqs,NNKarma,Even I thought we were going to rush midplay,1,1636915427.0,:Saints: Saints,0.0,0.0
2016,qtuqqs,FidgetyLeper,"Yeah I'm fine with ticky tack PI if they're ticky tack both ways, but that was an abysmal RTP and it sucks that refs are above criticism for sucking at their job",1,1636918087.0,:Titans: Titans,0.038889,0.3


### Variable Distributions

In [11]:
# binned histogram of comment polarity scores
polar = alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("polarity:Q", bin=True),
    y=alt.Y('count()'),
)

# binned histogram of comment subjectivity scores
subject = alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("subjectivity:Q", bin=True),
    y=alt.Y('count()'),
)

# place the two histograms side by side (| is altair's horizontal concatenation)
polar | subject

In [14]:
# binned histogram of upvotes per comment
alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("upvotes:Q", bin=True),
    y=alt.Y('count()'),
)

In [15]:
# number of comments per author flair (one bar per flair value)
alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("author_flair"),
    y=alt.Y('count()'),
)

In [18]:
# polarity of each comment plotted against its posting time
# 1636925000 is a Unix timestamp (2021-11-14 21:23:20 UTC); only comments posted before it are kept
# NOTE(review): presumably chosen to drop comments made after the game ended - confirm
alt.Chart(
    comments_df.loc[comments_df['utc_time'] < 1636925000]
).mark_line(point=True).encode(
    x=alt.X('utc_time'),
    y=alt.Y("polarity:Q"),
)

In [19]:
# keep only comments whose author flair is one of the two teams playing in this game
primary_fans_df = comments_df.loc[
    comments_df['author_flair'].isin([
        ':Saints: Saints',
        ':Titans: Titans',
    ])
]
primary_fans_df

Unnamed: 0,submission_id,author,body,upvotes,utc_time,author_flair,polarity,subjectivity
0,qtuqqs,KentuckyBourbon94,An underrated factor of playing against the Sa...,62,1.636917e+09,:Titans: Titans,-0.300000,0.400000
1,qtuqqs,PuddingJello,Next year RTP calls will be reviewable but ref...,36,1.636917e+09,:Saints: Saints,-0.129365,0.171429
2,qtuqqs,Theinsulated,I wish I could fuck up every aspect of my job ...,39,1.636917e+09,:Saints: Saints,-0.400000,0.600000
3,qtuqqs,Successful-Client215,I'm a Titans fan and these QB roughing calls a...,27,1.636917e+09,:Titans: Titans,-0.550000,0.533333
4,qtuqqs,TotesMcGotes13,Sorry Saints bros. That RTP was bullshit.,27,1.636918e+09,:Titans: Titans,-0.500000,1.000000
...,...,...,...,...,...,...,...,...
2160,qtuqqs,Mr_Sarcastic12,🤡🤡🤡,1,1.636924e+09,:Titans: Titans,0.000000,0.000000
2161,qtuqqs,No-Can946,"Just scored another, how can you not see it",1,1.636925e+09,:Saints: Saints,0.000000,0.000000
2162,qtuqqs,scottonetwenty,Because Cam scoring on the Cardinals has anyth...,1,1.636926e+09,:Saints: Saints,-0.155556,0.288889
2163,qtuqqs,No-Can946,You know what’s great for chemistry? Winning.,1,1.636926e+09,:Saints: Saints,0.650000,0.750000


In [20]:
# polarity histogram for the two teams' fans: one panel (column facet) and one color per flair
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("polarity:Q", bin=True),
    y=alt.Y('count()'),
    color=alt.Color('author_flair'),
    column=alt.Column('author_flair'),
)

In [21]:
# subjectivity histogram for the two teams' fans: one panel (column facet) and one color per flair
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("subjectivity:Q", bin=True),
    y=alt.Y('count()'),
    color=alt.Color('author_flair'),
    column=alt.Column('author_flair'),
)

In [23]:
# polarity over time for the two teams' fans, one colored line per flair
# 1636925000 is a Unix timestamp (2021-11-14 21:23:20 UTC); only comments posted before it are kept
# NOTE(review): presumably chosen to drop comments made after the game ended - confirm
alt.Chart(
    primary_fans_df.loc[primary_fans_df['utc_time'] < 1636925000]
).mark_line(point=True).encode(
    x=alt.X('utc_time'),
    y=alt.Y("polarity:Q"),
    color=alt.Color('author_flair'),
)

In [24]:
# scatter of polarity against subjectivity for the two teams' fans,
# with one panel (column facet) and one color per flair
alt.Chart(data=primary_fans_df).mark_point().encode(
    alt.X('polarity:Q'),
    alt.Y('subjectivity:Q'),
    alt.Color('author_flair:N'),
    alt.Column('author_flair:N'),
)

## Ideas

- Which fandoms are the most/least subjective & polar?
- Which fandoms show up in rival game threads the most?
- Which fandoms show up in other game threads the most? (need to normalize for fandom population)
- If we scrape multiple years, how have variables of interest changed over time?
- Which fandoms are most negative about their own team/rival/opponent in game threads?
- What types of games are the most polarizing? Rivalry? Later in the season? Narrative explanations? In-game reasons?
- Which fandoms have the foulest mouths? (filter for curse words specifically)
- Which fandoms hate the refs the most/least?
- What are the most popular words by fandom/game thread?
- Extract popular player names and run sentiment analysis on the subset of comments that mention them.