In [1]:
# import reddit api wrapper
import praw
import pandas as pd
from textblob import TextBlob
import altair as alt

### Reddit API Instance

In [2]:
# Read the account password and the app's client secret from local files so
# neither value is stored in the notebook itself.
# Text editors usually end a file with a newline; strip line terminators only
# (not other whitespace) so the credentials are passed to the API exactly as
# intended instead of with a stray "\n" that can make authentication fail.
with open('pw.txt', 'r') as pw_file:
    pw = pw_file.read().rstrip('\r\n')

with open('client_secret.txt', 'r') as secret_file:
    cs = secret_file.read().rstrip('\r\n')

In [3]:
# Build the praw Reddit client used by every later cell.
# The client secret (cs) and account password (pw) are the values read from
# the local files; the remaining arguments are literal.
# NOTE(review): client_id is hard-coded in the notebook while the other
# credentials come from files — consider loading it the same way.
reddit = praw.Reddit(
    # how this script identifies itself to the API
    user_agent="NFLTextAnalysis/0.0.1",
    # app credentials
    client_id="XbesrQBvKymjgLdgg_D6lA",
    client_secret=cs,
    # account credentials
    username="ta_api",
    password=pw,
)

In [64]:
# Look up the NO-TN game thread by its submission id, then show the title so
# the output confirms the right thread was retrieved.
submission = reddit.submission(id="qtuqqs")
submission.title

'Game Thread: New Orleans Saints (5-3) at Tennessee Titans (7-2)'

### Create Comments Dataframe

In [65]:
# Resolve every "Load More Comments" placeholder first (limit=None means no cap)
# so that the flattened list below contains the entire comment tree.
submission.comments.replace_more(limit=None)

# One tuple per comment (top-level and nested alike) holding the variables of
# interest, coerced to plain str/int where applicable.
comments_list = [
    (
        str(comment.author),
        str(comment.body),
        int(comment.ups),
        int(comment.downs),
        comment.created_utc,
        str(comment.author_flair_text),
    )
    for comment in submission.comments.list()
]

# Turn the collected tuples into a dataframe with named columns.
comments_df = pd.DataFrame(
    comments_list,
    columns=['author', 'body', 'ups', 'downs', 'time', 'flair_text'],
)

In [66]:
# preview the first rows of the dataframe to sanity-check the scraped columns
comments_df.head()

Unnamed: 0,author,body,ups,downs,time,flair_text
0,KentuckyBourbon94,An underrated factor of playing against the Sa...,62,0,1636917000.0,:Titans: Titans
1,PuddingJello,Next year RTP calls will be reviewable but ref...,40,0,1636917000.0,:Saints: Saints
2,Theinsulated,I wish I could fuck up every aspect of my job ...,41,0,1636917000.0,:Saints: Saints
3,Successful-Client215,I'm a Titans fan and these QB roughing calls a...,27,0,1636917000.0,:Titans: Titans
4,TotesMcGotes13,Sorry Saints bros. That RTP was bullshit.,28,0,1636918000.0,:Titans: Titans


In [67]:
# check the dtype pandas assigned to each scraped column
comments_df.dtypes

author         object
body           object
ups             int64
downs           int64
time          float64
flair_text     object
dtype: object

### Simple Sentiment Analysis

In [68]:
# Score every comment body with TextBlob exactly once, then split the result
# into its two components. Each sentiment result exposes both .polarity and
# .subjectivity, so there is no need to analyse the same text twice.
comment_sentiments = [TextBlob(body).sentiment for body in comments_df["body"]]
comments_df["polarity"] = [sentiment.polarity for sentiment in comment_sentiments]
comments_df["subjectivity"] = [sentiment.subjectivity for sentiment in comment_sentiments]

In [69]:
# show the first ten comments next to their sentiment scores, rendered with a background colour gradient
comments_df[["body", "polarity", "subjectivity"]].head(10).style.background_gradient()

Unnamed: 0,body,polarity,subjectivity
0,An underrated factor of playing against the Saints is that you’re going to get at least one beneficial call from the refs,-0.3,0.4
1,Next year RTP calls will be reviewable but refs will never overturn a single challenge and the rule will be called a failure and removed,-0.129365,0.171429
2,I wish I could fuck up every aspect of my job and never have to worry about consequences.,-0.4,0.6
3,I'm a Titans fan and these QB roughing calls are bullshit. Its bad for the game.,-0.55,0.533333
4,Sorry Saints bros. That RTP was bullshit.,-0.5,1.0
5,Looks like Sean and Dennis Allen didn’t get the defense ready for the Titans’ to have an infinite number of downs on the goal line. Seems like an oversight on their part to me.,0.2,0.5
6,Horseshit. Im sorry saint bros.,-0.5,1.0
7,[removed],0.0,0.0
8,"This league is really going downhill. Ridiculous taunting rules, can’t breath on the QB anymore, picking and choosing when to call borderline PI. Let’s just skip to Rams vs Tampa NFC championship already.",-0.066667,0.6
9,Josh Allen “slipped” when he ran into the brick wall that is El Jefe,0.0,0.0


### Test Function Implementation

In [12]:
# note that the %load_ext autoreload line only needs to be run once
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# by including this autoreload command, we only need to re-import nfl_gamethreads if we make/save changes to the original py file
%autoreload
from nfl_gamethreads import nfl_gamethreads

In [11]:
# pull the comments for game thread 'qtuqqs' through the project helper, then report how many came back
NO_TN_list = nfl_gamethreads.get_comments(reddit, 'qtuqqs')
len(NO_TN_list)

1699

In [17]:
# fetch the game-level record for game thread 'qtuqqs' through the project helper and print it
# NOTE(review): the meaning of each field in the result is defined in nfl_gamethreads.get_game_data — confirm there
NO_TN_data = nfl_gamethreads.get_game_data(reddit, 'qtuqqs')
print(NO_TN_data)

('qtuqqs', 'Titans', 23, 'Saints', 21, 'Titans', 44, 2, 'Tennessee', '3.0', '42.5')


### Variable Distributions

In [70]:
# Histogram of polarity scores: binned score on x, number of comments on y.
polar = (
    alt.Chart(comments_df)
    .mark_bar()
    .encode(x=alt.X("polarity:Q", bin=True), y=alt.Y("count()"))
)

# Same histogram for subjectivity scores.
subject = (
    alt.Chart(comments_df)
    .mark_bar()
    .encode(x=alt.X("subjectivity:Q", bin=True), y=alt.Y("count()"))
)

# Lay the two distributions out side by side (| is horizontal concatenation).
polar | subject

In [71]:
# Histogram of upvote counts across all comments.
alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("ups:Q", bin=True),
    y=alt.Y("count()"),
)

In [72]:
# Number of comments per flair text.
alt.Chart(comments_df).mark_bar().encode(
    x=alt.X("flair_text"),
    y=alt.Y("count()"),
)

In [78]:
# Polarity plotted against comment creation time, keeping only comments whose
# timestamp is below the cutoff.
# NOTE(review): 1636925000 is a hard-coded Unix timestamp — presumably chosen to
# drop comments posted well after the game; confirm and consider naming it.
alt.Chart(
    comments_df.loc[comments_df['time'].lt(1636925000)]
).mark_line(point=True).encode(
    x=alt.X('time'),
    y=alt.Y("polarity:Q"),
)

In [74]:
# Keep only comments whose flair is one of the two teams playing in this
# thread (Saints or Titans), then display the resulting subset.
primary_fans_df = comments_df.loc[
    comments_df['flair_text'].isin([':Saints: Saints', ':Titans: Titans'])
]
primary_fans_df

Unnamed: 0,author,body,ups,downs,time,flair_text,polarity,subjectivity
0,KentuckyBourbon94,An underrated factor of playing against the Sa...,62,0,1.636917e+09,:Titans: Titans,-0.300000,0.400000
1,PuddingJello,Next year RTP calls will be reviewable but ref...,40,0,1.636917e+09,:Saints: Saints,-0.129365,0.171429
2,Theinsulated,I wish I could fuck up every aspect of my job ...,41,0,1.636917e+09,:Saints: Saints,-0.400000,0.600000
3,Successful-Client215,I'm a Titans fan and these QB roughing calls a...,27,0,1.636917e+09,:Titans: Titans,-0.550000,0.533333
4,TotesMcGotes13,Sorry Saints bros. That RTP was bullshit.,28,0,1.636918e+09,:Titans: Titans,-0.500000,1.000000
...,...,...,...,...,...,...,...,...
2165,Mr_Sarcastic12,🤡🤡🤡,1,0,1.636924e+09,:Titans: Titans,0.000000,0.000000
2166,No-Can946,"Just scored another, how can you not see it",1,0,1.636925e+09,:Saints: Saints,0.000000,0.000000
2167,scottonetwenty,Because Cam scoring on the Cardinals has anyth...,1,0,1.636926e+09,:Saints: Saints,-0.155556,0.288889
2168,No-Can946,You know what’s great for chemistry? Winning.,1,0,1.636926e+09,:Saints: Saints,0.650000,0.750000


In [75]:
# Polarity histogram for the two primary fandoms: one column (facet) and one
# colour per flair.
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("polarity:Q", bin=True),
    y=alt.Y("count()"),
    color=alt.Color("flair_text"),
    column=alt.Column("flair_text"),
)

In [76]:
# Subjectivity histogram for the two primary fandoms: one column (facet) and
# one colour per flair.
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("subjectivity:Q", bin=True),
    y=alt.Y("count()"),
    color=alt.Color("flair_text"),
    column=alt.Column("flair_text"),
)

In [79]:
# Polarity against comment creation time for the two primary fandoms, one line
# colour per flair, keeping only comments whose timestamp is below the cutoff.
# NOTE(review): 1636925000 is a hard-coded Unix timestamp — presumably chosen to
# drop comments posted well after the game; confirm and consider naming it.
alt.Chart(
    primary_fans_df.loc[primary_fans_df['time'].lt(1636925000)]
).mark_line(point=True).encode(
    x=alt.X('time'),
    y=alt.Y("polarity:Q"),
    color=alt.Color('flair_text'),
)

In [80]:
# Scatter of polarity against subjectivity for the two primary fandoms, with
# one facet column and one colour per flair.
alt.Chart(data=primary_fans_df).mark_point().encode(
    alt.X('polarity:Q'),
    alt.Y('subjectivity:Q'),
    alt.Color('flair_text:N'),
    alt.Column('flair_text:N'),
)

## Ideas

- Which fandoms are the most/least subjective & polar?
- Which fandoms show up in rival game threads the most?
- Which fandoms show up in other game threads the most? (need to normalize for fandom population)
- If we scrape multiple years, how have variables of interest changed over time?
- Which fandoms are most negative about their own team/rival/opponent in game threads?
- What types of games are the most polarizing? Rivalry? Later in the season? Narrative explanations? In-game reasons?
- Which fandoms have the foulest mouth (filter for curse words specifically)?
- Which fandoms hate the refs the most/least?
- What are the most popular words by fandom/game thread?
- Extract popular player names and do sentiment analysis on the subset of their relevant comments.