In [1]:
# Path hack
import os
# Change directory from the current Analysis folder to the top level folder for easier navigation.
# The starting directory is remembered the first time this cell runs, so re-running the cell always
# lands in the same folder instead of climbing one more level on every execution.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))
# confirm we're at /RedditTextAnalysis
print(os.getcwd())

/Users/prcork/MiscDataProjects/collaboration/RedditTextAnalysis


In [2]:
# import reddit api wrapper
import praw
import pandas as pd
from textblob import TextBlob
import altair as alt

### Reddit API Instance

In [3]:
# access password and client secret id via local files
# .strip() drops the trailing newline most editors append when saving; left in place it would be
# passed along as part of the credential
with open('pw.txt', 'r') as pw_file:
    pw = pw_file.read().strip()

with open('client_secret.txt', 'r') as secret_file:
    cs = secret_file.read().strip()

In [4]:
# Gather the app credentials and secret info in one place, then hand them to praw to build the Reddit instance
reddit_credentials = {
    "client_id": "XbesrQBvKymjgLdgg_D6lA",
    "client_secret": cs,
    "user_agent": "NFLTextAnalysis/0.0.1",
    "username": "ta_api",
    "password": pw,
}
reddit = praw.Reddit(**reddit_credentials)

In [5]:
# Look up the NO-TN game thread by its submission id; the title echoed below confirms it is the right thread
no_tn_thread_id = "qtuqqs"
submission = reddit.submission(id=no_tn_thread_id)
submission.title

'Game Thread: New Orleans Saints (5-3) at Tennessee Titans (7-2)'

### Create Comments Dataframe

### Test Function Implementation at the individual gamethread level

In [6]:
# note that the %load_ext autoreload line only needs to be run once
%load_ext autoreload

In [7]:
# a bare %autoreload reloads the already-imported modules right now, so re-running this cell picks up any
# changes saved to the nfl_gamethreads py file without restarting the kernel
%autoreload
from nfl_gamethreads import nfl_gamethreads

In [9]:
# call the project's get_comments helper with the Reddit instance and the NO-TN thread id ('qtuqqs', the same
# id used for the submission lookup), then display how many items came back
NO_TN_list = nfl_gamethreads.get_comments(reddit, 'qtuqqs')
len(NO_TN_list)

Comments: 2163


2163

In [10]:
# column labels for the comment records; presumably in the same order as the fields get_comments returns (verify)
comment_columns = [
    'comment_id',
    'submission_id',
    'author',
    'body',
    'upvotes',
    'utc_time',
    'author_flair',
]
comments_df = pd.DataFrame(data=NO_TN_list, columns=comment_columns)

In [11]:
# Run the project's analyze_text helper on the 'body' column; the preview below shows the resulting
# `polarity` and `subjectivity` columns.
# NOTE(review): this rebinds comments_df to the helper's output, so re-running the cell feeds the
# already-analyzed frame back in — confirm analyze_text tolerates that.
comments_df = nfl_gamethreads.analyze_text(comments_df, text_column='body')
# fixed seed so the same ten rows are previewed on every Restart & Run All
comments_df.sample(10, random_state=42).style.background_gradient()

Unnamed: 0,comment_id,submission_id,author,body,upvotes,utc_time,author_flair,polarity,subjectivity
319,hkm6zb3,qtuqqs,NosyargKcid,That finger was fucked,5,1636914120.0,:Buccaneers: Buccaneers,-0.6,0.7
594,hkmwunv,qtuqqs,Pyrrhus65,"After watching the Saints kicker miss 2 extra points, I think we all knew that was exactly the margin the game would be decided by.",9,1636923823.0,:Falcons: Falcons,-0.05,0.25
232,hkmcu5t,qtuqqs,,"“BWW taps a new keg every 35 seconds” There are 1217 BWW in the nation They are open approx 12 hours a day. Which is 43,200 seconds If each one taps one keg per day, that equates to 35.4 seconds for each keg tap So they aren’t lying but they are misleading The fuckers",7,1636916268.0,,0.068182,0.477273
588,hkm31uq,qtuqqs,WorthlessSemicolon,Josh did not slip lol.,8,1636912652.0,:Titans: Titans,0.8,0.7
1444,hkmm1yr,qtuqqs,NNKarma,As if it was a surprise with all the best skill position players injured on offense,3,1636919688.0,:Saints: Saints,1.0,0.3
271,hkmlhte,qtuqqs,Oren-,what the heck is arthur smith doing with these poor falcons man lol,6,1636919478.0,:Titans: Titans,0.2,0.65
1502,hkmfovg,qtuqqs,Successful-Client215,Titans fans are disgusted,2,1636917310.0,:Titans: Titans,-1.0,1.0
466,hkmcz06,qtuqqs,Arrestedbybatman,Booger fuckin hates us,3,1636916314.0,:Titans: Titans,0.0,0.0
74,hkmphax,qtuqqs,ChocolateMorsels,Run when it’s clearly not working at crucial moments. Classic titans.,10,1636920968.0,:Titans: Titans,0.038889,0.516667
2088,hkmvmbb,qtuqqs,,[deleted],2,1636923334.0,,0.0,0.0


### Variable Distributions

In [12]:
# Build the same binned count histogram for each sentiment score, then show the two side by side
polar, subject = (
    alt.Chart(data=comments_df).mark_bar().encode(
        alt.X(f"{score}:Q", bin=True),
        y='count()'
    )
    for score in ("polarity", "subjectivity")
)

alt.hconcat(polar, subject)

In [13]:
# distribution of comment upvotes: binned upvote counts on x, number of comments per bin on y
upvotes_axis = alt.X("upvotes:Q", bin=True)
alt.Chart(comments_df).mark_bar().encode(upvotes_axis, y='count()')

In [14]:
# number of comments per author flair
flair_axis = alt.X("author_flair")
alt.Chart(comments_df).mark_bar().encode(flair_axis, y='count()')

In [15]:
# Only plot comments posted before the cutoff. utc_time values look like Unix epoch seconds, in which
# case 1636925000 is 2021-11-14 21:23:20 UTC — presumably shortly after the game ended (confirm).
utc_cutoff = 1636925000
posted_before_cutoff = comments_df['utc_time'] < utc_cutoff
alt.Chart(data=comments_df[posted_before_cutoff]).mark_line(point=True).encode(
    x='utc_time',
    y=alt.Y("polarity:Q")
)

In [16]:
# keep only the comments whose flair is one of the two teams playing in this game thread
playing_team_flairs = [':Saints: Saints', ':Titans: Titans']
has_playing_team_flair = comments_df['author_flair'].isin(playing_team_flairs)
primary_fans_df = comments_df[has_playing_team_flair]
primary_fans_df

Unnamed: 0,comment_id,submission_id,author,body,upvotes,utc_time,author_flair,polarity,subjectivity
0,hkmff63,qtuqqs,KentuckyBourbon94,An underrated factor of playing against the Sa...,63,1.636917e+09,:Titans: Titans,-0.300000,0.400000
1,hkmfulz,qtuqqs,PuddingJello,Next year RTP calls will be reviewable but ref...,38,1.636917e+09,:Saints: Saints,-0.129365,0.171429
2,hkmfxb1,qtuqqs,Theinsulated,I wish I could fuck up every aspect of my job ...,41,1.636917e+09,:Saints: Saints,-0.400000,0.600000
3,hkmffyh,qtuqqs,Successful-Client215,I'm a Titans fan and these QB roughing calls a...,26,1.636917e+09,:Titans: Titans,-0.550000,0.533333
4,hkmgdvr,qtuqqs,TotesMcGotes13,Sorry Saints bros. That RTP was bullshit.,28,1.636918e+09,:Titans: Titans,-0.500000,1.000000
...,...,...,...,...,...,...,...,...,...
2158,hkmxk5c,qtuqqs,Never_Less,Ok. And they won. That is literally the only t...,1,1.636924e+09,:Titans: Titans,0.000000,1.000000
2159,hknp28h,qtuqqs,Never_Less,I do appreciate you beating the Cardinals.,1,1.636936e+09,:Titans: Titans,0.000000,0.000000
2160,hkmxfox,qtuqqs,Mr_Sarcastic12,🤡🤡🤡,1,1.636924e+09,:Titans: Titans,0.000000,0.000000
2161,hkn14vv,qtuqqs,No-Can946,"Just scored another, how can you not see it",1,1.636925e+09,:Saints: Saints,0.000000,0.000000


In [17]:
# binned polarity histogram for the two playing teams: one facet column and one colour per flair
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("polarity:Q", bin=True),
    y=alt.Y('count()'),
    color=alt.Color('author_flair'),
    column=alt.Column('author_flair')
)

In [18]:
# binned subjectivity histogram for the two playing teams: one facet column and one colour per flair
alt.Chart(primary_fans_df).mark_bar().encode(
    x=alt.X("subjectivity:Q", bin=True),
    y=alt.Y('count()'),
    color=alt.Color('author_flair'),
    column=alt.Column('author_flair')
)

In [19]:
# Polarity over time, one line per playing team, limited to comments posted before the cutoff.
# utc_time values look like Unix epoch seconds, in which case 1636925000 is 2021-11-14 21:23:20 UTC (confirm).
fans_utc_cutoff = 1636925000
fan_posted_before_cutoff = primary_fans_df['utc_time'] < fans_utc_cutoff
alt.Chart(data=primary_fans_df[fan_posted_before_cutoff]).mark_line(point=True).encode(
    x='utc_time',
    y=alt.Y("polarity:Q"),
    color='author_flair'
)

In [20]:
# polarity against subjectivity for each comment, split into one coloured facet per playing team's flair
alt.Chart(data=primary_fans_df).mark_point().encode(
    alt.X('polarity:Q'),
    alt.Y('subjectivity:Q'),
    alt.Color('author_flair:N'),
    alt.Column('author_flair:N')
)

## Ideas

- Which fandoms are the most/least subjective & polar?
- Which fandoms show up in rival game threads the most?
- Which fandoms show up in other game threads the most? (need to normalize for fandom population)
- If we scrape multiple years, how have variables of interest changed over time?
- Which fandoms are most negative about their own team/rival/opponent in game threads?
- What types of games are the most polarizing? Rivalry? Later in the season? Narrative explanations? In-game reasons?
- Which fandoms have the foulest mouths? (filter for curse words specifically)
- Which fandoms hate the refs the most/least?
- What are the most popular words by fandom/game thread?
- Extract popular player names and run sentiment analysis on the subset of comments relevant to them.