# Matchday Thread Analyzer

In [111]:
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from datetime import datetime
import nltk
from nltk import FreqDist
import praw
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(color_codes=True)
from sklearn.feature_extraction import text
import string


# Initiate reddit instance
reddit = praw.Reddit(
    site_name='match-day-bot',
    user_agent='match-day-bot user agent',
)

In [2]:
# Match thread to analyze (Spurs v Leicester, 13 May 2018)
MATCH_THREAD_URL = 'https://www.reddit.com/r/coys/comments/8j3tx0/match_thread_spurs_v_leicester_pl_13_may_2018/'
coys_matchday_thread = reddit.submission(url=MATCH_THREAD_URL)

In [3]:
# Call replace_more with no limit first, then take the flattened list of
# every comment in the thread.
coys_matchday_thread.comments.replace_more(limit=None)
matchday_comment_instances = list(coys_matchday_thread.comments.list())

### Collect match thread comments and comment metadata

In [5]:
# Per-comment fields, one list entry per comment (all lists share the same order)
author = [comment.author for comment in matchday_comment_instances]
body = [comment.body for comment in matchday_comment_instances]
karma = [comment.score for comment in matchday_comment_instances]

# Convert each comment's creation timestamp to a UTC datetime exactly once,
# then read every date/time component from that single value instead of
# re-converting the same timestamp for each component.
created_at = [datetime.utcfromtimestamp(comment.created_utc)
              for comment in matchday_comment_instances]
year = [created.year for created in created_at]
month = [created.month for created in created_at]
day = [created.day for created in created_at]
hour = [created.hour for created in created_at]
minute = [created.minute for created in created_at]

### Place comment data into a Pandas dataframe

In [6]:
match_thread_data = {'username': author, 'comment': body, 'karma': karma,
                     'year': year, 'month': month, 'day': day, 'hour': hour, 'minute': minute}
# Build the frame with an explicit column order
df_match_thread = pd.DataFrame(
    data=match_thread_data,
    columns=['username', 'comment', 'karma', 'year', 'month', 'day', 'hour', 'minute'])
# Drop comments whose body is the '[deleted]' placeholder. The boolean mask is
# applied directly (no detour through the filtered frame's index), and the
# surviving rows keep their original index labels. `.copy()` makes the result
# independent of the unfiltered frame.
df_match_thread = df_match_thread[df_match_thread['comment'] != '[deleted]'].copy()

In [7]:
# Preview the first five rows
df_match_thread.head(5)

Unnamed: 0,username,comment,karma,year,month,day,hour,minute
0,akanefive,NBC announcer giving incorrect information abo...,68,2018,5,13,14,31
1,charcoil23,Dumb fuck announcers don't know that 4th place...,55,2018,5,13,14,31
2,a_magic_wizard,"""Toby is our best defender and the core of our...",46,2018,5,13,14,27
3,Keskekun,Sub off lamela and sissoko and bring on Keane ...,44,2018,5,13,14,41
4,TheGameIsAboutGlory1,I fucking hate how goalkeepers are a protected...,46,2018,5,13,14,48


### Find the top 30 words used throughout the match thread

In [8]:
def text_lemmatize(text):
    """
    Lowercase a string, delete its punctuation, split it on whitespace and
    lemmatize every resulting token.

    Example
    -------
    >>> text_lemmatize('The quick brown fox jumped over the lazy dog.')
    ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']
    """
    # Translation table that deletes every character in string.punctuation
    strip_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(strip_punctuation)

    tokens = nltk.tokenize.WhitespaceTokenizer().tokenize(cleaned)
    lemmatize = nltk.stem.WordNetLemmatizer().lemmatize
    return list(map(lemmatize, tokens))


stop_words = text.ENGLISH_STOP_WORDS


def word_count(dataframe, column, stop_words=stop_words):
    """
    Count word frequencies across every row of one text column.

    All rows of ``dataframe[column]`` are joined with single spaces and passed
    through ``text_lemmatize``. Tokens of 3 characters or fewer and tokens
    found in ``stop_words`` are discarded before counting.

    Returns the ``(word, count)`` pairs from ``FreqDist(...).most_common()``.
    """
    # Treat the whole column as one document
    corpus = dataframe[column].str.cat(sep=" ")

    kept_tokens = []
    for token in text_lemmatize(corpus):
        # Skip short tokens and stop words
        if len(token) <= 3 or token in stop_words:
            continue
        kept_tokens.append(token)

    # Word frequencies, most frequent first
    return FreqDist(kept_tokens).most_common()


# Thirty most frequent words across all comments
word_count(dataframe=df_match_thread, column='comment')[:30]

[('lamela', 146),
 ('game', 129),
 ('just', 117),
 ('fuck', 108),
 ('goal', 96),
 ('season', 95),
 ('kane', 94),
 ('like', 84),
 ('good', 83),
 ('fucking', 77),
 ('sissoko', 74),
 ('shit', 61),
 ('think', 56),
 ('need', 55),
 ('really', 51),
 ('dont', 48),
 ('player', 48),
 ('rose', 45),
 ('ball', 44),
 ('love', 44),
 ('match', 44),
 ('play', 43),
 ('time', 41),
 ('harry', 40),
 ('look', 40),
 ('commentator', 38),
 ('right', 37),
 ('wanyama', 37),
 ('want', 37),
 ('dier', 37)]

## Tottenham v Leicester started at 14:00 UTC
### Vardy kick-started the match with an early goal in the 4th minute: TOT 0-1 LEI

In [9]:
# Let long comment text display up to 300 characters per cell
pd.set_option('display.max_colwidth', 300)


def comments_game_snapshot(dataframe, hour, minute_start, minute_end):
    """
    Return the comments posted in one time window, highest karma first.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns 'hour', 'minute', 'username', 'comment'
        and 'karma'.
    hour : int
        Only rows whose 'hour' equals this value are kept.
    minute_start, minute_end : int
        Inclusive lower and upper bounds on the 'minute' column. The window
        cannot span more than one hour.

    Returns
    -------
    pandas.DataFrame
        The matching rows, sorted by 'karma' in descending order and reduced
        to the columns 'username', 'comment' and 'karma'. Original index
        labels are preserved.
    """
    in_window = (
        (dataframe['hour'] == hour)
        & (dataframe['minute'] >= minute_start)
        & (dataframe['minute'] <= minute_end)
    )
    # Filter with the boolean mask itself. Re-selecting rows by the filtered
    # frame's index labels returns wrong rows whenever labels are not unique.
    df_game_snapshot = dataframe[in_window].sort_values(
        by=['karma'], ascending=False)
    return df_game_snapshot[['username', 'comment', 'karma']]


# Ten highest-karma comments posted 14:04-14:06 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=14, minute_start=4, minute_end=6
).head(10)

Unnamed: 0,username,comment,karma
219,buttlovingpanda,KWP gives up the free kick then lets Vardy loose for the goal. Not a great start for him. Come on youngin!,13
374,charcoil23,KWP 100% at fault for that.,8
138,mrocks301,Jesus we are shit on set pieces,8
1402,gobucks2,Lamela floating in no man's land didn't exactly help him out...,7
384,shaalth,KWP gave the free kick away and then lost Vardy for the header...,5
383,Zengoroth,Lol ffs,4
386,fictional_pulp,That start was sub-optimal.,4
511,oysterpirate,Well that about sums up the last two months,3
512,FPnigel,and it begins...,3
513,TheRcktMan,Not a good start,3


### Kane answers with a 7th minute goal: TOT 1-1 LEI

In [10]:
# Ten highest-karma comments posted 14:07-14:11 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=14, minute_start=7, minute_end=11
).head(10)

Unnamed: 0,username,comment,karma
139,Callum247,That’ll shut up all the miserable whiners in here.,9
141,Spursfan14,It’s just inevitable that he wins the golden boot isn’t it?,8
177,Keskekun,"He's coming for you, he's coming for yooooou. Tiny Egyptian Afroman he's coming for you",7
229,buttlovingpanda,Fucking Kane lol,6
230,alreadymilesaway,"I say it every game, but I fucking love Harry",6
231,SenorQuack,KWP looking solid in attack,6
291,joeypickthall,THIS GAME IS GONNA BE FUCKING CRAZY,6
140,macrowave2,Big boy Lucas making the interception/assist,6
289,Chroem-,Harry want the Golden boot bois!!!!,5
290,tripstreet,"THATS A PROPER FINISH HARRY, GO ON",5


### Mahrez scores at the 16th minute: TOT 1-2 LEI

In [11]:
# Ten highest-karma comments posted 14:16-14:20 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=14, minute_start=16, minute_end=20
).head(10)

Unnamed: 0,username,comment,karma
27,TheGameIsAboutGlory1,"That goal is absolutely, 100% on Wanyama. No fucking idea what he's doing there, but he straight up does the dumbest shit at times. Can't blame ""playing through an injury"" on that garbage.",19
37,assassin_9729,Thank fuck we won on wednesday,16
282,Blazing_Frazer,I'll take a 4\-4 if Kane gets the golden boot tbh...,10
112,khj24,What the fuck is wanyama doing,10
142,TheGameIsAboutGlory1,"Fuck, might as well turn the match off now. ""No way this match is gonna end at 2-1."" Whenever commentators say shit like that after a fast start, the goals always stop.",8
143,ZParis,"Damn Lucas, that touch was dirty.",8
178,realquarterb,8-2 it is boys,7
179,mikezomfg,what the fuck without jan we are literally shambolic,7
180,highrouleur,Would we take 4-4 with Kane getting enough to beat salah to the Boot?,7
236,Revalie,5-5 incoming,7


### Iheanacho scores at the 47th minute: TOT 1-3 LEI

In [34]:
# Ten highest-karma comments posted 15:05-15:08 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=5, minute_end=8
).head(10)

Unnamed: 0,username,comment,karma
99,Blazing_Frazer,Just have to laugh really,11
496,stella__art,Why are we the way we are,8
150,Undercoverfootmodel,Good thing we are already in champions not having to play in sña mini playoff...,8
151,AnotherScoutMain,What the fuck is this game,7
247,khj24,Nobody told the announcers at HT about the qualifying rounds?!?! And lol @ third consecutive third placed finish,7
248,akanefive,"Holy shit the announcer just said that if Spurs won it would be their third straight third place finish, and that it would mean they wouldn’t have to play a CL qualifier. FFS.",6
312,TehElk,Lmao this game,6
415,CruxMihiGrataQuies,Reminds me of a pre-Poch era match.,5
413,PerfectLoops,"Banger! Holding off wanyama and smacking it top corner. Hats off, nice goal",4
936,AnotherScoutMain,This sport is gay,4


### Lamela responds with a goal at the 49th minute: TOT 2-3 LEI

In [37]:
# Ten highest-karma comments posted 15:09-15:12 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=9, minute_end=12
).head(10)

Unnamed: 0,username,comment,karma
38,strawberry_girls,Imagine this sub right now if last week’s game hadn’t ended the way it did,16
82,bterre108,"Commentators have been shit, but shoutout to the camera man finding that dude with two massive tubs of popcorn",12
83,tripstreet,lamela checked this forum at half time and says get fucked haters,11
372,Jackalope117,eRiK lAmELa iSnT gOoD eNoUgH fOr Us,10
152,tjakes12,What a ball from Lucas btw,9
377,bterre108,Lucas just CREATES. Lad needs to play more,9
115,ndphillips,I'm very sorry to report that u/WindyCOYS exploded with happiness after KWP's assist.,9
114,BrbnDrnkr,Lamela haters go away,9
153,Cool_Sandwich1,Moura is so good!,8
250,MaxMhad,Lamela read this fucking thread at half time and wanted to shut everyone up,7


### Fuchs own goal 53': TOT 3-3 LEI

In [42]:
# Ten highest-karma comments posted 15:13-15:17 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=13, minute_end=17
).head(10)

Unnamed: 0,username,comment,karma
40,PMYOUMYTITS,Lamela reading this thread at halftime.,16
101,warox13,"St. James' Park on the final day will get you, Chelsea.",10
285,TELLS_YOU_TO_FUCKOFF,ALL THE PEOPLE CHATTING SHIT ABOUT LAMELA JUST GOT SILENCED,10
100,ndphillips,Coco's coming for salah,9
154,Tomisnthere,Lamelas desire is amazing,9
84,HarryWanks,"""Lamela doesn't have the quality to play on an top 4 club""\n\n""Lamela is so shit what does he being to the team? ""\n\n",9
186,MidFlightRiot,"6 goals 53 mins, and it's not being shown anywhere ffs",8
185,ClassWarNowII,Maybe we should insult our players more often.,7
254,PMYOUMYTITS,COCO <3,6
320,IntellegentIdiot,At this rate Lamela will win the Golden Boot,6


### Lamela again 60': TOT 4-3 LEI

In [43]:
# Ten highest-karma comments posted 15:18-15:22 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=18, minute_end=22
).head(10)

Unnamed: 0,username,comment,karma
26,eric844,"Lamela shutting down the haters, including me. Gg coco",19
30,H2Pcoys,LOL CHELSEA\n\n3-0 to Newcastle,17
41,Thetonn,This wonderful clusterfuck is a lot more fun when it isn't deciding champions league football.,15
56,HoratioMG,LAMELAAAAAAA YOU BEAUTYYYY,13
65,TELLS_YOU_TO_FUCKOFF,"LADS, IT'S FUCKING TOTTENHAM",12
224,bterre108,"r/coys: Lamela is shit, arguably should be off this team\n\nLamela: hat trick in 15 minues. \n\nWhat even is life?",12
103,warox13,HAHAHAAHAHAHAHAHAHAAH LAMELAAAAAAA,11
116,ndphillips,"Our defense looks like 11 people who'd never played FIFA, all with the R2 button held down continually.",10
118,AnotherScoutMain,Quick Kane switch identities with lamela,10
104,khj24,Lamela catching salah you heard it here first,9


### Vardy's equalizer at 73' sandwiched in between Kane's "selfish" play and Kane winning the game: TOT 4-4 LEI

In [56]:
# Ten highest-karma comments posted 15:32-15:33 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=32, minute_end=33
).head(10)

Unnamed: 0,username,comment,karma
24,jetmora4,Kane it’s too late for the boot now you can’t be doing selfish shit like that,19
42,Xiomaraff,Wow classic ‘18 Spurs. 2 penalties not fucking given and then we let in a goal. Fuck the fuck off with this bullshit,15
44,MertBot,Just because it was an accident doesn't make it not a foul. Clear pen :/,14
68,alterego87,Oh Kane fuck off,12
89,NeonUprising,"Harry Kane was so selfish there, fuck",12
67,khj24,That’s on Kane,12
124,albinuss,Nah what the fuck I want third place fuck,10
105,GhostofBobStoops,I feel like I'm watching a fucking Fifa game\n,9
271,trems21,can someone tell me why poch likes subbing off players who play well,7
201,mcicchillo,2 clear penalties no calls,7


### Kane scores the winner 76': TOT 5-4 LEI

In [57]:
# Ten highest-karma comments posted 15:34-15:38 UTC
comments_game_snapshot(
    dataframe=df_match_thread, hour=15, minute_start=34, minute_end=38
).head(10)

Unnamed: 0,username,comment,karma
12,Thetonn,"At this rate, Harry could still get the golden boot and at the same time Spurs still lose.",25
45,spurs-r-us,Tears in my eyes watching Gazza celebrate,14
46,scottzander,DID YOU JUST SEE GAZZA DOING THE FORTNITE CELEBRATION,14
57,scottzander,"Sanchez up front for the knock downs, good idea Poch, we need some more goals!",14
221,COYCOYS,"So I went to both drab Watford and Newcastle games, decided to skip this game and it's 5-4 ffffsssss hate myself",13
69,warox13,[This Sub Today](https://i.imgur.com/x2zNkuV.jpg),13
1656,PMYOUMYTITS,Poch wants to make sure Coco doesn't steal Kane's goals. No chance of that with Sissoko.,12
70,Denzel8179,I’ve bet on Harry hat trick 🤞,11
107,H2Pcoys,America would be 100% in if every game was played like this,11
91,PMYOUMYTITS,Anyone have a gif of gazza doing the dance?,10


In [69]:
# Encode each comment's hour and minute as one sortable HH.MM number.
# The minute is zero-padded to two digits: without padding 15:05 would become
# "15.5" -> 15.5 and sort (and plot) after 15:49 instead of before 15:06.
# NOTE: HH.MM is a label-like encoding, not a linear time axis; the step from
# xx.59 to (xx+1).00 is 0.41 rather than 0.01.
df_match_thread['time'] = pd.to_numeric(
    df_match_thread['hour'].astype(str)
    + '.'
    + df_match_thread['minute'].astype(str).str.zfill(2))
df_match_thread.head()

Unnamed: 0,username,comment,karma,year,month,day,hour,minute,time
0,akanefive,NBC announcer giving incorrect information about the CL right now.,68,2018,5,13,14,31,14.31
1,charcoil23,Dumb fuck announcers don't know that 4th place no longer has to go through qualifying rounds. Imagine getting paid to be this bad at your job.,55,2018,5,13,14,31,14.31
2,a_magic_wizard,"""Toby is our best defender and the core of our defence.""\n\n*Toby starts, Leicester score 2 in 16 mins*\n\n""Our defence is shit without Jan""",46,2018,5,13,14,27,14.27
3,Keskekun,Sub off lamela and sissoko and bring on Keane and Berbatov,44,2018,5,13,14,41,14.41
4,TheGameIsAboutGlory1,I fucking hate how goalkeepers are a protected species. Kane just got a foul called against him for literally standing. That was the foul. Standing.,46,2018,5,13,14,48,14.48


In [115]:
# Number of comments for each distinct value of the 'time' column
df_comments_per_minute = df_match_thread.groupby(
    ['time']).size().reset_index(name='counts')

x = df_comments_per_minute['time']
y = df_comments_per_minute['counts']

# Send Bokeh output to the notebook
output_notebook()

# Generate graph
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select"

p = figure(title="Comments Per Minute", tools=TOOLS, plot_width=900)

p.square(x, y, legend="Comments")
p.line(x, y, legend="Comments")

# Both axes get the same label colour and standoff; only the text differs
for axis, axis_title in ((p.xaxis, "Hour"), (p.yaxis, "Number of Comments")):
    axis.axis_label = axis_title
    axis.axis_label_text_color = "#aa6666"
    axis.axis_label_standoff = 10

show(p)