In [42]:
import pandas as pd
import numpy as np

import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer 

In [43]:
# Fetch the 'vader_lexicon' NLTK resource, the lexicon file that nltk's VADER
# SentimentIntensityAnalyzer (imported above) loads by default.
nltk.download('vader_lexicon') 

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/airubin/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [44]:
# Load the tweet export, drop exact duplicate rows, then normalise two columns:
#   date -> parsed into pandas datetimes
#   user -> every literal '/' character removed
df = (
    pd.read_csv('tweets.csv')
    .drop_duplicates()
    .assign(
        date=lambda frame: pd.to_datetime(frame['date']),
        user=lambda frame: frame['user'].str.replace('/', '', regex=False),
    )
)

In [45]:
# One shared VADER analyzer instance; the scoring cell below calls
# analyser.polarity_scores() on each tweet's text.
analyser = SentimentIntensityAnalyzer()

In [46]:
# Score every tweet with VADER and keep the 'compound' value as a new column.
compound_scores = df['text'].apply(lambda tweet: analyser.polarity_scores(tweet)['compound'])
df['compound'] = compound_scores
# Retain only tweets scoring at or below -0.05 (the cutoff the VADER
# documentation gives for "negative" sentiment) and renumber the rows from 0.
is_negative = df['compound'] <= -0.05
df = df.loc[is_negative].reset_index(drop=True)

In [47]:
#create subframe of tweets for NBA and filter to only contain tweets from the season
# Anchor the tag at a leading word boundary: a bare case-insensitive substring
# search for "NBA" also matches inside other tokens such as "WNBA" or
# "unbalanced". "#NBA", "NBA's" and "NBAFinals" still match.
NBA_df = df[df['text'].str.contains(r'\bNBA', case=False)]
# Keep only tweets posted in the months listed below (October through April).
NBA_df = NBA_df[NBA_df['date'].dt.month.isin([10,11,12,1,2,3,4])]
# Take an explicit copy so later cells can add columns to NBA_df without
# pandas treating it as a slice of df (SettingWithCopyWarning).
NBA_df = NBA_df.copy()
NBA_df.head()

Unnamed: 0,date,text,user,compound
0,2013-01-02 18:01:25+00:00,NBA Legend Latrell Sprewell -- ARRESTED for Bl...,TMZ,-0.8374
15,2013-10-10 23:42:20+00:00,NBA Star Kendrick Perkins -- ARRESTED ... Alle...,TMZ,-0.5904
22,2013-11-21 04:30:39+00:00,Did you see the STRUGGLE SUIT former NBA’er Tr...,TMZ,-0.5147
23,2013-12-18 01:24:11+00:00,Watch Paul McCartney hit all five stages of gr...,RollingStone,-0.4939
26,2013-11-06 18:26:24+00:00,Dwight Howard -- Fan Detained After Striking N...,TMZ,-0.1027


In [48]:
#create subframe of tweets for MLB and filter to only contain tweets from the season
# Anchor the tag at a leading word boundary so "MLB" is only matched at the
# start of a token ("#MLB", "MLB's") and never in the middle of a longer one.
MLB_df = df[df['text'].str.contains(r'\bMLB', case=False)]
# Keep only tweets posted in the months listed below (March through October).
MLB_df = MLB_df[MLB_df['date'].dt.month.isin([3,4,5,6,7,8,9,10])]
# Take an explicit copy so later cells can add columns to MLB_df without
# pandas treating it as a slice of df (SettingWithCopyWarning).
MLB_df = MLB_df.copy()
MLB_df.head()

Unnamed: 0,date,text,user,compound
2,2013-04-22 15:47:24+00:00,Which struggling slugger will rebound in Week ...,nytimes,-0.4215
3,2013-04-29 16:05:44+00:00,Which lefty will strike out the most batters i...,nytimes,-0.128
6,2013-07-08 19:09:58+00:00,50 Cent DROPPED from MLB All-Star Bash After D...,TMZ,-0.6249
10,2013-08-23 03:29:36+00:00,MLB's Ryan Braun apologizes: 'I have no one to...,CNN,-0.1406
13,2013-08-01 09:51:28+00:00,"Alex Rodriguez, #MLB reportedly negotiating de...",FoxNews,-0.7003


In [49]:
#create subframe of tweets for NFL and filter to only contain tweets from the season
# Anchor the tag at a leading word boundary: a bare case-insensitive substring
# search for "NFL" also hits ordinary words such as "conflict", "influence"
# and "inflation". "#NFL", "NFL's" and "NFLDraft" still match.
NFL_df = df[df['text'].str.contains(r'\bNFL', case=False)]
# Keep only tweets posted in the months listed below (August through February).
NFL_df = NFL_df[NFL_df['date'].dt.month.isin([8,9,10,11,12,1,2])]
# Take an explicit copy so later cells can add columns to NFL_df without
# pandas treating it as a slice of df (SettingWithCopyWarning).
NFL_df = NFL_df.copy()
NFL_df.head()

Unnamed: 0,date,text,user,compound
1,2013-02-04 21:46:04+00:00,"NFL finds ""no indication"" that Beyonce caused ...",RollingStone,-0.296
4,2013-08-29 04:20:00+00:00,Read our full in-depth story about NFL player ...,RollingStone,-0.7845
5,2013-08-31 16:45:07+00:00,Patriots drop Tim Tebow after just 12 weeks; N...,nypost,-0.2732
7,2013-08-31 15:23:07+00:00,"Aaron Hernandez was a promising NFL player, bu...",RollingStone,-0.7634
8,2013-08-28 18:06:04+00:00,Aaron Hernandez might have been one of the NFL...,RollingStone,-0.814


In [50]:
# Read the player table from all_NFL_players.csv and keep only rows whose
# 'Start Year' is 2013 or later.
NFL_players_df = pd.read_csv('all_NFL_players.csv')
nfl_started_2013_on = NFL_players_df['Start Year'] >= 2013
NFL_players_df = NFL_players_df.loc[nfl_started_2013_on]
# Plain Python list of the remaining 'Name' values, used for tweet matching.
list_of_NFL = NFL_players_df['Name'].to_list()

In [51]:
# Flag each tweet that mentions at least one player from list_of_NFL
# (case-sensitive substring match on the name), then keep only flagged tweets.
# Entries that are not non-empty strings are skipped: a NaN name (blank CSV
# cell) makes `name in tweet` raise TypeError, and '' is a substring of every
# tweet, which would flag everything.
nfl_names = [name for name in list_of_NFL if isinstance(name, str) and name]
nfl_mentions_player = NFL_df['text'].apply(lambda tweet: any(name in tweet for name in nfl_names))
# assign() returns a new frame, so the column is not written into a filtered
# slice of df (the pattern that triggers SettingWithCopyWarning).
NFL_df = NFL_df.assign(contains_player=nfl_mentions_player)
NFL_df = NFL_df[NFL_df['contains_player']]
NFL_df.head()

Unnamed: 0,date,text,user,compound,contains_player
58,2015-12-22 00:37:07+00:00,Odell Beckham Jr. is suspended for one game by...,nytimes,-0.4767,True
92,2016-11-14 12:53:01+00:00,NFL star Mike Evans sits during national anthe...,CNN,-0.25,True
132,2019-11-30 02:03:07+00:00,"Josh Shaw, a defensive back for the Arizona Ca...",CNN,-0.4588,True
147,2020-02-27 20:34:02+00:00,"NFL's Mark Walton Arrested Again, Fifth Time I...",TMZ,-0.4767,True
154,2020-02-27 22:54:32+00:00,NFL Prospect Mitchell Wilcox Violently Drilled...,TMZ,-0.3818,True


In [53]:
# Read the player table from all_NBA_players.csv and keep only rows whose
# 'From Year' is 2013 or later.
NBA_players_df = pd.read_csv('all_NBA_players.csv')
nba_started_2013_on = NBA_players_df['From Year'] >= 2013
NBA_players_df = NBA_players_df.loc[nba_started_2013_on]
# Plain Python list of the remaining 'Player Name' values, used for tweet matching.
list_of_NBA = NBA_players_df['Player Name'].to_list()

In [56]:
# Flag each tweet that mentions at least one player from list_of_NBA
# (case-sensitive substring match on the name), then keep only flagged tweets.
# Entries that are not non-empty strings are skipped: a NaN name (blank CSV
# cell) makes `name in tweet` raise TypeError, and '' is a substring of every
# tweet, which would flag everything.
nba_names = [name for name in list_of_NBA if isinstance(name, str) and name]
nba_mentions_player = NBA_df['text'].apply(lambda tweet: any(name in tweet for name in nba_names))
# assign() returns a new frame, so the column is not written into a filtered
# slice of df (the pattern that triggers SettingWithCopyWarning).
NBA_df = NBA_df.assign(contains_player=nba_mentions_player)
NBA_df = NBA_df[NBA_df['contains_player']]
NBA_df

Unnamed: 0,date,text,user,compound,contains_player
155,2020-02-26 19:58:32+00:00,NBA's Malik Monk Suspended for Violating Leagu...,TMZ,-0.765,True
224,2020-10-01 04:17:42+00:00,LA Lakers crush Miami Heat in NBA finals opene...,guardian,-0.1531,True
232,2020-10-27 15:19:02+00:00,NBA's Tyler Herro Visits 'Turks & Cake' W/ Kat...,TMZ,-0.3182,True
326,2023-04-20 23:00:07+00:00,"Joel Embiid, the 7-foot, 280-pound center of t...",nytimes,-0.1692,True
329,2023-10-25 19:20:06+00:00,"From @TheAthletic: Victor Wembanyama, the NBA'...",nytimes,-0.3566,True
333,2023-10-26 19:15:03+00:00,From @TheAthletic: The hype that came with Vic...,nytimes,-0.0613,True
