In [7]:
def get_posts(subreddit, timeout=10):
    """Scrape the post titles shown on a subreddit's front page.

    Parameters
    ----------
    subreddit : str
        Subreddit path including the leading ``/r/`` (e.g. ``'/r/lakers'``);
        it is appended to ``https://www.reddit.com``.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error instead of blocking indefinitely. Defaults to 10.

    Returns
    -------
    paragraph : str
        Every title stripped of surrounding whitespace and joined with
        single spaces.
    sentences : list of str
        The unmodified text of each title, in page order.
    """
    from bs4 import BeautifulSoup
    import requests

    # Identify as a desktop browser when requesting the page.
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9",
    }

    response = requests.get('https://www.reddit.com' + subreddit,
                            headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    matches = soup.find_all(class_="title")

    # Drop the first two and last four matches, and take every other element
    # in between because each title is matched twice.
    titles = matches[2:-4:2]

    sentences = [title.text for title in titles]
    paragraph = ' '.join(text.strip() for text in sentences)
    return paragraph, sentences

In [8]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# nltk.download('vader_lexicon')
# nltk.download('punkt')
def analyze_paragraph(paragraph):
    """Score a single block of text with VADER.

    Returns the ``(pos, neu, neg, compound)`` values reported by
    ``SentimentIntensityAnalyzer.polarity_scores`` for ``paragraph``.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(paragraph)
    return tuple(scores[key] for key in ('pos', 'neu', 'neg', 'compound'))
    
def analyze_sentences(sentences):
    """Score each sentence with VADER and add the scores up.

    Returns a ``(pos, neu, neg, compound)`` tuple holding the running sums
    (not averages) of the per-sentence polarity scores; an empty input
    yields ``(0, 0, 0, 0)``.
    """
    analyzer = SentimentIntensityAnalyzer()
    score_keys = ('pos', 'neu', 'neg', 'compound')
    totals = [0, 0, 0, 0]
    for text in sentences:
        scores = analyzer.polarity_scores(text)
        # Accumulate left to right so the totals match a plain running sum.
        for slot, key in enumerate(score_keys):
            totals[slot] += scores[key]
    return tuple(totals)


In [9]:
def get_days_teams(today=None, timeout=10):
    """Fetch the winners and losers of the previous day's games.

    The box-score index of basketball-reference.com is requested for the day
    before ``today``. Each element with class ``teams`` is treated as one
    game, and the text of the first table cell of its ``winner`` / ``loser``
    row is taken as the team name.

    Parameters
    ----------
    today : datetime.date, optional
        Reference day; games from the day before it are fetched. Defaults to
        the current local date.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error instead of blocking indefinitely. Defaults to 10.

    Returns
    -------
    winners, losers : list of str
        Team names, index-aligned so that ``winners[i]`` beat ``losers[i]``.
    """
    from bs4 import BeautifulSoup
    import requests
    import datetime

    # Identify as a desktop browser when requesting the page.
    headers = {
        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9",
    }

    if today is None:
        today = datetime.date.today()
    # Real date arithmetic: subtracting 1 from the day number produced day 0
    # on the first of a month and never rolled the month/year back.
    game_day = today - datetime.timedelta(days=1)

    url = "https://www.basketball-reference.com/boxscores/?month={0}&day={1}&year={2}".format(
        game_day.month, game_day.day, game_day.year)

    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    winners = []
    losers = []
    for game in soup.find_all(class_="teams"):
        winner_row = game.find(class_="winner")
        loser_row = game.find(class_="loser")
        if winner_row is None or loser_row is None:
            # No decided result for this game; skip it so the two lists
            # stay aligned instead of crashing on None.
            continue
        winner_cell = winner_row.find('td')
        loser_cell = loser_row.find('td')
        if winner_cell is None or loser_cell is None:
            continue
        winners.append(winner_cell.get_text(strip=True))
        losers.append(loser_cell.get_text(strip=True))

    return winners, losers

In [10]:
# Team label -> subreddit path (appended to https://www.reddit.com by get_posts).
# The keys are looked up with the team names returned by get_days_teams, so
# they must match those strings exactly.
subreddits = {
    'LA Lakers': '/r/lakers',
    'Golden State': '/r/warriors',
    'Chicago': '/r/chicagobulls',
    'Toronto': '/r/torontoraptors',
    'Boston': '/r/bostonceltics',
    'Cleveland': '/r/clevelandcavs',
    'New York': '/r/nyknicks',
    'San Antonio': '/r/nbaspurs',
    'Miami': '/r/heat',
    'Houston': '/r/rockets',
    'Philadelphia': '/r/sixers',
    'Portland': '/r/ripcity',
    'Oklahoma City': '/r/thunder',
    'Minnesota': '/r/timberwolves',
    'Dallas': '/r/mavericks',
    'Atlanta': '/r/atlantahawks',
    'LA Clippers': '/r/laclippers',
    'Detroit': '/r/detroitpistons',
    'Washington': '/r/washingtonwizards',
    'Charlotte': '/r/charlottehornets',
    'Sacramento': '/r/kings',
    'Milwaukee': '/r/mkebucks',
    'Phoenix': '/r/suns',
    'Indiana': '/r/pacers',
    'Orlando': '/r/orlandomagic',
    'Denver': '/r/denvernuggets',
    'Utah': '/r/utahjazz',
    'Brooklyn': '/r/gonets',
    'Memphis': '/r/memphisgrizzlies',
    'New Orleans': '/r/nolapelicans',
}

In [11]:
def compute_all():
    """Build the sentiment rows for every team returned by get_days_teams.

    Returns a single list with the winners' rows first, followed by the
    losers' rows.
    """
    winning_teams, losing_teams = get_days_teams()
    rows = compute_winners(winning_teams)
    rows = rows + compute_losers(losing_teams)
    return rows
    
def _score_teams(teams, won):
    """Return one sentiment row per team, each tagged with the ``won`` flag.

    For every team its subreddit is looked up in ``subreddits``, the titles
    are fetched with ``get_posts`` and then scored per sentence and as one
    paragraph. Row layout:
    ``[team, sen_pos, sen_neu, sen_neg, sen_com,
       par_pos, par_neu, par_neg, par_com, won]``.
    """
    rows = []
    for team in teams:
        paragraph, sentences = get_posts(subreddits[team])
        sen_pos, sen_neu, sen_neg, sen_com = analyze_sentences(sentences)
        par_pos, par_neu, par_neg, par_com = analyze_paragraph(paragraph)
        rows.append([team, sen_pos, sen_neu, sen_neg, sen_com,
                     par_pos, par_neu, par_neg, par_com, won])
    return rows


def compute_winners(winners):
    """Return the sentiment rows for the winning teams (last field ``True``)."""
    return _score_teams(winners, True)


def compute_losers(losers):
    """Return the sentiment rows for the losing teams (last field ``False``)."""
    return _score_teams(losers, False)


In [13]:
# store results
import pandas as pd
import datetime
import os

# Score every team from compute_all() and stamp each row with the time of this run.
data = compute_all()
df = (
    pd.DataFrame(
        data,
        columns=['team', 'sen_pos', 'sen_neu', 'sen_neg', 'sen_com',
                 'par_pos', 'par_neu', 'par_neg', 'par_com', 'won'],
    )
    .assign(date=pd.to_datetime(datetime.datetime.now()))
    .set_index('date')
)

# Append to the running log. newline='' keeps the text layer from translating
# the line terminators pandas writes, which would otherwise leave blank rows
# between records on Windows.
with open('nba_sentiment.csv', 'a', newline='') as f:
    # NOTE: no header row is ever written; readers must supply the column
    # names themselves (date followed by the ten columns listed above).
    df.to_csv(f, header=False)


In [14]:
# Display the rows that the previous cell appended to nba_sentiment.csv.
df

Unnamed: 0_level_0,team,sen_pos,sen_neu,sen_neg,sen_com,par_pos,par_neu,par_neg,par_com,won
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-01-08 10:52:41.017404,New York,2.633,21.598,1.769,0.225,0.059,0.875,0.066,-0.663,True
2018-01-08 10:52:41.017404,LA Lakers,2.569,22.901,1.53,1.5856,0.083,0.86,0.056,0.9212,True
2018-01-08 10:52:41.017404,Miami,2.147,23.439,0.413,4.3549,0.121,0.861,0.018,0.9899,True
2018-01-08 10:52:41.017404,Phoenix,3.832,20.673,1.494,4.1028,0.126,0.82,0.053,0.9689,True
2018-01-08 10:52:41.017404,Portland,2.067,22.194,1.74,-0.1625,0.071,0.854,0.075,0.3595,True
2018-01-08 10:52:41.017404,Dallas,1.058,24.83,1.112,-0.2422,0.044,0.909,0.047,-0.2874,False
2018-01-08 10:52:41.017404,Atlanta,3.088,23.635,0.277,5.3227,0.146,0.84,0.014,0.9938,False
2018-01-08 10:52:41.017404,Utah,2.125,23.977,0.898,0.9383,0.093,0.852,0.055,0.8093,False
2018-01-08 10:52:41.017404,Oklahoma City,1.862,23.212,1.926,-0.9072,0.08,0.826,0.094,-0.4777,False
2018-01-08 10:52:41.017404,San Antonio,1.813,24.051,1.136,0.058,0.059,0.872,0.069,-0.6309,False
