# First, create functions to grab today's games, then align and wrangle the data for machine learning.

In [1]:
from datetime import datetime
from nba_py import Scoreboard
import numpy as np
import pandas as pd

In [2]:
def get_todays_games():
    """Return today's NBA matchups as ``[road, home]`` abbreviation pairs.

    Asks the ``nba_py`` scoreboard for the current local date and reads the
    ``GAMECODE`` column of its game header.  The last six characters of each
    code are split into the road abbreviation (first three) and the home
    abbreviation (last three).  ``BKN`` is rewritten to ``BRK``.

    The first element of the returned list is always the header row
    ``['road', 'home']``; the actual games follow it.
    """
    now = datetime.now()
    board = Scoreboard(month=now.month, day=now.day, year=now.year)

    # Brooklyn is the only abbreviation that gets rewritten.
    renames = {'BKN': 'BRK'}

    result = [['road', 'home']]
    for code in board.game_header()['GAMECODE']:
        road, home = code[-6:-3], code[-3:]
        result.append([renames.get(road, road), renames.get(home, home)])

    return result

In [3]:
# Preview today's schedule; the first row is the ['road', 'home'] header.
get_todays_games()

[['road', 'home'],
 ['HOU', 'OKC'],
 ['BOS', 'PHX'],
 ['LAC', 'POR'],
 ['MIL', 'GSW']]

In [4]:
def create_todays_dataset_p1(season='2018-19'):
    """Build today's feature rows from each team's ``last20()`` split.

    For every matchup returned by ``get_todays_games`` (header row dropped),
    the road and then the home team's ``TeamLastNGamesSplits(...).last20()``
    frame is fetched from ``nba_py``.  Thirteen stat columns are kept per
    team (``AST/TO`` is derived here as ``AST / TOV``) and the two teams'
    values are concatenated road-first, giving 26 values per game.  The
    spread column is not added here.

    Parameters
    ----------
    season : str, optional
        Season string forwarded to ``TeamLastNGamesSplits``.  Defaults to
        ``'2018-19'``, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        One row per game, 26 columns.  Shape ``(0, 26)`` when there are no
        games today.

    Raises
    ------
    IndexError
        If a scheduled team abbreviation is missing from the team list.
    """
    import numpy as np
    from nba_py import team

    # Columns retained (and created) because they were already found to be
    # statistically correlated with the outcome.
    stat_columns = ['FGM', 'FG3M', 'FTM', 'DREB', 'AST', 'STL',
                    'TOV', 'BLK', 'PTS', 'AST/TO', 'FG3_PCT',
                    'FG_PCT', 'FT_PCT']

    matchups = get_todays_games()[1:]  # drop the ['road', 'home'] header row
    if not matchups:
        # Without this guard the final 3-D slice fails on an empty array.
        return np.empty((0, 2 * len(stat_columns)))

    teamids = team.TeamList().info()
    # NOTE(review): drops the last 15 rows of the team list — presumably
    # entries that are not current NBA teams; confirm against nba_py output.
    teamids = teamids[:-15]
    teamids = teamids[['TEAM_ID', 'ABBREVIATION']]
    teamids = teamids.rename(index=str, columns={"ABBREVIATION": "Team"})
    teamids = teamids.replace('BKN', 'BRK')  # match get_todays_games naming
    teamids = teamids.sort_values('Team')

    todays_rows = []
    for matchup in matchups:
        per_team = []
        for team_ in matchup:
            match = teamids.loc[teamids['Team'] == team_]
            if match.empty:
                raise IndexError(
                    "team abbreviation {!r} not found in team list".format(team_))
            team_id = match.values[0, 0]

            splits = team.TeamLastNGamesSplits(team_id=team_id, season=season)
            df = splits.last20()
            df['AST/TO'] = df['AST'] / df['TOV']
            per_team.append(df[stat_columns].values)

        # Road team's columns first, then the home team's.
        todays_rows.append(np.concatenate((per_team[0], per_team[1]), axis=1))

    # Each entry is a 2-D block; keep only its first row.
    return np.array(todays_rows)[:, 0, :]

def scrape_espn_for_today_spreads(todays_dataframe):
    """Append each game's home-team spread to today's feature rows.

    Downloads ESPN's NBA lines page, reads every ``<table>`` into one frame
    and scans column ``1`` for cells equal to ``'SPREAD'``.  The text cell
    that follows such a marker (e.g. ``'-4.5+4.5HOU: -110OKC: -110'``) is
    parsed into a team abbreviation and that team's signed spread.  Each
    parsed line is then matched to a game from ``get_todays_games`` and
    converted to the home team's spread: negated when the quoted team is on
    the road, unchanged when it is at home.

    Spreads are attached by matchup, not by the order ESPN lists them, so a
    different ordering on the page cannot shift a spread onto another game.

    Parameters
    ----------
    todays_dataframe : array-like
        One feature row per game, in the same order as
        ``get_todays_games()[1:]`` (as built by ``create_todays_dataset_p1``).

    Returns
    -------
    (list, numpy.ndarray)
        The ``[road, home]`` matchups and the feature rows with the home
        spread appended as the last column.

    Raises
    ------
    ValueError
        If no spread could be found for one of the rows.
    """
    # ``import urllib`` alone does not guarantee the ``request`` submodule
    # is loaded, so import it explicitly.
    import urllib.request
    from io import StringIO

    import pandas as pd
    from bs4 import BeautifulSoup

    matchups = get_todays_games()[1:]

    url = "http://www.espn.com/nba/lines/_/date"
    with urllib.request.urlopen(url) as response:
        page = response.read()
    soup = BeautifulSoup(page, "lxml")

    frames = []
    for table in soup.find_all('table'):
        frames.extend(pd.read_html(StringIO(str(table))))

    cells = []
    if frames:
        # ignore_index keeps row labels unique so "the next row" is well
        # defined even when the page contains several tables.
        table_df = pd.concat(frames, ignore_index=True)
        if 1 in table_df.columns:
            cells = table_df[1].tolist()

    team_that_plays = []
    spread_of_that_team = []
    for pos, cell in enumerate(cells):
        if cell != 'SPREAD':
            continue
        print(cell)

        line = _next_text_cell(cells, pos)
        if line is None:
            continue
        print(line)

        parsed = _parse_spread_line(line)
        if parsed is None:
            continue
        team_that_plays.append(parsed[0])
        spread_of_that_team.append(parsed[1])

    print("TEAM THAT PLAYS:")
    print(team_that_plays)

    # Home spread keyed by the game's position in ``matchups``.
    home_spread_by_game = {}
    for team, spread in zip(team_that_plays, spread_of_that_team):
        for game_pos, game in enumerate(matchups):
            if team not in game:
                continue
            if game.index(team) == 0:
                # Quoted team is on the road: the home spread is the opposite.
                home_spread = -spread
            else:
                # Quoted team is at home: its spread already is the home spread.
                home_spread = spread
            home_spread_by_game.setdefault(game_pos, home_spread)

    ready_for_it = []
    for game_pos, features in enumerate(todays_dataframe):
        if game_pos not in home_spread_by_game:
            label = matchups[game_pos] if game_pos < len(matchups) else game_pos
            raise ValueError(
                "no ESPN spread found for game {}".format(label))
        ready = list(features)
        ready.append(home_spread_by_game[game_pos])
        ready_for_it.append(ready)

    return matchups, np.array(ready_for_it)


def _next_text_cell(cells, pos):
    """Return the first string among the two cells after *pos*, else None."""
    for candidate in cells[pos + 1:pos + 3]:
        if isinstance(candidate, str):
            return candidate
    return None


def _parse_spread_line(line):
    """Parse ``'-4.5+4.5HOU: -110OKC: -110'`` into ``('HOU', -4.5)``.

    The leading signed number is taken as the spread of the first team named
    in the line.  Returns None when the line has no leading signed number or
    no letters.
    """
    import re

    # Two-letter ESPN abbreviations (seen with the trailing ':' because three
    # characters are sliced) mapped to the three-letter codes used elsewhere.
    # NOTE(review): other ESPN spellings may also differ from nba_py's
    # abbreviations — confirm and extend this table if so.
    espn_to_nba = {'SA:': 'SAS', 'NY:': 'NYK', 'GS:': 'GSW'}

    number = re.match(r'\s*([+-])(\d+(?:\.\d+)?)', line)
    if number is None:
        return None
    spread = float(number.group(1) + number.group(2))

    start = next((k for k, ch in enumerate(line) if ch.isalpha()), None)
    if start is None:
        return None
    team = line[start:start + 3]

    return espn_to_nba.get(team, team), spread

In [5]:
# Build today's per-game stat rows (road-team stats followed by home-team stats).
meh = create_todays_dataset_p1()

In [6]:
# Scrape the spreads; `data` is `meh` with the home spread appended as the last column.
matchups, data = scrape_espn_for_today_spreads(meh)

SPREAD
-4.5+4.5HOU: -110OKC: -110
SPREAD
-9.5+9.5BOS: -110PHX: -110
SPREAD
+5.5-5.5LAC: -110POR: -110
SPREAD
+6-6MIL: -110GS: -110
TEAM THAT PLAYS:
['HOU', 'BOS', 'LAC', 'MIL']


# Now import the machine learning model written in the models section, and predict the winners. 

In [7]:
import pickle

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# model/scaler files that were produced by this project.
model_path = '../models/finalized_model.sav'
# load the model from disk (the with-block closes the file handle afterwards)
with open(model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
#result = loaded_model.score(X_test, Y_test)
#print(result)

# load the scaler that is applied to the features before prediction
scaler_path = '../models/finalized_scaler.sav'
with open(scaler_path, 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)


In [8]:
# X_today starts out as another name for `data`; `data` is read again later for the raw spreads.
X_today = data

In [9]:
# Shape of a single game's row (26 team stats plus the home spread).
data[0].shape

(27,)

In [10]:
# Shape of the full matrix: one row per game.
X_today.shape

(4, 27)

In [11]:
# Rebind X_today to the output of the scaler loaded from ../models/finalized_scaler.sav.
X_today = loaded_scaler.transform(X_today)

In [28]:
def spread2ML(spread):
    """Convert a point spread into an approximate moneyline value.

    Evaluates one of two author-derived cubic polynomials in ``spread``:
    one for spreads up to and including 1.5, the other for larger spreads.
    """
    if spread <= 1.5:
        cubic, quadratic, linear, constant = (
            1.71409498, 10.90008433, 22.40247106, -138.20112341)
    else:
        cubic, quadratic, linear, constant = (
            1.66494668, -20.03302374, 101.20347437, -34.68833849)

    return cubic * spread**3 + quadratic * spread**2 + linear * spread + constant

In [29]:
# Raw (un-scaled) home spreads come from the last column of `data`;
# X_today holds the scaler's output and is what the model sees.
spreads = data[:,-1]
for i,game in enumerate(X_today):
    # The prediction is used below as an index into the [road, home] pair
    # (presumably 0 = road wins, 1 = home wins; confirm against the training labels).
    prediction = loaded_model.predict([game])[0]
    
    print("Game: ", matchups[i])
    print("Home Team Spread : ",matchups[i][1], spreads[i])
    # The road team's moneyline is computed from the negated home spread.
    print("Approximate Moneyline Odds: ",matchups[i][0],spread2ML(-spreads[i]),matchups[i][1],spread2ML(spreads[i]))
    print("Predicted Winner: ",matchups[i][prediction])

Game:  ['HOU', 'OKC']
Spread of for :  OKC 4.5
Moneylines:  HOU -174.48244055 OKC 166.776831655
Winner:  OKC
Line of prediction
Game:  ['BOS', 'PHX']
Spread of for :  PHX 9.5
Moneylines:  BOS -836.914171175 PHX 546.247935255
Winner:  BOS
Line of prediction
Game:  ['LAC', 'POR']
Spread of for :  POR -5.5
Moneylines:  LAC 192.937306295 POR -216.869715555
Winner:  POR
Line of prediction
Game:  ['MIL', 'GSW']
Spread of for :  GSW -6.0
Moneylines:  MIL 210.97213597 GSW -250.45742957
Winner:  GSW
Line of prediction


# Conclusion: The cells above scrape everything needed to build a dataset for today's games; loading the model and scaler saved in previous sections then makes it possible to pick the winner of each game.

In [26]:
# `i` is whatever value an earlier loop left behind in the notebook namespace.
matchups[i][1]

['BOS', 'PHX']

In [15]:
# The last column of `data` is the home spread appended by scrape_espn_for_today_spreads.
spreads = data[:,-1]

array([ 4.5,  9.5, -5.5, -6. ])

# Laid out below is how you would confirm that the ordering is correct and make sure each prediction is for the corresponding game.

In [None]:
# Wrap the feature rows in a DataFrame directly: DataFrame.append was removed
# in pandas 2.0, so the old "start empty, then append" form no longer runs.
dataframe1 = pd.DataFrame(data)
# dataframe2 is simply the [road, home] matchup list (not a DataFrame).
dataframe2 = matchups


#df =  pd.mergexxxx


#data3 = pd.DataFrame([])

#data3 = games



#pd. sort ....



In [None]:
# Check what kind of object dataframe1 ended up as.
type(dataframe1)

Make a DataFrame from one of the splits, with an additional column for the home team.

Now do the same with matchups, and sort the initial dataframe based on that.

In [None]:
# NOTE(review): `games` is not assigned in any cell visible here; it has to be defined before this runs.
games

In [None]:
# Hard-coded sample of parsed team abbreviations, used by the ordering experiment in the following cells.
team_that_plays = ['NYK', 'SAS', 'PHI', 'DEN', 'CHI', 'DAL', 'TOR', 'MIN', 'OKC', 'DET']

In [None]:
# NOTE(review): this rebinds `data` to an empty frame that is not used in this
# cell, discarding whatever `data` held before.
data = pd.DataFrame([])
# Map each team to the index of the entry in `games` that contains it.
actual_order = {}
for team in team_that_plays:
    print(team)
    # NOTE(review): `games` is not assigned in any cell visible here.
    for t,game in enumerate(games):
        if team in game:
            print("In game", t)
            
            actual_order[team] = t
# Rebinds actual_order from a dict to a list of (team, index) pairs ordered by index.
actual_order = sorted(actual_order.items(), key=lambda kv: kv[1])


#Now need to 

In [None]:
# NOTE(review): requires actual_order to still be a dict; once it has been
# rebound to the sorted list of pairs it no longer has .items().
sorted(actual_order.items())

In [None]:
# Display the current value of actual_order.
actual_order

In [None]:
# Order the (team, game index) pairs by game index.
# NOTE(review): fails if actual_order was already rebound to a list by an earlier cell.
actual_order = sorted(actual_order.items(), key=lambda kv: kv[1])

In [None]:
# Display actual_order again after the sort cell.
actual_order

In [None]:
data = pd.DataFrame([])
data = 