In [0]:
import pandas as pd 
import pprint as pp
from datetime import datetime
date_format = "%Y-%m-%d"
import time
from multiprocessing.dummy import Pool as ThreadPool 
import numpy as np


In [5]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
# Convert MIN string into int
def ConvertMinutes(x):
    return float(x.split(':')[0])

# Convert GAME_DATE_EST into datetime 
def ConvertGameDate(x):
    return datetime.strptime(x.split('T')[0], date_format)

# Calculate Offensive Rating
def CalcOffRtg(x):
    poss = 0.96*((x['FGA'])+(x['TO'])+0.44*(x['FTA'])-(x['OREB']))
    off_rtg = ((x['PTS'])/poss) * 100
    return off_rtg

# Read in data and do some preproccessing
def ImportData(team_stats_csv, summaries_csv):
    team_stats = pd.read_csv(team_stats_csv)
    summaries = pd.read_csv(summaries_csv)
    game_dates = summaries[['GAME_DATE_EST','HOME_TEAM_ID', 'VISITOR_TEAM_ID', 'GAME_ID']].copy()

    team_stats = pd.merge(team_stats, game_dates, on=['GAME_ID'])
    team_stats = team_stats.drop(columns=['TEAM_ABBREVIATION', 'TEAM_CITY'])

    # Change GAME_DATE_EST to datetime object
    team_stats['GAME_DATE_EST'] = team_stats['GAME_DATE_EST'].apply(ConvertGameDate)


    # Calculate Off Rtg
    team_stats['OFF_RTG'] = team_stats.apply(CalcOffRtg, axis=1)

    # Add Home or Away suffixes
    away, home = [x for _, x in team_stats.groupby(team_stats['TEAM_ID'] == team_stats['HOME_TEAM_ID'])]
    home = home.add_suffix('_HOME')
    away = away.add_suffix('_AWAY')

    team_stats = pd.merge(home, away, left_on=['GAME_ID_HOME'], right_on=['GAME_ID_AWAY'])
    team_stats = team_stats.drop(columns=['Unnamed: 0_HOME', 'Unnamed: 0_AWAY'])
    team_stats['MIN'] = team_stats['MIN_HOME'].apply(ConvertMinutes)
    team_stats = team_stats.drop(columns=['MIN_HOME', 'MIN_AWAY'])
    
    # Add binary WIN_HOME field
    team_stats['WIN_HOME'] = 0
    team_stats.loc[team_stats['PTS_HOME'] > team_stats['PTS_AWAY'], 'WIN_HOME'] = 1
    return team_stats

# Find the last N games with a common opponent for each game
def LastNCommonOpponents(n, df):
    print("***** FINDING LAST N COMMON OPPONENTS *****")
    iteration = 0
    tuples = pd.DataFrame()

    for index, i in df.iterrows():
        # Create Lists and Variables
        tuples_a = pd.DataFrame()
        tuples_b = pd.DataFrame()
        team_a_games = []
        team_b_games = []
        team_a_opponents = []
        team_b_opponents = []
        distances = []
        selected_games = []
        team_a = i['TEAM_NAME_HOME']
        team_b = i['TEAM_NAME_AWAY']
        game_date = i['GAME_DATE_EST_HOME']

        # Find common matchups
        for fuck, j in df.iloc[index:].iterrows():
            if (team_a == j['TEAM_NAME_HOME'] or team_a == j['TEAM_NAME_AWAY']) and j['GAME_DATE_EST_HOME'] < game_date:
                if team_a == j['TEAM_NAME_HOME']:
                    team_a_opponents.append(j['TEAM_NAME_AWAY'])
                    j = j.append(pd.Series({'OPPONENT': 'AWAY'}))
                else:
                    team_a_opponents.append(j['TEAM_NAME_HOME'])
                    j = j.append(pd.Series({'OPPONENT': 'HOME'}))
                team_a_games.append(j)
            
            elif (team_b == j['TEAM_NAME_HOME'] or team_b == j['TEAM_NAME_AWAY']) and j['GAME_DATE_EST_HOME'] < game_date:
                if team_b == j['TEAM_NAME_HOME']:
                    team_b_opponents.append(j['TEAM_NAME_AWAY'])
                    j = j.append(pd.Series({'OPPONENT': 'AWAY'}))
                else:
                    team_b_opponents.append(j['TEAM_NAME_HOME'])
                    j = j.append(pd.Series({'OPPONENT': 'HOME'}))
                team_b_games.append(j)

            if len(team_a_games) >= 40 and len(team_b_games) >= 40:
                break

        # Calculate distances 
        for k in range(0, len(team_a_games)):
            for l in range(0, len(team_b_games)):
                    if team_a_opponents[k] == team_b_opponents[l]:
                        distance_a = i['GAME_DATE_EST_HOME'] - team_a_games[k]['GAME_DATE_EST_HOME'] 
                        distance_b = i['GAME_DATE_EST_HOME'] - team_b_games[l]['GAME_DATE_EST_HOME']
                        total_distance = distance_a + distance_b
                        distances.append({
                                            'game_a': team_a_games[k],
                                            'game_b': team_b_games[l],
                                            'distance': total_distance
                                        })
        
        sorted_distances = sorted(distances, key=lambda x: x['distance']) 
        temp = (sorted_distances[:n])
        count = 0
        if len(temp) >= n:
            for x in temp:
                temp_a = x['game_a'].to_frame().transpose()
                temp_a = temp_a.add_suffix('_COMMON_MATCHUP_A_' + str(count))
                temp_a['CURRENT_GAME_ID'] = i['GAME_ID_HOME']
                # temp_a['WIN_HOME'] = i['WIN_HOME']
                # temp_a['GAME_DATE_EST'] = i['GAME_DATE_EST_HOME']
                # temp_a['TEAM_NAME_HOME'] = i['TEAM_NAME_HOME']
                # temp_a['TEAM_NAME_AWAY'] = i['TEAM_NAME_AWAY']
                tuples_a = tuples_a.append(temp_a, sort=False)            
                
                temp_b = x['game_b'].to_frame().transpose()
                temp_b = temp_b.add_suffix('_COMMON_MATCHUP_B_' + str(count))
                temp_b['CURRENT_GAME_ID'] = i['GAME_ID_HOME']
                tuples_b = tuples_b.append(temp_b, sort=False)  
                count +=1 
            
            if len(temp) > 0:
                tuples = tuples.append(pd.merge(tuples_a, tuples_b, on=['CURRENT_GAME_ID'], sort=False))
        
        print ('COMPLETED ITERATION: ', iteration)
        iteration += 1

    return tuples

# Find the last N games each team has played for each game
def LastNGames(n, df):
    print("***** FINDING LAST N GAMES *****")
    iteration = 0
    tuples = pd.DataFrame()

    for index, i in df.iterrows():
        previous_games_a = pd.DataFrame()
        previous_games_b = pd.DataFrame()
        team_a = i['TEAM_NAME_HOME']
        team_b = i['TEAM_NAME_AWAY']
        game_date = i['GAME_DATE_EST_HOME']

        # Get previous games for team a
        for index2, j in df.iloc[index:].iterrows():
            if team_a == j['TEAM_NAME_HOME'] and j['GAME_DATE_EST_HOME'] < game_date:
                    j = j.append(pd.Series({'OPPONENT': 'AWAY'}))
                    previous_games_a = previous_games_a.append(j, ignore_index = True)

            elif team_a == j['TEAM_NAME_AWAY'] and j['GAME_DATE_EST_HOME'] < game_date:
                    j = j.append(pd.Series({'OPPONENT': 'HOME'}))
                    previous_games_a = previous_games_a.append(j, ignore_index = True)

            if len(previous_games_a) >= n:
                break
        
        # Get previous games for team b
        for index3, m in df.iloc[index:].iterrows():
            if team_b == m['TEAM_NAME_HOME'] and m['GAME_DATE_EST_HOME'] < game_date:
                    m = m.append(pd.Series({'OPPONENT': 'AWAY'}))
                    previous_games_b = previous_games_b.append(m, ignore_index = True)
                    
            elif team_b == m['TEAM_NAME_AWAY'] and m['GAME_DATE_EST_HOME'] < game_date:
                    m = m.append(pd.Series({'OPPONENT': 'HOME'}))
                    previous_games_b = previous_games_b.append(m, ignore_index = True)

            if len(previous_games_b) >= n:
                break
            
        if previous_games_a.shape[0] >= n and previous_games_b.shape[0] >= n:
            
            tuples_a = pd.DataFrame()
            first_tuple_a = previous_games_a.iloc[0]
            first_tuple_a = first_tuple_a.add_suffix('_PREV_GAME_A_' + str(0))
            first_tuple_a['CURRENT_GAME_ID'] = i['GAME_ID_HOME']
            first_tuple_a['WIN_HOME'] = i['WIN_HOME']
            first_tuple_a['GAME_DATE_EST'] = i['GAME_DATE_EST_HOME']
            first_tuple_a['TEAM_NAME_HOME'] = i['TEAM_NAME_HOME']
            first_tuple_a['TEAM_NAME_AWAY'] = i['TEAM_NAME_AWAY'] 
            tuples_a = tuples_a.append(first_tuple_a)
            previous_games_a = previous_games_a.iloc[1:]

            for index4, k in previous_games_a.iterrows(): 
                k = k.add_suffix('_PREV_GAME_A_' + str(index4))
                k['CURRENT_GAME_ID'] = i['GAME_ID_HOME'] 
                to_merge = k.to_frame().transpose()
                to_merge['CURRENT_GAME_ID'] = to_merge['CURRENT_GAME_ID'].apply(int)
                tuples_a['CURRENT_GAME_ID'] = tuples_a['CURRENT_GAME_ID'].apply(int)
                tuples_a = pd.merge(tuples_a, to_merge, on=['CURRENT_GAME_ID'], sort=False)
            
            tuples_b = pd.DataFrame()
            first_tuple_b = previous_games_b.iloc[0]
            first_tuple_b = first_tuple_b.add_suffix('_PREV_GAME_B_' + str(0))
            first_tuple_b['CURRENT_GAME_ID'] = i['GAME_ID_HOME'] 
            tuples_b = tuples_b.append(first_tuple_b)
            previous_games_b = previous_games_b.iloc[1:]

            for index5, l in previous_games_a.iterrows(): 
                l = l.add_suffix('_PREV_GAME_B_' + str(index5))
                l['CURRENT_GAME_ID'] = i['GAME_ID_HOME']
                to_merge = l.to_frame().transpose()
                to_merge['CURRENT_GAME_ID'] = to_merge['CURRENT_GAME_ID'].apply(int)
                tuples_b['CURRENT_GAME_ID'] = tuples_b['CURRENT_GAME_ID'].apply(int) 
                tuples_b = pd.merge(tuples_b, to_merge, on=['CURRENT_GAME_ID'], sort=False)

            tuples = tuples.append(pd.merge(tuples_a, tuples_b, on=['CURRENT_GAME_ID'], sort=False))
        
        print ('COMPLETED ITERATION: ', iteration)
        iteration += 1
    
    return tuples

def GetDefRtgHome(x):
    sum_rtg = 0
    suffixes = [c[8:] for c in x.index if 'OPPONENT' in c and '_A' in c]
    for suffix in suffixes:
        if x['OPPONENT' + suffix] == 'HOME':
            sum_rtg = sum_rtg + x['OFF_RTG_HOME' + suffix]
        elif x['OPPONENT' + suffix] == 'AWAY':
            sum_rtg = sum_rtg + x['OFF_RTG_AWAY' + suffix]

    return sum_rtg/LAST_N

def GetOffRtgHome(x):
    sum_rtg = 0
    suffixes = [c[8:] for c in x.index if 'OPPONENT' in c and '_A' in c]
    for suffix in suffixes:
        if x['OPPONENT' + suffix] == 'HOME':
            sum_rtg = sum_rtg + x['OFF_RTG_AWAY' + suffix]
        elif x['OPPONENT' + suffix] == 'AWAY':
            sum_rtg = sum_rtg + x['OFF_RTG_HOME' + suffix]

    return sum_rtg/LAST_N

def GetDefRtgAway(x): 
    sum_rtg = 0
    suffixes = [c[8:] for c in x.index if 'OPPONENT' in c and '_B' in c]
    for suffix in suffixes:
        if x['OPPONENT' + suffix] == 'HOME':
            sum_rtg = sum_rtg + x['OFF_RTG_HOME' + suffix]
        elif x['OPPONENT' + suffix] == 'AWAY':
            sum_rtg = sum_rtg + x['OFF_RTG_AWAY' + suffix]

    return sum_rtg/LAST_N

def GetOffRtgAway(x):
    sum_rtg = 0
    suffixes = [c[8:] for c in x.index if 'OPPONENT' in c and '_B' in c]
    for suffix in suffixes:
        if x['OPPONENT' + suffix] == 'HOME':
            sum_rtg = sum_rtg + x['OFF_RTG_AWAY' + suffix]
        elif x['OPPONENT' + suffix] == 'AWAY':
            sum_rtg = sum_rtg + x['OFF_RTG_HOME' + suffix]

    return sum_rtg/LAST_N

def GetNetRtgHome(x):
    return x['OFF_RTG_HOME'] - x['DEF_RTG_HOME']

def GetNetRtgAway(x):
    return x['OFF_RTG_AWAY'] - x['DEF_RTG_AWAY']

def ReduceDimensions(df):
    
    # Calculate OFF and DEF Rating 
    df['DEF_RTG_HOME'] = df.apply(GetDefRtgHome, axis=1)
    df['DEF_RTG_AWAY'] = df.apply(GetDefRtgAway, axis=1)
    df['OFF_RTG_HOME'] = df.apply(GetOffRtgHome, axis=1)
    df['OFF_RTG_AWAY'] = df.apply(GetOffRtgAway, axis=1)
    
    # Calculate NET 
    df['NET_RTG_HOME'] = df.apply(GetNetRtgHome, axis=1)
    df['NET_RTG_AWAY'] = df.apply(GetNetRtgAway, axis=1)

    # Extract wanted columns
    cols = [c for c in df.columns if c == 'CURRENT_GAME_ID'
                                    or c == 'TEAM_NAME_HOME' 
                                    or c == 'TEAM_NAME_AWAY'
                                    or c == 'GAME_DATE_EST'
                                    or c == 'WIN_HOME'
                                    or c == 'NET_RTG_HOME'
                                    or c == 'NET_RTG_AWAY'
                                    # or c == 'OFF_RTG_HOME'
                                    # or c == 'OFF_RTG_AWAY'
                                    # or c == 'DEF_RTG_HOME'
                                    # or c == 'DEF_RTG_AWAY'
                                    # or c[:3] == 'PTS' 
                                    # or c[:3] == 'FGA'
                                    # or c[:4] == 'FG3A'
                                    # or c[:3] == 'FTA'
                                    # or c[:4] == 'OREB'
                                    # or c[:2] == 'TO'
                                    # or c[:8] == 'WIN_AWAY'
            ]
    return df[cols]

def EncodeTeamName(df):
    print (df)
    # Get TEAM_NAME columns
    cols = [c for c in df.columns if c[:9] == 'TEAM_NAME']
    # Encode team names
    for col in cols:        
        temp = pd.get_dummies(df[col])
        #df = df.drop(columns=[col])
        df = pd.concat([df, temp], axis=1)
    return df

In [0]:
Location = r'/content/gdrive/My Drive/ML Project Fall 2018/Code/prev5_com1_2015-2016.csv'
team_stats = pd.read_csv(Location)

In [0]:
# Main
df_2018 = ImportData('/content/gdrive/My Drive/ML Project Fall 2018/Code/2017-2018_team_stats.csv', '/content/gdrive/My Drive/ML Project Fall 2018/Code/2017-2018_summary.csv')
df_2017 = ImportData('/content/gdrive/My Drive/ML Project Fall 2018/Code/2016-2017_team_stats.csv', '/content/gdrive/My Drive/ML Project Fall 2018/Code/2016-2017_summary.csv')
df_2016 = ImportData('/content/gdrive/My Drive/ML Project Fall 2018/Code/2015-2016_team_stats.csv', '/content/gdrive/My Drive/ML Project Fall 2018/Code/2015-2016_summary.csv')
df_2015 = ImportData('/content/gdrive/My Drive/ML Project Fall 2018/Code/2014-2015_team_stats.csv', '/content/gdrive/My Drive/ML Project Fall 2018/Code/2014-2015_summary.csv')

df = pd.DataFrame()
df = df.append(df_2018, ignore_index=True)
df = df.append(df_2017, ignore_index=True)
df = df.append(df_2016, ignore_index=True)
df = df.append(df_2015, ignore_index=True)

In [21]:
df.head(100)

Unnamed: 0,GAME_ID_HOME,TEAM_ID_HOME,TEAM_NAME_HOME,FGM_HOME,FGA_HOME,FG_PCT_HOME,FG3M_HOME,FG3A_HOME,FG3_PCT_HOME,FTM_HOME,...,TO_AWAY,PF_AWAY,PTS_AWAY,PLUS_MINUS_AWAY,GAME_DATE_EST_AWAY,HOME_TEAM_ID_AWAY,VISITOR_TEAM_ID_AWAY,OFF_RTG_AWAY,MIN,WIN_HOME
0,21701213,1610612754,Pacers,40,99,0.404,8,31,0.258,5,...,6,8,119,26.0,2018-04-10,1610612754,1610612766,127.424274,240.0,0
1,21701214,1610612737,Hawks,41,98,0.418,13,37,0.351,18,...,18,22,121,8.0,2018-04-10,1610612737,1610612755,117.576182,240.0,0
2,21701216,1610612742,Mavericks,37,97,0.381,8,33,0.242,15,...,14,16,124,27.0,2018-04-10,1610612742,1610612756,137.061404,240.0,0
3,21701217,1610612762,Jazz,48,90,0.533,13,35,0.371,10,...,15,18,79,-40.0,2018-04-10,1610612762,1610612744,83.089324,240.0,1
4,21701218,1610612747,Lakers,38,90,0.422,10,36,0.278,13,...,18,19,105,6.0,2018-04-10,1610612747,1610612745,104.126999,240.0,0
5,21701215,1610612764,Wizards,41,83,0.494,15,30,0.500,16,...,16,18,101,-12.0,2018-04-10,1610612764,1610612738,103.266915,240.0,1
6,21701207,1610612752,Knicks,46,87,0.529,11,30,0.367,6,...,10,15,123,14.0,2018-04-09,1610612752,1610612739,127.716308,240.0,0
7,21701204,1610612765,Pistons,35,78,0.449,9,30,0.300,19,...,9,17,108,10.0,2018-04-09,1610612765,1610612761,125.334225,240.0,0
8,21701205,1610612751,Nets,41,82,0.500,18,42,0.429,14,...,19,17,105,-9.0,2018-04-09,1610612751,1610612741,105.901433,240.0,1
9,21701206,1610612748,Heat,40,87,0.460,5,22,0.227,8,...,9,15,115,22.0,2018-04-09,1610612748,1610612760,120.128025,240.0,0


In [0]:
# Find the days snce the last date both teams played
def LastDatePlayed(df):
    print("***** FINDING DAYS SINCE LAST GAME*****")
    iteration = 0
    tuples = pd.DataFrame()
    for index, i in df.iterrows():
        # Create Lists and Variables
        tuples_a = pd.DataFrame()
        tuples_b = pd.DataFrame()
        team_a_games = []
        team_b_games = []
        team_a_date = []
        team_b_date = []
        distances = []
        selected_games = []
        team_a = i['TEAM_NAME_HOME']
        team_b = i['TEAM_NAME_AWAY']
        game_date = i['GAME_DATE_EST_HOME']

        # Find common matchups
        for fuck, j in df.iloc[index:].iterrows():
            if (team_a == j['TEAM_NAME_HOME'] or team_a == j['TEAM_NAME_AWAY']) and j['GAME_DATE_EST_HOME'] < game_date and len(team_a_games)<1:
                if team_a == j['TEAM_NAME_HOME']:
                    j = j.append(pd.Series({'OPPONENT': 'AWAY'}))
                else:
                    j = j.append(pd.Series({'OPPONENT': 'HOME'}))
                team_a_games.append(j)
                
            elif (team_b == j['TEAM_NAME_HOME'] or team_b == j['TEAM_NAME_AWAY']) and j['GAME_DATE_EST_HOME'] < game_date and len(team_b_games)<1:
                if team_b == j['TEAM_NAME_HOME']:
                    j = j.append(pd.Series({'OPPONENT': 'AWAY'}))
                else:
                    j = j.append(pd.Series({'OPPONENT': 'HOME'}))
                team_b_games.append(j)

            if len(team_a_games) >= 1 and len(team_b_games) >= 1:
                break

        # Calculate distances 
        if len(team_a_games)>0:
          distance_a = i['GAME_DATE_EST_HOME'] - team_a_games[0]['GAME_DATE_EST_HOME'] 
          distance_a = (distance_a / np.timedelta64(1, 'D')).astype(int)
          df.loc[index, '_DAYS_SINCE_LAST_GAME_A_'] = distance_a
        else:
          df.loc[index, '_DAYS_SINCE_LAST_GAME_A_'] = np.NaN
          
        if len(team_b_games)>0:
          distance_b = i['GAME_DATE_EST_HOME'] - team_b_games[0]['GAME_DATE_EST_HOME']
          distance_b = (distance_b / np.timedelta64(1, 'D')).astype(int)
          df.loc[index,'_DAYS_SINCE_LAST_GAME_B_'] = distance_b
        else:
          df.loc[index,'_DAYS_SINCE_LAST_GAME_B_'] = np.NaN
        #print(df.head(10))
        print(index,df.loc[index, '_DAYS_SINCE_LAST_GAME_A_'], df.loc[index,'_DAYS_SINCE_LAST_GAME_B_'])
    return df


In [50]:
LAST_N = 1
df2 = df
df2['_DAYS_SINCE_LAST_GAME_A_'] = np.NaN
df2['_DAYS_SINCE_LAST_GAME_B_'] = np.NaN
df2= LastDatePlayed(df2)

df2

***** FINDING DAYS SINCE LAST GAME*****
0 2.0 4.0
1 2.0 2.0
2 2.0 2.0
3 2.0 2.0
4 2.0 3.0
5 4.0 2.0
6 2.0 3.0
7 1.0 1.0
8 2.0 3.0
9 3.0 2.0
10 2.0 2.0
11 2.0 1.0
12 3.0 1.0
13 2.0 3.0
14 2.0 2.0
15 2.0 2.0
16 2.0 2.0
17 2.0 2.0
18 2.0 2.0
19 2.0 2.0
20 2.0 3.0
21 2.0 1.0
22 2.0 2.0
23 1.0 2.0
24 1.0 2.0
25 2.0 4.0
26 2.0 1.0
27 3.0 2.0
28 2.0 3.0
29 2.0 3.0
30 2.0 1.0
31 2.0 1.0
32 1.0 2.0
33 2.0 2.0
34 3.0 2.0
35 2.0 3.0
36 3.0 2.0
37 2.0 1.0
38 2.0 2.0
39 2.0 2.0
40 2.0 2.0
41 2.0 2.0
42 2.0 4.0
43 2.0 2.0
44 1.0 1.0
45 3.0 1.0
46 1.0 4.0
47 3.0 3.0
48 1.0 1.0
49 1.0 1.0
50 2.0 3.0
51 2.0 2.0
52 3.0 2.0
53 3.0 2.0
54 2.0 2.0
55 2.0 2.0
56 2.0 2.0
57 2.0 2.0
58 2.0 2.0
59 2.0 3.0
60 2.0 2.0
61 2.0 2.0
62 2.0 2.0
63 1.0 2.0
64 2.0 1.0
65 2.0 3.0
66 3.0 2.0
67 2.0 2.0
68 1.0 1.0
69 2.0 2.0
70 2.0 2.0
71 2.0 2.0
72 2.0 2.0
73 1.0 2.0
74 2.0 2.0
75 2.0 1.0
76 2.0 3.0
77 3.0 2.0
78 3.0 4.0
79 2.0 3.0
80 2.0 2.0
81 2.0 1.0
82 3.0 2.0
83 2.0 2.0
84 2.0 2.0
85 1.0 3.0
86 2.0 3.0
87 2.0 2.0
88

Unnamed: 0,GAME_ID_HOME,TEAM_ID_HOME,TEAM_NAME_HOME,FGM_HOME,FGA_HOME,FG_PCT_HOME,FG3M_HOME,FG3A_HOME,FG3_PCT_HOME,FTM_HOME,...,PTS_AWAY,PLUS_MINUS_AWAY,GAME_DATE_EST_AWAY,HOME_TEAM_ID_AWAY,VISITOR_TEAM_ID_AWAY,OFF_RTG_AWAY,MIN,WIN_HOME,_DAYS_SINCE_LAST_GAME_A_,_DAYS_SINCE_LAST_GAME_B_
0,21701213,1610612754,Pacers,40,99,0.404,8,31,0.258,5,...,119,26.0,2018-04-10,1610612754,1610612766,127.424274,240.0,0,2.0,4.0
1,21701214,1610612737,Hawks,41,98,0.418,13,37,0.351,18,...,121,8.0,2018-04-10,1610612737,1610612755,117.576182,240.0,0,2.0,2.0
2,21701216,1610612742,Mavericks,37,97,0.381,8,33,0.242,15,...,124,27.0,2018-04-10,1610612742,1610612756,137.061404,240.0,0,2.0,2.0
3,21701217,1610612762,Jazz,48,90,0.533,13,35,0.371,10,...,79,-40.0,2018-04-10,1610612762,1610612744,83.089324,240.0,1,2.0,2.0
4,21701218,1610612747,Lakers,38,90,0.422,10,36,0.278,13,...,105,6.0,2018-04-10,1610612747,1610612745,104.126999,240.0,0,2.0,3.0
5,21701215,1610612764,Wizards,41,83,0.494,15,30,0.500,16,...,101,-12.0,2018-04-10,1610612764,1610612738,103.266915,240.0,1,4.0,2.0
6,21701207,1610612752,Knicks,46,87,0.529,11,30,0.367,6,...,123,14.0,2018-04-09,1610612752,1610612739,127.716308,240.0,0,2.0,3.0
7,21701204,1610612765,Pistons,35,78,0.449,9,30,0.300,19,...,108,10.0,2018-04-09,1610612765,1610612761,125.334225,240.0,0,1.0,1.0
8,21701205,1610612751,Nets,41,82,0.500,18,42,0.429,14,...,105,-9.0,2018-04-09,1610612751,1610612741,105.901433,240.0,1,2.0,3.0
9,21701206,1610612748,Heat,40,87,0.460,5,22,0.227,8,...,115,22.0,2018-04-09,1610612748,1610612760,120.128025,240.0,0,3.0,2.0


In [0]:
df2.head(70)

In [54]:
df2 = df2.drop_duplicates()
df3=pd.DataFrame()
df3['CURRENT_GAME_ID'] = df2['GAME_ID_HOME']
df3['DAYS_SINCE_LAST_GAME_A'] = df2['_DAYS_SINCE_LAST_GAME_A_']
df3['DAYS_SINCE_LAST_GAME_B'] = df2['_DAYS_SINCE_LAST_GAME_B_']
df3

Unnamed: 0,CURRENT_GAME_ID,DAYS_SINCE_LAST_GAME_A,DAYS_SINCE_LAST_GAME_B
0,21701213,2.0,4.0
1,21701214,2.0,2.0
2,21701216,2.0,2.0
3,21701217,2.0,2.0
4,21701218,2.0,3.0
5,21701215,4.0,2.0
6,21701207,2.0,3.0
7,21701204,1.0,1.0
8,21701205,2.0,3.0
9,21701206,3.0,2.0


In [0]:
df3.to_csv('/content/gdrive/My Drive/ML Project Fall 2018/Code/days_since_played.csv')

In [45]:
# Reduce df size for testing
#

LAST_N = 5
df1 = LastNGames(5, df)
df1 = df1.drop_duplicates()
df1 = ReduceDimensions(df1)
df1['GAME_DATE_EST'] = pd.to_datetime(df1['GAME_DATE_EST'])
df1['WIN_HOME'] = df1['WIN_HOME'].astype(int)

LAST_N = 1
df2= LastNCommonOpponents(1, df)
df2 = df2.drop_duplicates()
df2 = ReduceDimensions(df2)

print df2

merged = pd.merge(df1, df2, on=['CURRENT_GAME_ID'], sort=False)
merged['NET_RTG_HOME'] = merged[['NET_RTG_HOME_x', 'NET_RTG_HOME_y']].mean(axis=1)
merged['NET_RTG_AWAY'] = merged[['NET_RTG_AWAY_x', 'NET_RTG_AWAY_y']].mean(axis=1)
merged = merged.drop(columns=['NET_RTG_HOME_x', 'NET_RTG_HOME_y'])
merged = merged.drop(columns=['NET_RTG_AWAY_x', 'NET_RTG_AWAY_y'])

merged.to_csv('all_years.csv')
print (merged)
exit()

SyntaxError: ignored