In [1]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

In [2]:
# SQLAlchemy URL of the SQLite file (path is relative to the working directory).
DATABASE_URL = "sqlite:///Data/team_database.db"
engine = create_engine(DATABASE_URL)

In [3]:
# Load the whole WIN_NUM_ROSTER_CHANGE_DATA table into a DataFrame.
training_data = pd.read_sql(sql="WIN_NUM_ROSTER_CHANGE_DATA", con=engine)

In [4]:
# Show the column labels of the loaded table as a quick schema check.
print(training_data.columns)

Index(['SEASON', 'TEAM_ID', 'TEAM_ABBREV', 'CHANGE_NUM_WINS', 'CHANGE_ROSTERS',
       'CHANGE_NET_RTG', 'CHANGE_PLAYOFF_WINS'],
      dtype='object')


In [5]:
# Parallel accumulators: each gets one entry per CHANGE_ROSTERS bin that
# contains at least one row after filtering. Empty bins are skipped entirely.
ranges = []
prob_increase_num_wins = []
average_increase = []
average_increase_num_playoff_wins = []
prob_increase_num_playoff_wins = []

# Rows are only considered when CHANGE_NET_RTG lies in the half-open band [0, 5).
net_rtg_change = training_data["CHANGE_NET_RTG"]
in_net_rtg_band = (net_rtg_change >= 0) & (net_rtg_change < 5)
roster_change = training_data["CHANGE_ROSTERS"]

# Sweep CHANGE_ROSTERS over the half-open bins [0.0, 0.1), ..., [0.9, 1.0).
# Because the upper edge is exclusive, a value of exactly 1.0 matches no bin.
for pct in range(0, 100, 10):
    bin_start = pct / 100.0
    bin_end = (pct + 10) / 100.0

    in_bin = (roster_change >= bin_start) & (roster_change < bin_end)
    teams = training_data[in_bin & in_net_rtg_band]

    if teams.empty:
        continue

    n_teams = len(teams)
    # Strictly positive changes only; rows with 0, negative or NaN are excluded.
    win_gains = teams.loc[teams["CHANGE_NUM_WINS"] > 0, "CHANGE_NUM_WINS"]
    playoff_gains = teams.loc[teams["CHANGE_PLAYOFF_WINS"] > 0, "CHANGE_PLAYOFF_WINS"]

    ranges.append("[{}, {})".format(bin_start, bin_end))
    # Share of rows in the bin with a strictly positive change.
    prob_increase_num_wins.append(len(win_gains) / n_teams)
    prob_increase_num_playoff_wins.append(len(playoff_gains) / n_teams)
    # Mean of the positive changes; NaN when the bin has no positive change.
    average_increase.append(np.mean(win_gains))
    average_increase_num_playoff_wins.append(np.mean(playoff_gains))

In [6]:
# Map each output column name to its per-bin list; the pair order below
# is the column order of the resulting table.
out_dict = dict([
    ("ROSTER_CHANGE_INTERVAL", ranges),
    ("PROB_INCREASE_WINS", prob_increase_num_wins),
    ("PROB_INCREASE_PLAYOFF_WINS", prob_increase_num_playoff_wins),
    ("AVG_INCREASE_WINS", average_increase),
    ("AVG_INCREASE_PLAYOFF_WINS", average_increase_num_playoff_wins),
])
df = pd.DataFrame(data=out_dict)

In [7]:
# Plain-text dump of the summary table (one row per non-empty roster-change bin).
print(df)

  ROSTER_CHANGE_INTERVAL  PROB_INCREASE_WINS  PROB_INCREASE_PLAYOFF_WINS  \
0             [0.0, 0.1)            1.000000                    0.000000   
1             [0.1, 0.2)            0.700000                    0.700000   
2             [0.2, 0.3)            0.941176                    0.352941   
3             [0.3, 0.4)            0.871795                    0.487179   
4             [0.4, 0.5)            0.772727                    0.454545   
5             [0.5, 0.6)            0.800000                    0.333333   
6             [0.6, 0.7)            0.692308                    0.282051   
7             [0.7, 0.8)            0.777778                    0.222222   

   AVG_INCREASE_WINS  AVG_INCREASE_PLAYOFF_WINS  
0           3.000000                        NaN  
1           6.000000                   3.714286  
2           6.937500                   6.166667  
3           6.764706                   6.157895  
4           7.921569                   4.833333  
5           6.7

In [8]:
# Persist the summary table: replace the SQL table if it already exists
# (without the DataFrame index), then write a CSV copy.
# NOTE(review): the CSV write keeps pandas' default index=True, so the file
# gets an extra unnamed index column that the SQL table does not have --
# confirm this asymmetry is intended.
df.to_sql(
    name="WIN_COUNT_INCREASE_PROBS",
    con=engine,
    if_exists="replace",
    index=False,
)
df.to_csv(path_or_buf="Data/win_count_increase_probs.csv")