In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import re
from datetime import datetime
import datetime

In [2]:
game_logistics_df = pd.read_csv('total_game_logistics.csv')
away_stats_df = pd.read_csv('total_away_stats.csv')
home_stats_df = pd.read_csv('total_home_stats.csv')
box_score_df = pd.read_csv('total_box_score_df.csv')

In [3]:
all_player_stats_df = away_stats_df.append(home_stats_df)

all_player_stats_df.rename(columns = {'Unnamed: 0':'Name'}, inplace = True)

# delete all rows with team totals
non_players = all_player_stats_df[(all_player_stats_df['Name'] == 'Team Totals')].index
all_player_stats_df.drop(non_players , inplace=True)

# delete all rows with players who did not play
benched_players = all_player_stats_df[(all_player_stats_df['MP'] == 'Did Not Play')].index
all_player_stats_df.drop(benched_players , inplace=True)

# delete all rows with players who did not dress
out_players = all_player_stats_df[(all_player_stats_df['MP'] == 'Did Not Dress')].index
all_player_stats_df.drop(out_players , inplace=True)


In [4]:
all_player_stats_df = all_player_stats_df.fillna(0)
all_player_stats_df['Games'] = 1
all_player_stats_df.sort_values(by=['PTS'], ascending=False).head(25)

Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games
314,LeBron James,LAL,33:57,13.0,20.0,0.65,4.0,6.0,0.667,8.0,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,2/7/23,1
274,Shai Gilgeous-Alexander,OKC,37:30,16.0,23.0,0.696,2.0,3.0,0.667,3.0,...,1.0,7.0,2.0,0.0,0.0,0.0,37.0,-10.0,1/20/2023,1
267,De'Aaron Fox,SAC,33:43,14.0,25.0,0.56,2.0,5.0,0.4,7.0,...,5.0,2.0,0.0,0.0,2.0,5.0,37.0,-20.0,12/19/2022,1
57,Nikola Vučević,ORL,31:22,15.0,23.0,0.652,2.0,5.0,0.4,4.0,...,13.0,0.0,2.0,0.0,3.0,1.0,36.0,12.0,11/17/2018,1
236,De'Aaron Fox,SAC,32:26,15.0,23.0,0.652,3.0,5.0,0.6,3.0,...,5.0,4.0,1.0,1.0,3.0,1.0,36.0,5.0,10/22/2022,1
142,Pascal Siakam,TOR,30:18,10.0,12.0,0.833,3.0,3.0,1.0,9.0,...,8.0,1.0,0.0,2.0,4.0,3.0,32.0,19.0,11/19/2021,1
287,Tyrese Maxey,PHI,41:17,12.0,29.0,0.414,2.0,8.0,0.25,6.0,...,3.0,6.0,0.0,0.0,2.0,2.0,32.0,-4.0,1/21/2023,1
192,Jayson Tatum,BOS,34:09,12.0,16.0,0.75,7.0,10.0,0.7,1.0,...,5.0,4.0,0.0,0.0,2.0,1.0,32.0,40.0,3/18/2022,1
70,Tim Hardaway Jr.,NYK,35:57,9.0,20.0,0.45,5.0,10.0,0.5,9.0,...,2.0,2.0,3.0,0.0,1.0,2.0,32.0,-24.0,11/18/2018,1
72,Aaron Gordon,ORL,29:26,13.0,17.0,0.765,4.0,8.0,0.5,1.0,...,1.0,2.0,1.0,0.0,4.0,3.0,31.0,13.0,11/18/2018,1


In [5]:
# Points scored --> P
# Field goals made --> FG
# Free throws scored --> FT
# Offensive rebounds --> OR
# Defensive rebounds --> DR
# Steals --> S
# Assists --> A
# Blocked shots --> B
# Field goal attempts --> FGA
# Free throws attempts --> FTA
# Personal fouls --> PF
# Turnovers --> T
# games score --> GS

# GS=P+0.4FG−0.7FGA−0.4(FTA−FT)+0.7OR+0.3DR+S+0.7A+0.7B−0.4PF−T

# The game score metric gives a number that could be anything between 0 to 40 or even higher. If a player 
# manages 10 on the game score, they are considered average and extraordinary at 40 or higher.


PTS_list = all_player_stats_df["PTS"].values.tolist()
FG_list = all_player_stats_df["FG"].values.tolist()
FT_list = all_player_stats_df["FT"].values.tolist()
ORB_list = all_player_stats_df["ORB"].values.tolist()
DRB_list = all_player_stats_df["DRB"].values.tolist()
STL_list = all_player_stats_df["STL"].values.tolist()
AST_list = all_player_stats_df["AST"].values.tolist()
BLK_list = all_player_stats_df["BLK"].values.tolist()
FGA_list = all_player_stats_df["FGA"].values.tolist()
FTA_list = all_player_stats_df["FTA"].values.tolist()
PF_list = all_player_stats_df["PF"].values.tolist()
TOV_list = all_player_stats_df["TOV"].values.tolist()
GS_list = []

for i in range(len(PTS_list)):
    GS = ((PTS_list[i]) + (0.4*(FG_list[i])) - (0.7*(FGA_list[i])) - (0.4*((FTA_list[i]) - (FT_list[i]))) + (0.7*(ORB_list[i]))
    + (0.3*(DRB_list[i])) + (STL_list[i]) + (0.7*(AST_list[i])) + (0.7*(BLK_list[i])) - (0.4*(PF_list[i])) - (TOV_list[i]))
    
    GS_list.append(GS)
    
all_player_stats_df["GS"] = GS_list    
all_player_stats_df  

Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games,GS
0,Gordon Hayward,UTA,34:46,5.0,13.0,0.385,2.0,4.0,0.500,3.0,...,7.0,2.0,0.0,4.0,2.0,15.0,12.0,2/11/2014,1,12.0
1,Trey Burke,UTA,28:49,3.0,11.0,0.273,1.0,3.0,0.333,1.0,...,8.0,1.0,0.0,1.0,1.0,8.0,2.0,2/11/2014,1,8.3
2,Derrick Favors,UTA,27:03,4.0,9.0,0.444,0.0,0.0,0.000,3.0,...,1.0,1.0,3.0,1.0,3.0,11.0,-4.0,2/11/2014,1,10.1
3,Marvin Williams,UTA,22:29,4.0,8.0,0.500,1.0,2.0,0.500,0.0,...,1.0,0.0,0.0,1.0,0.0,9.0,-4.0,2/11/2014,1,8.0
4,Richard Jefferson,UTA,19:53,0.0,4.0,0.000,0.0,2.0,0.000,2.0,...,2.0,0.0,0.0,0.0,0.0,2.0,-13.0,2/11/2014,1,0.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319,Russell Westbrook,LAL,28:00,10.0,19.0,0.526,4.0,7.0,0.571,3.0,...,8.0,2.0,1.0,6.0,1.0,27.0,4.0,2/7/23,1,19.6
320,Troy Brown Jr.,LAL,22:17,4.0,6.0,0.667,1.0,2.0,0.500,0.0,...,1.0,1.0,0.0,0.0,1.0,9.0,9.0,2/7/23,1,9.9
321,Lonnie Walker IV,LAL,18:54,3.0,7.0,0.429,2.0,5.0,0.400,0.0,...,3.0,0.0,0.0,1.0,2.0,8.0,-5.0,2/7/23,1,4.9
322,Thomas Bryant,LAL,17:24,2.0,3.0,0.667,0.0,1.0,0.000,0.0,...,1.0,0.0,1.0,0.0,0.0,4.0,1.0,2/7/23,1,6.0


In [6]:
col_list = all_player_stats_df.columns.tolist()
col_list.remove('Name')
col_list.remove('MP')
col_list.remove('FG%')
col_list.remove('3P%')
col_list.remove('FT%')
col_list.remove('Date')

total_sum_list = []

for col in col_list: 
    sum_list = []
    if col == "Team":           
        players_list = all_player_stats_df["Name"].values.tolist()
        teams_list = all_player_stats_df["Team"].values.tolist()
        new_teams_list = []
        
        for i in range(len(players_list)):
            temp_list = []
            temp_list.append(players_list[i])
            temp_list.append(teams_list[i])
            new_teams_list.append(temp_list)
        
        new_teams_df = pd.DataFrame(new_teams_list, columns = ("Name", "Team"))
        new_teams_df = new_teams_df.drop_duplicates()

        new_teams_df = new_teams_df.groupby('Name')['Team'].apply('/'.join).reset_index()
        new_teams_df = new_teams_df.set_index('Name')
    
    else:
        sum_list = all_player_stats_df.groupby('Name')[col].sum()
        total_sum_list.append(sum_list)
    
    
    
temp_player_stat_sum_df = pd.DataFrame(total_sum_list)
temp_player_stat_sum_df = temp_player_stat_sum_df.transpose()
player_stat_sum_df = pd.concat([temp_player_stat_sum_df, new_teams_df], axis=1, join="inner")

player_stat_sum_df = player_stat_sum_df.rename(columns={'3P': 'Three_P', '3PA': 'Three_PA'})

FG_list = player_stat_sum_df['FG'].tolist()
FGA_list = player_stat_sum_df['FGA'].tolist()
FGP_list = []
for i in range(len(FG_list)):
    if FGA_list[i] == 0:
        FGP_list.append(0)
    else:
        FGP = FG_list[i] / FGA_list[i]
        FGP_list.append(FGP)

player_stat_sum_df['FG%'] = FGP_list
        
Three_P_list = player_stat_sum_df['Three_P'].tolist()
Three_PA_list = player_stat_sum_df['Three_PA'].tolist()
Three_P_P_list = []
for i in range(len(Three_P_list)):
    if Three_P_list[i] == 0:
        Three_P_P_list.append(0)
    else:
        Three_P_P = Three_P_list[i] / Three_PA_list[i]
        Three_P_P_list.append(Three_P_P)

player_stat_sum_df['3P%'] = Three_P_P_list
    
FT_list = player_stat_sum_df['FT'].tolist()
FTA_list = player_stat_sum_df['FTA'].tolist()
FTP_list = []
for i in range(len(FT_list)):
    if FT_list[i] == 0:
        FTP_list.append(0)
    else:
        FTP = FT_list[i] / FTA_list[i]
        FTP_list.append(FTP)

player_stat_sum_df['FT%'] = FGP_list

player_stat_sum_df = player_stat_sum_df.rename(columns={'Three_P': '3P', 'Three_PA': '3PA'})

player_stat_sum_df = player_stat_sum_df.fillna(0)

player_stat_sum_df.loc[:, "FG%"] = player_stat_sum_df["FG%"].map('{:.3f}'.format)
player_stat_sum_df.loc[:, "3P%"] = player_stat_sum_df["3P%"].map('{:.3f}'.format)
player_stat_sum_df.loc[:, "FT%"] = player_stat_sum_df["FT%"].map('{:.3f}'.format)

FGP_Col = player_stat_sum_df.pop("FG%")
TPP_Col = player_stat_sum_df.pop("3P%")
FTP_Col = player_stat_sum_df.pop("FT%")

player_stat_sum_df.insert(6, 'FT%', FTP_Col)
player_stat_sum_df.insert(4, '3P%', TPP_Col)
player_stat_sum_df.insert(2, 'FG%', FGP_Col)

first_column = player_stat_sum_df.pop('Games')
second_column = player_stat_sum_df.pop('Team')

player_stat_sum_df.insert(0, 'Team', second_column)
player_stat_sum_df.insert(0, 'Games', first_column)


player_stat_sum_df

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Gordon,3.0,ORL,23.0,41.0,0.561,6.0,13.0,0.462,5.0,8.0,...,10.0,17.0,8.0,4.0,1.0,5.0,5.0,57.0,29.0,47.5
Aaron Holiday,1.0,PHO,4.0,10.0,0.400,1.0,1.0,1.000,0.0,0.0,...,3.0,5.0,7.0,1.0,0.0,3.0,1.0,9.0,13.0,8.4
Aaron Wiggins,1.0,OKC,2.0,5.0,0.400,1.0,1.0,1.000,0.0,0.0,...,3.0,3.0,0.0,0.0,1.0,0.0,3.0,5.0,4.0,2.7
Al Horford,1.0,ATL,6.0,13.0,0.462,0.0,0.0,0.000,1.0,2.0,...,4.0,5.0,0.0,1.0,2.0,2.0,0.0,13.0,0.0,8.2
Alec Burks,2.0,UTA,11.0,19.0,0.579,2.0,4.0,0.500,10.0,15.0,...,3.0,5.0,3.0,1.0,0.0,2.0,5.0,34.0,8.0,24.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wes Iwundu,2.0,ORL,0.0,2.0,0.000,0.0,2.0,0.000,2.0,2.0,...,1.0,2.0,3.0,0.0,2.0,1.0,3.0,2.0,25.0,2.9
Wesley Johnson,1.0,LAL,7.0,14.0,0.500,1.0,3.0,0.333,0.0,0.0,...,6.0,6.0,0.0,0.0,0.0,2.0,3.0,15.0,-14.0,6.6
Xavier Munford,1.0,MEM,4.0,8.0,0.500,1.0,1.0,1.000,1.0,2.0,...,2.0,2.0,1.0,2.0,0.0,3.0,3.0,10.0,-5.0,4.7
Zach Randolph,1.0,MEM,5.0,15.0,0.333,1.0,2.0,0.500,4.0,6.0,...,5.0,7.0,7.0,0.0,0.0,1.0,3.0,15.0,-1.0,11.3


In [7]:
player_stat_sum_df.sort_values(by=['3P%'], ascending=False).head(50)

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Pascal Siakam,1.0,TOR,10.0,12.0,0.833,3.0,3.0,1.0,9.0,10.0,...,8.0,8.0,1.0,0.0,2.0,4.0,3.0,32.0,19.0,26.5
Noah Vonleh,1.0,NYK,2.0,5.0,0.4,1.0,1.0,1.0,2.0,2.0,...,7.0,9.0,1.0,0.0,1.0,2.0,4.0,7.0,-4.0,5.6
D.J. Augustin,3.0,HOU/ORL,13.0,17.0,0.765,7.0,7.0,1.0,12.0,14.0,...,6.0,6.0,14.0,0.0,0.0,7.0,7.0,45.0,24.0,39.3
Jock Landale,1.0,SAS,1.0,2.0,0.5,1.0,1.0,1.0,4.0,4.0,...,1.0,2.0,0.0,1.0,0.0,1.0,1.0,7.0,7.0,6.6
Reggie Jackson,1.0,LAC,4.0,5.0,0.8,3.0,3.0,1.0,3.0,3.0,...,2.0,2.0,5.0,1.0,0.0,3.0,2.0,14.0,0.0,13.4
Jahmi'us Ramsey,2.0,SAC,3.0,4.0,0.75,1.0,1.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,0.0,1.0,2.0,7.0,6.0,5.6
Nicolas Batum,1.0,LAC,3.0,3.0,1.0,1.0,1.0,1.0,0.0,0.0,...,4.0,5.0,3.0,2.0,1.0,1.0,5.0,7.0,-3.0,9.8
Payton Pritchard,1.0,BOS,5.0,5.0,1.0,4.0,4.0,1.0,0.0,0.0,...,4.0,5.0,8.0,1.0,0.0,0.0,0.0,14.0,31.0,21.0
Sam Hauser,1.0,BOS,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,...,2.0,2.0,0.0,0.0,0.0,1.0,0.0,3.0,9.0,2.3
Malik Fitts,1.0,BOS,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,3.0,8.0,3.0


In [8]:
col_list = all_player_stats_df.columns.tolist()
col_list.remove('Name')
col_list.remove('MP')
col_list.remove('FG%')
col_list.remove('3P%')
col_list.remove('FT%')
col_list.remove('Date')

total_average_list = []

for col in col_list: 
    average_list = []
    teams_list = []
    
    if col == "Games":
        average_list = all_player_stats_df.groupby('Name')[col].sum()
        total_average_list.append(average_list)
        
    elif col == "Team":           
        players_list = all_player_stats_df["Name"].values.tolist()
        teams_list = all_player_stats_df["Team"].values.tolist()
        new_teams_list = []
        
        for i in range(len(players_list)):
            temp_list = []
            temp_list.append(players_list[i])
            temp_list.append(teams_list[i])
            new_teams_list.append(temp_list)
        
        new_teams_df = pd.DataFrame(new_teams_list, columns = ("Name", "Team"))
        new_teams_df = new_teams_df.drop_duplicates()

        new_teams_df = new_teams_df.groupby('Name')['Team'].apply('/'.join).reset_index()
        new_teams_df = new_teams_df.set_index('Name')

    else:
        average_list = all_player_stats_df.groupby('Name')[col].mean()
        total_average_list.append(average_list)
    
temp_player_stat_average_df = pd.DataFrame(total_average_list)
temp_player_stat_average_df = temp_player_stat_average_df.transpose()
player_stat_average_df = pd.concat([temp_player_stat_average_df, new_teams_df], axis=1, join="inner")

player_stat_average_df = player_stat_average_df.rename(columns={'3P': 'Three_P', '3PA': 'Three_PA'})

FG_list2 = player_stat_average_df['FG'].tolist()
FGA_list2 = player_stat_average_df['FGA'].tolist()
FGP_list2 = []
for i in range(len(FG_list2)):
    if FGA_list2[i] == 0:
        FGP_list2.append(0)
    else:
        FGP2 = FG_list2[i] / FGA_list2[i]
        FGP_list2.append(FGP2)

player_stat_average_df['FG%'] = FGP_list2
        
Three_P_list2 = player_stat_average_df['Three_P'].tolist()
Three_PA_list2 = player_stat_average_df['Three_PA'].tolist()
Three_P_P_list2 = []
for i in range(len(Three_P_list2)):
    if Three_P_list2[i] == 0:
        Three_P_P_list2.append(0)
    else:
        Three_P_P2 = Three_P_list2[i] / Three_PA_list2[i]
        Three_P_P_list2.append(Three_P_P2)

player_stat_average_df['3P%'] = Three_P_P_list2
    
FT_list2 = player_stat_average_df['FT'].tolist()
FTA_list2 = player_stat_average_df['FTA'].tolist()
FTP_list2 = []
for i in range(len(FT_list2)):
    if FT_list2[i] == 0:
        FTP_list2.append(0)
    else:
        FTP2 = FT_list2[i] / FTA_list2[i]
        FTP_list2.append(FTP2)

player_stat_average_df['FT%'] = FGP_list2
      
player_stat_average_df = player_stat_average_df.rename(columns={'Three_P': '3P', 'Three_PA': '3PA'})

player_stat_average_df = player_stat_average_df.fillna(0)

player_stat_average_df.loc[:, "FG%"] = player_stat_average_df["FG%"].map('{:.3f}'.format)
player_stat_average_df.loc[:, "3P%"] = player_stat_average_df["3P%"].map('{:.3f}'.format)
player_stat_average_df.loc[:, "FT%"] = player_stat_average_df["FT%"].map('{:.3f}'.format)

FGP_Col = player_stat_average_df.pop("FG%")
TPP_Col = player_stat_average_df.pop("3P%")
FTP_Col = player_stat_average_df.pop("FT%")

player_stat_average_df.insert(6, 'FT%', FTP_Col)
player_stat_average_df.insert(4, '3P%', TPP_Col)
player_stat_average_df.insert(2, 'FG%', FGP_Col)

player_stat_average_df = player_stat_average_df.round(2)

first_column2 = player_stat_average_df.pop('Games')
second_column2 = player_stat_average_df.pop('Team')

player_stat_average_df.insert(0, 'Team', second_column2)
player_stat_average_df.insert(0, 'Games', first_column2)

player_stat_average_df

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Gordon,3.0,ORL,7.67,13.67,0.561,2.0,4.33,0.462,1.67,2.67,...,3.33,5.67,2.67,1.33,0.33,1.67,1.67,19.0,9.67,15.83
Aaron Holiday,1.0,PHO,4.00,10.00,0.400,1.0,1.00,1.000,0.00,0.00,...,3.00,5.00,7.00,1.00,0.00,3.00,1.00,9.0,13.00,8.40
Aaron Wiggins,1.0,OKC,2.00,5.00,0.400,1.0,1.00,1.000,0.00,0.00,...,3.00,3.00,0.00,0.00,1.00,0.00,3.00,5.0,4.00,2.70
Al Horford,1.0,ATL,6.00,13.00,0.462,0.0,0.00,0.000,1.00,2.00,...,4.00,5.00,0.00,1.00,2.00,2.00,0.00,13.0,0.00,8.20
Alec Burks,2.0,UTA,5.50,9.50,0.579,1.0,2.00,0.500,5.00,7.50,...,1.50,2.50,1.50,0.50,0.00,1.00,2.50,17.0,4.00,12.25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wes Iwundu,2.0,ORL,0.00,1.00,0.000,0.0,1.00,0.000,1.00,1.00,...,0.50,1.00,1.50,0.00,1.00,0.50,1.50,1.0,12.50,1.45
Wesley Johnson,1.0,LAL,7.00,14.00,0.500,1.0,3.00,0.333,0.00,0.00,...,6.00,6.00,0.00,0.00,0.00,2.00,3.00,15.0,-14.00,6.60
Xavier Munford,1.0,MEM,4.00,8.00,0.500,1.0,1.00,1.000,1.00,2.00,...,2.00,2.00,1.00,2.00,0.00,3.00,3.00,10.0,-5.00,4.70
Zach Randolph,1.0,MEM,5.00,15.00,0.333,1.0,2.00,0.500,4.00,6.00,...,5.00,7.00,7.00,0.00,0.00,1.00,3.00,15.0,-1.00,11.30


In [9]:
all_player_stats_df.sort_values(by=['GS'], ascending=False).head(20)

Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games,GS
274,Shai Gilgeous-Alexander,OKC,37:30,16.0,23.0,0.696,2.0,3.0,0.667,3.0,...,7.0,2.0,0.0,0.0,0.0,37.0,-10.0,1/20/2023,1,34.5
266,Domantas Sabonis,SAC,40:44,12.0,19.0,0.632,2.0,2.0,1.0,2.0,...,7.0,1.0,0.0,1.0,4.0,28.0,6.0,12/19/2022,1,30.5
314,LeBron James,LAL,33:57,13.0,20.0,0.65,4.0,6.0,0.667,8.0,...,3.0,3.0,0.0,4.0,3.0,38.0,8.0,2/7/23,1,30.4
70,Nikola Vučević,ORL,31:37,10.0,15.0,0.667,2.0,3.0,0.667,6.0,...,9.0,0.0,2.0,3.0,2.0,28.0,14.0,11/18/2018,1,30.0
57,Nikola Vučević,ORL,31:22,15.0,23.0,0.652,2.0,5.0,0.4,4.0,...,0.0,2.0,0.0,3.0,1.0,36.0,12.0,11/17/2018,1,29.6
97,Trae Young,ATL,39:36,12.0,22.0,0.545,3.0,10.0,0.3,3.0,...,12.0,5.0,0.0,5.0,0.0,30.0,-6.0,11/8/2019,1,29.2
197,Domantas Sabonis,SAC,36:50,12.0,20.0,0.6,1.0,1.0,1.0,5.0,...,5.0,1.0,1.0,4.0,3.0,30.0,-2.0,3/18/2022,1,28.4
236,De'Aaron Fox,SAC,32:26,15.0,23.0,0.652,3.0,5.0,0.6,3.0,...,4.0,1.0,1.0,3.0,1.0,36.0,5.0,10/22/2022,1,27.7
192,Jayson Tatum,BOS,34:09,12.0,16.0,0.75,7.0,10.0,0.7,1.0,...,4.0,0.0,0.0,2.0,1.0,32.0,40.0,3/18/2022,1,27.5
14,Jeff Teague,ATL,35:41,9.0,17.0,0.529,2.0,4.0,0.5,6.0,...,8.0,3.0,0.0,0.0,0.0,26.0,24.0,1/2/2015,1,27.1


In [10]:
# val = all_player_stats_df['MP'].values[0]
# val2 = all_player_stats_df['MP'].values[1]

# datetime_object = datetime.datetime.strptime(val, '%M:%S').time()
# datetime_object2 = datetime.datetime.strptime(val2, '%M:%S').time()

# timeList = [val, val2]

# mysum = datetime.timedelta()
# for i in timeList:
#     (m, s) = i.split(':')
#     d = datetime.timedelta(minutes=int(m), seconds=int(s))
#     mysum += d
# print(str(mysum))

# time_list = all_player_stats_df['MP']
# time_sum_list = []
# for i in range(len(time_list)):
#     val = all_player_stats_df['MP'].values[i]
#     datetime_object = datetime.datetime.strptime(val, '%M:%S').time()


In [11]:
box_score_df

Unnamed: 0,Date,Home_Away,Team,1Q,2Q,3Q,4Q,T,1OT
0,2/11/2014,Away,UTA,16,32,20,28,96,
1,2/11/2014,Home,LAL,27,10,23,19,79,
2,1/2/2015,Away,ATL,31,26,25,16,98,
3,1/2/2015,Home,UTA,21,24,23,24,92,
4,4/3/2016,Away,MEM,30,26,29,22,107,
5,4/3/2016,Home,ORL,29,38,30,22,119,
6,12/27/2017,Away,UTA,24,23,22,32,101,
7,12/27/2017,Home,GSW,23,25,42,36,126,
8,11/17/2018,Away,LAL,31,22,28,36,117,
9,11/17/2018,Home,ORL,25,37,38,30,130,


In [12]:
box_score_df

date_list = box_score_df['Date'].tolist()
final_score_list = box_score_df['T'].tolist()
win_loss_list = []
home_team_wins = 0
away_team_wins = 0
home_team_losses = 0
away_team_losses = 0

try:
    for i in range(len(final_score_list)):
        if date_list[i] == date_list[i+1]:
            if final_score_list[i] > final_score_list[i + 1]:
                win_loss_list.append('W')
                win_loss_list.append('L')
                away_team_wins = away_team_wins + 1
                home_team_losses = home_team_losses + 1
            else:
                win_loss_list.append('L')
                win_loss_list.append('W')
                home_team_wins = home_team_wins + 1
                away_team_losses = away_team_losses + 1
        else:
            continue
except IndexError:
    print("An exception was made")

home_win_loss_percentage = (home_team_wins/(home_team_wins + home_team_losses))
away_win_loss_percentage = (away_team_wins/(away_team_wins + away_team_losses))

An exception was made


In [13]:
home_team_record = ['Home',home_team_wins,home_team_losses,home_win_loss_percentage]
away_team_record = ['Away',away_team_wins,away_team_losses,away_win_loss_percentage]

home_away_record_df = pd.DataFrame(columns = ['', 'Wins', 'Losses','Win%'])
  
home_away_record_df.loc[len(home_away_record_df)] = home_team_record
home_away_record_df.loc[len(home_away_record_df)] = away_team_record
home_away_record_df = home_away_record_df.set_index('')
home_away_record_df.loc[:, "Win%"] = home_away_record_df["Win%"].map('{:.3f}'.format)

home_away_record_df

Unnamed: 0,Wins,Losses,Win%
,,,
Home,9.0,14.0,0.391
Away,14.0,9.0,0.609


In [14]:
box_score_df['Decision'] = win_loss_list
box_score_df

Unnamed: 0,Date,Home_Away,Team,1Q,2Q,3Q,4Q,T,1OT,Decision
0,2/11/2014,Away,UTA,16,32,20,28,96,,W
1,2/11/2014,Home,LAL,27,10,23,19,79,,L
2,1/2/2015,Away,ATL,31,26,25,16,98,,W
3,1/2/2015,Home,UTA,21,24,23,24,92,,L
4,4/3/2016,Away,MEM,30,26,29,22,107,,L
5,4/3/2016,Home,ORL,29,38,30,22,119,,W
6,12/27/2017,Away,UTA,24,23,22,32,101,,L
7,12/27/2017,Home,GSW,23,25,42,36,126,,W
8,11/17/2018,Away,LAL,31,22,28,36,117,,L
9,11/17/2018,Home,ORL,25,37,38,30,130,,W


In [15]:
team_records_df = pd.DataFrame(columns = ['Team', 'Games', 'Wins', 'Losses','Win%'])

for item in box_score_df.Team.unique():
    team_wins = len(box_score_df.loc[(box_score_df['Team'] == item) & (box_score_df['Decision'] == 'W')].index)
    team_losses = len(box_score_df.loc[(box_score_df['Team'] == item) & (box_score_df['Decision'] == 'L')].index)
    
    team_win_percentage = (team_wins/(team_wins + team_losses))
    team_total_games = (team_wins + team_losses)
    
    team_record_list = [item,team_total_games,team_wins,team_losses,team_win_percentage]
    team_records_df.loc[len(team_records_df)] = team_record_list

team_records_df = team_records_df.set_index('Team')
team_records_df.loc[:, "Win%"] = team_records_df["Win%"].map('{:.3f}'.format)

team_records_df.sort_values(by=['Games','Win%'], ascending=False)

Unnamed: 0_level_0,Games,Wins,Losses,Win%
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SAC,14,5,9,0.357
UTA,5,2,3,0.4
ORL,3,3,0,1.0
LAL,3,0,3,0.0
ATL,2,1,1,0.5
MEM,2,1,1,0.5
NOP,2,1,1,0.5
OKC,2,1,1,0.5
GSW,1,1,0,1.0
MIA,1,1,0,1.0


In [21]:
player_records_df = pd.DataFrame(columns = ['Name', 'Games', 'Wins', 'Losses','Win%'])

for player in all_player_stats_df.Name.unique():
    
    temp_df = all_player_stats_df.loc[(all_player_stats_df['Name'] == player)]
    temp_merge_df = pd.merge(temp_df, box_score_df, on=['Team','Date'])
         
    player_wins = 0
    player_losses = 0
        
    for item in temp_merge_df['Decision']:
        if item == 'L':
            player_losses = player_losses + 1
        else:
            player_wins = player_wins + 1

        player_win_percentage = (player_wins/(player_wins + player_losses))
        player_total_games = (player_wins + player_losses)

        player_record_list = [player,player_total_games,player_wins,player_losses,player_win_percentage]
        player_records_df.loc[len(player_records_df)] = player_record_list

player_records_df = player_records_df.set_index('Name')
player_records_df.loc[:, "Win%"] = player_records_df["Win%"].map('{:.3f}'.format)

player_records_df.sort_values(by=['Games','Win%'], ascending=False).head(50)

# temp_merge_df                                      



Unnamed: 0_level_0,Games,Wins,Losses,Win%
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Harrison Barnes,11,4,7,0.364
De'Aaron Fox,10,4,6,0.4
Harrison Barnes,10,3,7,0.3
Davion Mitchell,10,2,8,0.2
Harrison Barnes,9,3,6,0.333
De'Aaron Fox,9,3,6,0.333
Chimezie Metu,9,3,6,0.333
Davion Mitchell,9,2,7,0.222
Harrison Barnes,8,3,5,0.375
De'Aaron Fox,8,3,5,0.375
