In [1]:
# Import needed dependencies
import requests
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import re
from datetime import datetime
import datetime
from collections import Counter

In [2]:
# Load every input table from CSV files (paths are relative to the working directory).
# Game-level and per-player box-score data:
game_logistics_df = pd.read_csv('total_game_logistics.csv')
away_stats_df = pd.read_csv('total_away_stats.csv')
home_stats_df = pd.read_csv('total_home_stats.csv')
box_score_df = pd.read_csv('total_box_score_df.csv')
# Award / honour reference tables that later cells merge against on player name:
hof_df = pd.read_csv('nba_hof.csv')
mvp_df = pd.read_csv('nba_mvp.csv')
all_league_df = pd.read_csv('all_league_players.csv')
allstar_df = pd.read_csv('allstars.csv')
dpoy_df = pd.read_csv('dpoy.csv')

In [3]:
# Stack the away and home per-player box-score rows into one frame.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement and, like append, keeps each input's original row labels
# (so row labels repeat between the away and home halves).
all_player_stats_df = pd.concat([away_stats_df, home_stats_df])

# The player name arrives in the column pandas labelled 'Unnamed: 0'.
all_player_stats_df = all_player_stats_df.rename(columns={'Unnamed: 0': 'Name'})

# Remove the per-team aggregate rows and anyone who did not take the floor.
is_team_total = all_player_stats_df['Name'] == 'Team Totals'
did_not_appear = all_player_stats_df['MP'].isin(['Did Not Play', 'Did Not Dress'])
all_player_stats_df = all_player_stats_df[~(is_team_total | did_not_appear)]

# Missing stats count as zero; every remaining row represents one game played.
all_player_stats_df = all_player_stats_df.fillna(0)
all_player_stats_df['Games'] = 1

# Preview: the 25 highest single-game point totals.
all_player_stats_df.sort_values(by=['PTS'], ascending=False).head(25)


Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games
0,Damian Lillard,POR,36:19,17.0,29.0,0.586,9.0,15.0,0.6,8.0,...,2.0,12.0,1.0,0.0,2.0,2.0,51.0,17.0,2/1/20,1
16,Joel Embiid,PHI,34:29,14.0,20.0,0.7,2.0,3.0,0.667,16.0,...,15.0,7.0,0.0,7.0,2.0,4.0,46.0,19.0,11/15/17,1
70,LeBron James,LAL,33:57,13.0,20.0,0.65,4.0,6.0,0.667,8.0,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,2/7/23,1
127,Trae Young,ATL,37:36,11.0,24.0,0.458,6.0,11.0,0.545,8.0,...,6.0,13.0,1.0,1.0,3.0,0.0,36.0,16.0,1/15/24,1
56,Donovan Mitchell,UTA,38:44,10.0,21.0,0.476,1.0,8.0,0.125,11.0,...,2.0,6.0,0.0,1.0,2.0,4.0,32.0,4.0,4/21/22,1
59,Jalen Brunson,DAL,35:05,12.0,22.0,0.545,0.0,4.0,0.0,7.0,...,1.0,6.0,0.0,0.0,1.0,2.0,31.0,-1.0,4/21/22,1
85,Victor Wembanyama,SAS,24:21,9.0,14.0,0.643,2.0,4.0,0.5,10.0,...,6.0,6.0,0.0,7.0,1.0,0.0,30.0,18.0,12/28/23,1
70,Shai Gilgeous-Alexander,OKC,37:41,9.0,17.0,0.529,1.0,1.0,1.0,11.0,...,2.0,8.0,2.0,0.0,4.0,5.0,30.0,-2.0,2/7/23,1
85,Jerami Grant,POR,40:31,9.0,21.0,0.429,1.0,5.0,0.2,10.0,...,10.0,5.0,0.0,0.0,4.0,4.0,29.0,-7.0,12/28/23,1
100,Keldon Johnson,SAS,32:12,9.0,18.0,0.5,3.0,8.0,0.375,8.0,...,7.0,7.0,1.0,0.0,1.0,4.0,29.0,6.0,12/29/23,1


In [4]:
# Game Score (GS): a single-number summary of one player's box-score line.
#
#   GS = PTS + 0.4*FG - 0.7*FGA - 0.4*(FTA - FT) + 0.7*ORB + 0.3*DRB
#        + STL + 0.7*AST + 0.7*BLK - 0.4*PF - TOV
#
#   PTS  points scored            FG   field goals made
#   FGA  field goal attempts      FT   free throws made
#   FTA  free throw attempts      ORB  offensive rebounds
#   DRB  defensive rebounds       STL  steals
#   AST  assists                  BLK  blocked shots
#   PF   personal fouls           TOV  turnovers
#
# Rule of thumb: about 10 is an average game and 40 or more is extraordinary.
# There is no lower bound, so a poor game can score below zero.


def _stat(column):
    """Return one stat column of all_player_stats_df as a plain array (no index alignment)."""
    return all_player_stats_df[column].to_numpy()


# Same terms, in the same order, as the formula above, evaluated for every row at once.
game_score = (
    _stat("PTS")
    + (0.4 * _stat("FG"))
    - (0.7 * _stat("FGA"))
    - (0.4 * (_stat("FTA") - _stat("FT")))
    + (0.7 * _stat("ORB"))
    + (0.3 * _stat("DRB"))
    + _stat("STL")
    + (0.7 * _stat("AST"))
    + (0.7 * _stat("BLK"))
    - (0.4 * _stat("PF"))
    - _stat("TOV")
)

GS_list = game_score.tolist()
all_player_stats_df["GS"] = GS_list

all_player_stats_df.to_csv('all_player_stats_df.csv')

all_player_stats_df

Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games,GS
0,Donovan Mitchell,UTA,32:24,11.0,21.0,0.524,3.0,6.0,0.500,0.0,...,4.0,1.0,0.0,2.0,2.0,25.0,-9.0,2/1/20,1,16.3
1,Rudy Gobert,UTA,32:04,1.0,4.0,0.250,0.0,0.0,0.000,4.0,...,2.0,1.0,3.0,1.0,4.0,6.0,-14.0,2/1/20,1,10.4
2,Mike Conley,UTA,30:01,9.0,20.0,0.450,4.0,6.0,0.667,0.0,...,4.0,0.0,0.0,1.0,0.0,22.0,-4.0,2/1/20,1,15.0
3,Bojan Bogdanović,UTA,28:57,6.0,11.0,0.545,5.0,9.0,0.556,5.0,...,1.0,1.0,0.0,1.0,3.0,22.0,-10.0,2/1/20,1,17.2
4,Joe Ingles,UTA,28:02,1.0,2.0,0.500,1.0,2.0,0.500,0.0,...,5.0,1.0,0.0,1.0,2.0,3.0,-6.0,2/1/20,1,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,Clint Capela,ATL,24:51,5.0,7.0,0.714,0.0,0.0,0.000,3.0,...,0.0,0.0,4.0,1.0,2.0,13.0,8.0,1/15/24,1,15.6
132,Bogdan Bogdanović,ATL,25:34,2.0,10.0,0.200,1.0,6.0,0.167,0.0,...,1.0,1.0,0.0,3.0,2.0,5.0,4.0,1/15/24,1,-2.7
133,Onyeka Okongwu,ATL,23:09,3.0,8.0,0.375,0.0,1.0,0.000,1.0,...,1.0,0.0,0.0,2.0,4.0,7.0,2.0,1/15/24,1,2.3
134,Garrison Mathews,ATL,13:12,2.0,4.0,0.500,2.0,4.0,0.500,0.0,...,0.0,0.0,0.0,0.0,0.0,6.0,17.0,1/15/24,1,4.0


In [5]:
# Per-player totals across every game in all_player_stats_df.

# Columns that are summed (or, for Team, joined); the rest are either
# identifiers or per-game ratios that cannot be added up.
col_list = all_player_stats_df.columns.tolist()
for excluded_col in ('Name', 'MP', 'FG%', '3P%', 'FT%', 'Date'):
    col_list.remove(excluded_col)

# Each player's distinct teams, in order of first appearance, joined with '/'.
new_teams_df = (
    all_player_stats_df[['Name', 'Team']]
    .drop_duplicates()
    .groupby('Name')['Team']
    .apply('/'.join)
    .reset_index()
    .set_index('Name')
)

# Sum every remaining stat per player. Cast to float so integer counters such
# as Games keep the float representation this table has always been saved with.
sum_cols = [col for col in col_list if col != 'Team']
temp_player_stat_sum_df = all_player_stats_df.groupby('Name')[sum_cols].sum().astype(float)
player_stat_sum_df = pd.concat([temp_player_stat_sum_df, new_teams_df], axis=1, join="inner")
player_stat_sum_df = player_stat_sum_df.fillna(0)

# Recompute shooting percentages from the summed makes/attempts and place each
# one directly after its attempts column. Zero attempts yields 0 rather than a
# division error. FT% is derived from FT/FTA (it previously reused the FG% values).
for pct_col, made_col, attempted_col in (
    ('FG%', 'FG', 'FGA'),
    ('3P%', '3P', '3PA'),
    ('FT%', 'FT', 'FTA'),
):
    attempts = player_stat_sum_df[attempted_col]
    pct = (player_stat_sum_df[made_col] / attempts.where(attempts != 0)).fillna(0)
    insert_at = player_stat_sum_df.columns.get_loc(attempted_col) + 1
    # Stored as fixed-width 3-decimal text (e.g. '0.400'), matching the saved CSV format.
    player_stat_sum_df.insert(insert_at, pct_col, pct.map('{:.3f}'.format))

# Move Games and Team to the front, in that order.
for leading_col in ('Team', 'Games'):
    player_stat_sum_df.insert(0, leading_col, player_stat_sum_df.pop(leading_col))

player_stat_sum_df.to_csv('player_stat_sum_df.csv')

player_stat_sum_df

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Wiggins,1.0,OKC,2.0,5.0,0.400,1.0,1.0,1.000,0.0,0.0,...,3.0,3.0,0.0,0.0,1.0,0.0,3.0,5.0,4.0,2.7
Alec Burks,1.0,UTA,3.0,8.0,0.375,1.0,3.0,0.333,3.0,4.0,...,2.0,2.0,3.0,0.0,0.0,1.0,3.0,10.0,-17.0,5.7
Alex Len,1.0,SAC,6.0,9.0,0.667,0.0,1.0,0.000,0.0,0.0,...,5.0,5.0,2.0,0.0,3.0,2.0,5.0,12.0,16.0,9.1
Amir Johnson,1.0,PHI,0.0,0.0,0.000,0.0,0.0,0.000,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,-10.0,-1.7
Andre Iguodala,1.0,GSW,1.0,7.0,0.143,0.0,2.0,0.000,1.0,2.0,...,5.0,6.0,7.0,1.0,0.0,0.0,1.0,3.0,22.0,5.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wenyen Gabriel,1.0,POR,0.0,0.0,0.000,0.0,0.0,0.000,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,-4.0,-0.4
Willy Hernangómez,1.0,NOP,2.0,2.0,1.000,0.0,0.0,0.000,4.0,4.0,...,2.0,2.0,1.0,0.0,0.0,0.0,2.0,8.0,-11.0,7.9
Yuta Watanabe,1.0,PHO,1.0,4.0,0.250,0.0,3.0,0.000,0.0,0.0,...,2.0,2.0,0.0,0.0,0.0,0.0,2.0,2.0,5.0,-0.6
Zach Collins,2.0,SAS,12.0,19.0,0.632,0.0,3.0,0.000,4.0,7.0,...,8.0,10.0,4.0,0.0,1.0,2.0,9.0,28.0,-6.0,20.0


In [6]:
# Top 50 players by three-point percentage. '3P%' holds fixed-width text
# such as '0.750', so this ordering is a string comparison.
three_point_ranking = player_stat_sum_df.sort_values(by=['3P%'], ascending=False)
three_point_ranking.head(50)

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Wiggins,1.0,OKC,2.0,5.0,0.4,1.0,1.0,1.0,0.0,0.0,...,3.0,3.0,0.0,0.0,1.0,0.0,3.0,5.0,4.0,2.7
T.J. McConnell,1.0,PHI,3.0,5.0,0.6,1.0,1.0,1.0,0.0,0.0,...,3.0,4.0,4.0,1.0,0.0,5.0,0.0,7.0,-8.0,5.1
Jaylin Williams,1.0,OKC,4.0,6.0,0.667,2.0,2.0,1.0,4.0,4.0,...,6.0,7.0,7.0,2.0,0.0,0.0,1.0,14.0,2.0,20.4
Trent Forrest,1.0,ATL,1.0,3.0,0.333,1.0,1.0,1.0,0.0,0.0,...,2.0,3.0,0.0,0.0,0.0,0.0,2.0,3.0,-13.0,1.8
Anfernee Simons,1.0,POR,3.0,6.0,0.5,1.0,1.0,1.0,0.0,0.0,...,5.0,5.0,3.0,0.0,0.0,1.0,2.0,7.0,9.0,5.8
Rui Hachimura,1.0,LAL,5.0,7.0,0.714,1.0,1.0,1.0,3.0,4.0,...,5.0,6.0,0.0,0.0,0.0,1.0,0.0,14.0,-11.0,11.9
Sasha Vezenkov,1.0,SAC,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,3.0,-4.0,2.0
Shai Gilgeous-Alexander,1.0,OKC,9.0,17.0,0.529,1.0,1.0,1.0,11.0,12.0,...,2.0,2.0,8.0,2.0,0.0,4.0,5.0,30.0,-2.0,23.5
Maxi Kleber,1.0,DAL,6.0,7.0,0.857,4.0,5.0,0.8,1.0,2.0,...,2.0,3.0,1.0,0.0,0.0,1.0,5.0,17.0,10.0,13.1
Nick Young,1.0,GSW,6.0,9.0,0.667,3.0,4.0,0.75,0.0,0.0,...,1.0,1.0,1.0,0.0,0.0,0.0,1.0,15.0,10.0,11.7


In [7]:
# Per-player per-game averages across every game in all_player_stats_df.

# Columns that are aggregated (or, for Team, joined); the rest are either
# identifiers or per-game ratios that are recomputed below.
col_list = all_player_stats_df.columns.tolist()
for excluded_col in ('Name', 'MP', 'FG%', '3P%', 'FT%', 'Date'):
    col_list.remove(excluded_col)

# Each player's distinct teams, in order of first appearance, joined with '/'.
new_teams_df = (
    all_player_stats_df[['Name', 'Team']]
    .drop_duplicates()
    .groupby('Name')['Team']
    .apply('/'.join)
    .reset_index()
    .set_index('Name')
)

# Games is a count, so it is summed; every other stat is averaged per game.
# Cast to float so Games keeps the float representation this table has
# always been saved with.
stat_cols = [col for col in col_list if col != 'Team']
aggregations = {col: ('sum' if col == 'Games' else 'mean') for col in stat_cols}
temp_player_stat_average_df = (
    all_player_stats_df.groupby('Name').agg(aggregations).astype(float)
)
player_stat_average_df = pd.concat(
    [temp_player_stat_average_df, new_teams_df], axis=1, join="inner"
)
player_stat_average_df = player_stat_average_df.fillna(0)

# Shooting percentages from average makes / average attempts (the same ratio
# as total makes / total attempts), placed directly after the attempts column.
# Zero attempts yields 0 rather than a division error. FT% is derived from
# FT/FTA (it previously reused the FG% values).
for pct_col, made_col, attempted_col in (
    ('FG%', 'FG', 'FGA'),
    ('3P%', '3P', '3PA'),
    ('FT%', 'FT', 'FTA'),
):
    attempts = player_stat_average_df[attempted_col]
    pct = (player_stat_average_df[made_col] / attempts.where(attempts != 0)).fillna(0)
    insert_at = player_stat_average_df.columns.get_loc(attempted_col) + 1
    # Stored as fixed-width 3-decimal text, so the round(2) below leaves it untouched.
    player_stat_average_df.insert(insert_at, pct_col, pct.map('{:.3f}'.format))

# Round the numeric averages to two decimals.
player_stat_average_df = player_stat_average_df.round(2)

# Move Games and Team to the front, in that order.
for leading_col in ('Team', 'Games'):
    player_stat_average_df.insert(0, leading_col, player_stat_average_df.pop(leading_col))

player_stat_average_df.to_csv('player_stat_average_df.csv')

player_stat_average_df

Unnamed: 0_level_0,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,FTA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Wiggins,1.0,OKC,2.0,5.0,0.400,1.0,1.0,1.000,0.0,0.0,...,3.0,3.0,0.0,0.0,1.0,0.0,3.0,5.0,4.0,2.7
Alec Burks,1.0,UTA,3.0,8.0,0.375,1.0,3.0,0.333,3.0,4.0,...,2.0,2.0,3.0,0.0,0.0,1.0,3.0,10.0,-17.0,5.7
Alex Len,1.0,SAC,6.0,9.0,0.667,0.0,1.0,0.000,0.0,0.0,...,5.0,5.0,2.0,0.0,3.0,2.0,5.0,12.0,16.0,9.1
Amir Johnson,1.0,PHI,0.0,0.0,0.000,0.0,0.0,0.000,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,2.0,1.0,0.0,-10.0,-1.7
Andre Iguodala,1.0,GSW,1.0,7.0,0.143,0.0,2.0,0.000,1.0,2.0,...,5.0,6.0,7.0,1.0,0.0,0.0,1.0,3.0,22.0,5.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wenyen Gabriel,1.0,POR,0.0,0.0,0.000,0.0,0.0,0.000,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,-4.0,-0.4
Willy Hernangómez,1.0,NOP,2.0,2.0,1.000,0.0,0.0,0.000,4.0,4.0,...,2.0,2.0,1.0,0.0,0.0,0.0,2.0,8.0,-11.0,7.9
Yuta Watanabe,1.0,PHO,1.0,4.0,0.250,0.0,3.0,0.000,0.0,0.0,...,2.0,2.0,0.0,0.0,0.0,0.0,2.0,2.0,5.0,-0.6
Zach Collins,2.0,SAS,6.0,9.5,0.632,0.0,1.5,0.000,2.0,3.5,...,4.0,5.0,2.0,0.0,0.5,1.0,4.5,14.0,-3.0,10.0


In [8]:
# The 20 best individual performances, ranked by single-game Game Score.
game_score_ranking = all_player_stats_df.sort_values(by=['GS'], ascending=False)
game_score_ranking.head(20)

Unnamed: 0,Name,Team,MP,FG,FGA,FG%,3P,3PA,3P%,FT,...,AST,STL,BLK,TOV,PF,PTS,+/-,Date,Games,GS
16,Joel Embiid,PHI,34:29,14.0,20.0,0.7,2.0,3.0,0.667,16.0,...,7.0,0.0,7.0,2.0,4.0,46.0,19.0,11/15/17,1,47.9
0,Damian Lillard,POR,36:19,17.0,29.0,0.586,9.0,15.0,0.6,8.0,...,12.0,1.0,0.0,2.0,2.0,51.0,17.0,2/1/20,1,44.7
85,Victor Wembanyama,SAS,24:21,9.0,14.0,0.643,2.0,4.0,0.5,10.0,...,6.0,0.0,7.0,1.0,0.0,30.0,18.0,12/28/23,1,33.7
127,Trae Young,ATL,37:36,11.0,24.0,0.458,6.0,11.0,0.545,8.0,...,13.0,1.0,1.0,3.0,0.0,36.0,16.0,1/15/24,1,32.8
70,LeBron James,LAL,33:57,13.0,20.0,0.65,4.0,6.0,0.667,8.0,...,3.0,3.0,0.0,4.0,3.0,38.0,8.0,2/7/23,1,30.4
113,Domantas Sabonis,SAC,34:37,12.0,15.0,0.8,0.0,2.0,0.0,4.0,...,12.0,0.0,0.0,3.0,3.0,28.0,27.0,12/22/23,1,29.8
14,Ben Simmons,PHI,39:06,8.0,13.0,0.615,0.0,0.0,0.0,2.0,...,10.0,5.0,1.0,1.0,1.0,18.0,19.0,11/15/17,1,26.5
125,Victor Wembanyama,SAS,27:05,12.0,18.0,0.667,1.0,5.0,0.2,1.0,...,2.0,2.0,5.0,3.0,2.0,26.0,-2.0,1/15/24,1,26.0
43,Tyrese Haliburton,SAC,36:36,6.0,11.0,0.545,1.0,3.0,0.333,7.0,...,7.0,4.0,2.0,0.0,1.0,20.0,7.0,11/3/21,1,25.4
100,Keldon Johnson,SAS,32:12,9.0,18.0,0.5,3.0,8.0,0.375,8.0,...,7.0,1.0,0.0,1.0,4.0,29.0,6.0,12/29/23,1,25.0


In [9]:
# val = all_player_stats_df['MP'].values[0]
# val2 = all_player_stats_df['MP'].values[1]

# datetime_object = datetime.datetime.strptime(val, '%M:%S').time()
# datetime_object2 = datetime.datetime.strptime(val2, '%M:%S').time()

# timeList = [val, val2]

# mysum = datetime.timedelta()
# for i in timeList:
#     (m, s) = i.split(':')
#     d = datetime.timedelta(minutes=int(m), seconds=int(s))
#     mysum += d
# print(str(mysum))

# time_list = all_player_stats_df['MP']
# time_sum_list = []
# for i in range(len(time_list)):
#     val = all_player_stats_df['MP'].values[i]
#     datetime_object = datetime.datetime.strptime(val, '%M:%S').time()


In [10]:
# Display the per-team box-score table (one row per team per game) as loaded from CSV.
box_score_df

Unnamed: 0,Date,Home_Away,Team,1Q,2Q,3Q,4Q,T
0,2/1/20,Away,UTA,34,19,30,24,107
1,2/1/20,Home,POR,33,30,31,30,124
2,11/15/17,Away,PHI,29,29,25,32,115
3,11/15/17,Home,LAL,26,28,30,25,109
4,12/27/17,Away,UTA,24,23,22,32,101
5,12/27/17,Home,GSW,23,25,42,36,126
6,11/3/21,Away,NOP,26,26,35,12,99
7,11/3/21,Home,SAC,27,29,36,20,112
8,4/21/22,Away,DAL,27,41,29,29,126
9,4/21/22,Home,UTA,20,31,40,27,118


In [11]:
# Derive a W/L flag for every box-score row and tally home vs. away results.
#
# Assumes box_score_df lists each game as two adjacent rows that share a date:
# the away team first, then the home team.
date_list = box_score_df['Date'].tolist()
final_score_list = box_score_df['T'].tolist()
win_loss_list = []
home_team_wins = 0
away_team_wins = 0
home_team_losses = 0
away_team_losses = 0

# Walk the table one game (two rows) at a time. Advancing by two after a match
# prevents a home row from being re-paired with the next game's away row when
# consecutive games fall on the same date, and the loop bound removes the need
# to run off the end of the list and catch IndexError.
row = 0
while row + 1 < len(final_score_list):
    if date_list[row] != date_list[row + 1]:
        # Row has no partner on the same date: skip it (no flag is recorded
        # for it) and try to pair starting from the next row.
        row += 1
        continue

    if final_score_list[row] > final_score_list[row + 1]:
        # Away team (first row of the pair) outscored the home team.
        win_loss_list.extend(['W', 'L'])
        away_team_wins += 1
        home_team_losses += 1
    else:
        win_loss_list.extend(['L', 'W'])
        home_team_wins += 1
        away_team_losses += 1
    row += 2

# Guard the ratios so an empty box-score table gives 0.0 instead of ZeroDivisionError.
home_games_played = home_team_wins + home_team_losses
away_games_played = away_team_wins + away_team_losses
home_win_loss_percentage = home_team_wins / home_games_played if home_games_played else 0.0
away_win_loss_percentage = away_team_wins / away_games_played if away_games_played else 0.0

An exception was made


In [12]:
# Two-row summary of how home sides and away sides fared overall.
home_team_record = ['Home', home_team_wins, home_team_losses, home_win_loss_percentage]
away_team_record = ['Away', away_team_wins, away_team_losses, away_win_loss_percentage]

# The label column is named '' so that, once it becomes the index, the saved
# CSV and the rendered table carry no header text above 'Home' / 'Away'.
home_away_record_df = pd.DataFrame(columns=['', 'Wins', 'Losses', 'Win%'])
for side_record in (home_team_record, away_team_record):
    home_away_record_df.loc[len(home_away_record_df)] = side_record

home_away_record_df = home_away_record_df.set_index('')

# Render the win ratio as fixed three-decimal text.
win_pct_text = home_away_record_df["Win%"].map('{:.3f}'.format)
home_away_record_df.loc[:, "Win%"] = win_pct_text

home_away_record_df.to_csv('home_away_record_df.csv')

home_away_record_df

Unnamed: 0,Wins,Losses,Win%
,,,
Home,6.0,4.0,0.6
Away,4.0,6.0,0.4


In [13]:
# Attach the per-row 'W'/'L' flags from win_loss_list as a new 'Decision' column.
# NOTE(review): this needs exactly one flag per box_score_df row, in row order —
# confirm no rows were skipped when win_loss_list was built.
box_score_df['Decision'] = win_loss_list
box_score_df

Unnamed: 0,Date,Home_Away,Team,1Q,2Q,3Q,4Q,T,Decision
0,2/1/20,Away,UTA,34,19,30,24,107,L
1,2/1/20,Home,POR,33,30,31,30,124,W
2,11/15/17,Away,PHI,29,29,25,32,115,W
3,11/15/17,Home,LAL,26,28,30,25,109,L
4,12/27/17,Away,UTA,24,23,22,32,101,L
5,12/27/17,Home,GSW,23,25,42,36,126,W
6,11/3/21,Away,NOP,26,26,35,12,99,L
7,11/3/21,Home,SAC,27,29,36,20,112,W
8,4/21/22,Away,DAL,27,41,29,29,126,W
9,4/21/22,Home,UTA,20,31,40,27,118,L


In [14]:
# Win/loss record for every team that appears in box_score_df.
team_records_df = pd.DataFrame(columns=['Team', 'Games', 'Wins', 'Losses', 'Win%'])

for team_code in box_score_df.Team.unique():
    # All decisions recorded for this team, one per game it played.
    team_decisions = box_score_df.loc[box_score_df['Team'] == team_code, 'Decision'].tolist()
    team_wins = team_decisions.count('W')
    team_losses = team_decisions.count('L')

    team_total_games = team_wins + team_losses
    team_win_percentage = team_wins / team_total_games

    team_records_df.loc[len(team_records_df)] = [
        team_code,
        team_total_games,
        team_wins,
        team_losses,
        team_win_percentage,
    ]

team_records_df = team_records_df.set_index('Team')

# Render the win ratio as fixed three-decimal text.
win_pct_text = team_records_df["Win%"].map('{:.3f}'.format)
team_records_df.loc[:, "Win%"] = win_pct_text

team_records_df.to_csv('team_records_df.csv')

# Display only: most games first, then best win percentage (the saved CSV is unsorted).
team_records_df.sort_values(by=['Games', 'Win%'], ascending=False)

Unnamed: 0_level_0,Games,Wins,Losses,Win%
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
POR,3,2,1,0.667
SAS,3,1,2,0.333
UTA,3,0,3,0.0
SAC,2,2,0,1.0
LAL,2,0,2,0.0
PHI,1,1,0,1.0
GSW,1,1,0,1.0
DAL,1,1,0,1.0
OKC,1,1,0,1.0
ATL,1,1,0,1.0


In [15]:
# Win/loss record for every player, based on the result of each game they appeared in.

# Pair each player appearance with its team's result for that date. One merge
# covers all players; previously a separate merge ran per player and a partial
# row was appended after every game, then discarded by sort + drop_duplicates.
player_games_df = pd.merge(
    all_player_stats_df[['Name', 'Team', 'Date']],
    box_score_df[['Team', 'Date', 'Decision']],
    on=['Team', 'Date'],
)

# One finished row per player, in order of first appearance. Players with no
# matching box-score row produce no group and are therefore omitted.
player_record_rows = []
for player, decisions in player_games_df.groupby('Name', sort=False)['Decision']:
    player_total_games = len(decisions)
    player_losses = int((decisions == 'L').sum())
    # Any decision other than 'L' counts as a win.
    player_wins = player_total_games - player_losses
    player_win_percentage = player_wins / player_total_games
    player_record_rows.append(
        [player, player_total_games, player_wins, player_losses, player_win_percentage]
    )

player_records_df = pd.DataFrame(
    player_record_rows, columns=['Name', 'Games', 'Wins', 'Losses', 'Win%']
)

# Render the win ratio as fixed three-decimal text.
player_records_df['Win%'] = player_records_df['Win%'].map('{:.3f}'.format)
player_records_df = player_records_df.set_index('Name')

# Most games first, then best win percentage.
player_records_df = player_records_df.sort_values(by=['Games', 'Win%'], ascending=False)

player_records_df.to_csv('player_records_df.csv')

player_records_df.head(50)


Unnamed: 0_level_0,Games,Wins,Losses,Win%
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Keldon Johnson,3,1,2,0.333
Cedi Osman,3,1,2,0.333
Julian Champagnie,3,1,2,0.333
Jeremy Sochan,3,1,2,0.333
Devin Vassell,3,1,2,0.333
Tre Jones,3,1,2,0.333
Malaki Branham,3,1,2,0.333
Doug McDermott,3,1,2,0.333
Sandro Mamukelashvili,3,1,2,0.333
Jordan Clarkson,3,0,3,0.0


In [16]:
# Which of the players in the averages table appear in the Hall of Fame table?
#
# Moves the player-name index of player_stat_average_df into a 'Player' column
# so it can be joined on; later cells rely on the frame staying in that shape.
player_stat_average_df = player_stat_average_df.reset_index().rename(
    columns={'Name': 'Player'}
)

# Inner join on name, then discard the 'Unnamed: 0' column.
hof_seen_df = pd.merge(player_stat_average_df, hof_df, on="Player").drop(
    columns='Unnamed: 0'
)

hof_seen_df.to_csv('hof_seen_df.csv')

hof_seen_df

Unnamed: 0,Player,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS


In [17]:
# Which of the players in the averages table appear in the MVP table?
#
# Resets the index of player_stat_average_df again. If the index was already a
# plain row counter, this adds it to the frame as an extra 'index' column, which
# then also appears in mvp_seen_df. The rename only has an effect while a
# 'Name' column still exists.
player_stat_average_df = player_stat_average_df.reset_index().rename(
    columns={'Name': 'Player'}
)

# Inner join on name (a player matches once per row in mvp_df), then discard
# the 'Unnamed: 0' column.
mvp_seen_df = pd.merge(player_stat_average_df, mvp_df, on="Player").drop(
    columns='Unnamed: 0'
)

mvp_seen_df.to_csv('mvp_seen_df.csv')

mvp_seen_df

Unnamed: 0,index,Player,Games,Team,FG,FGA,FG%,3P,3PA,3P%,...,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS,Year
0,75,Joel Embiid,1.0,PHI,14.0,20.0,0.7,2.0,3.0,0.667,...,15.0,7.0,0.0,7.0,2.0,4.0,46.0,19.0,47.9,2023
1,96,Kevin Durant,2.0,PHO/GSW,8.0,12.0,0.667,2.0,3.0,0.667,...,6.5,3.0,0.5,1.5,4.5,1.5,24.5,4.0,19.6,2014
2,102,LeBron James,1.0,LAL,13.0,20.0,0.65,4.0,6.0,0.667,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,30.4,2013
3,102,LeBron James,1.0,LAL,13.0,20.0,0.65,4.0,6.0,0.667,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,30.4,2012
4,102,LeBron James,1.0,LAL,13.0,20.0,0.65,4.0,6.0,0.667,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,30.4,2010
5,102,LeBron James,1.0,LAL,13.0,20.0,0.65,4.0,6.0,0.667,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,30.4,2009
6,131,Russell Westbrook,1.0,LAL,10.0,19.0,0.526,4.0,7.0,0.571,...,4.0,8.0,2.0,1.0,6.0,1.0,27.0,4.0,19.6,2017


In [18]:
# Count All-NBA selections for every player in the averages table.

# In all_league_df, 'Team' holds the selection tier ('1st', '2nd', ...). Move it
# to a trailing 'All_NBA_Team' column. The membership check lets this cell be
# re-run after the move has already happened.
if 'Team' in all_league_df.columns:
    all_league_df['All_NBA_Team'] = all_league_df.pop('Team')
all_league_team_list = all_league_df['All_NBA_Team'].to_list()

# Strip a single trailing capital letter from each player name.
# NOTE(review): presumably that letter is a position marker glued onto the
# name — confirm against the CSV. A name that legitimately ends in a capital
# (e.g. a 'II' suffix) would lose its last character here.
all_league_player_list = all_league_df['Player'].to_list()
cleaned_all_league_player_list = [
    player[:-1] if player[-1:].isupper() else player
    for player in all_league_player_list
]

# One row per distinct player with the number of selections ('X_All_NBA').
consolidated_all_league_dict = dict(Counter(cleaned_all_league_player_list))
aggregate_all_league_df = pd.DataFrame(
    list(consolidated_all_league_dict.items()), columns=['Player', 'X_All_NBA']
)

# Kept in place on purpose: later cells expect the extra index column that this
# reset adds to player_stat_average_df. The rename only has an effect while a
# 'Name' column still exists.
player_stat_average_df.reset_index(inplace=True)
player_stat_average_df.rename(columns={'Name': 'Player'}, inplace=True)
aggregate_all_league_df = pd.merge(player_stat_average_df, aggregate_all_league_df, on="Player")

# Discard the leftover index columns produced by the repeated resets. Which of
# them exist depends on how many resets have run, so missing ones are ignored.
aggregate_all_league_df = aggregate_all_league_df.drop(
    columns=['index', 'level_0'], errors='ignore'
)

aggregate_all_league_df.to_csv('aggregate_all_league_df.csv')

aggregate_all_league_df.sort_values(by=['X_All_NBA'], ascending=False)

Unnamed: 0,Player,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,...,TRB,AST,STL,BLK,TOV,PF,PTS,+/-,GS,X_All_NBA
15,LeBron James,1.0,LAL,13.0,20.0,0.65,4.0,6.0,0.667,8.0,...,7.0,3.0,3.0,0.0,4.0,3.0,38.0,8.0,30.4,19
13,Kevin Durant,2.0,PHO/GSW,8.0,12.0,0.667,2.0,3.0,0.667,6.5,...,6.5,3.0,0.5,1.5,4.5,1.5,24.5,4.0,19.6,10
17,Russell Westbrook,1.0,LAL,10.0,19.0,0.526,4.0,7.0,0.571,3.0,...,4.0,8.0,2.0,1.0,6.0,1.0,27.0,4.0,19.6,9
4,Damian Lillard,1.0,POR,17.0,29.0,0.586,9.0,15.0,0.6,8.0,...,2.0,12.0,1.0,0.0,2.0,2.0,51.0,17.0,44.7,7
3,Carmelo Anthony,1.0,POR,5.0,15.0,0.333,1.0,3.0,0.333,4.0,...,5.0,1.0,0.0,0.0,0.0,1.0,15.0,9.0,9.5,6
11,Joel Embiid,1.0,PHI,14.0,20.0,0.7,2.0,3.0,0.667,16.0,...,15.0,7.0,0.0,7.0,2.0,4.0,46.0,19.0,47.9,5
16,Rudy Gobert,2.0,UTA,3.5,5.0,0.7,0.0,0.0,0.0,3.5,...,9.0,1.0,0.5,2.0,0.5,4.0,10.5,-15.0,12.4,4
1,Anthony Davis,1.0,LAL,6.0,9.0,0.667,0.0,0.0,0.0,1.0,...,8.0,2.0,2.0,1.0,2.0,4.0,13.0,-4.0,12.8,4
14,Klay Thompson,1.0,GSW,5.0,12.0,0.417,3.0,5.0,0.6,2.0,...,0.0,1.0,0.0,1.0,2.0,1.0,15.0,14.0,7.6,2
9,Draymond Green,1.0,GSW,6.0,10.0,0.6,2.0,5.0,0.4,0.0,...,8.0,8.0,0.0,0.0,3.0,1.0,14.0,6.0,14.0,2


In [19]:
# Goal: for each player, a summary such as
# ['1st Team: X', '2nd Team: Y', '3rd Team: Z'] of their All-NBA selections.

# Use the cleaned names so that lookups below match across all rows.
all_league_df['Player'] = cleaned_all_league_player_list

# One summary per row of all_league_df (players with several selections get
# the same summary on each of their rows).
summary_list = []
for player in cleaned_all_league_player_list:
    selections = all_league_df.loc[all_league_df['Player'] == player, 'All_NBA_Team'].to_list()

    first_team_counter = selections.count('1st')
    second_team_counter = selections.count('2nd')
    # Anything that is neither '1st' nor '2nd' is tallied as third team.
    three_team_counter = len(selections) - first_team_counter - second_team_counter

    summary_list.append([
        f'1st Team: {first_team_counter}',
        f'2nd Team: {second_team_counter}',
        f'3rd Team: {three_team_counter}',
    ])

all_league_df['All_League_List'] = summary_list
all_league_list_df = all_league_df.drop(['Unnamed: 0', 'Year', 'All_NBA_Team'], axis=1)

# Join onto the players in aggregate_all_league_df and keep one row per player.
all_league_list_df = pd.merge(
    aggregate_all_league_df, all_league_list_df, on="Player"
).drop_duplicates(subset='Player')

simple_columns = ['Player', 'Games', 'X_All_NBA', 'All_League_List']
all_league_simple_df = all_league_list_df[simple_columns].copy()

all_league_simple_df.to_csv('all_league_simple_df.csv')

all_league_simple_df.sort_values(by=['X_All_NBA'], ascending=False)

Unnamed: 0,Player,Games,X_All_NBA,All_League_List
45,LeBron James,1.0,19,"[1st Team: 13, 2nd Team: 3, 3rd Team: 3]"
33,Kevin Durant,2.0,10,"[1st Team: 6, 2nd Team: 4, 3rd Team: 0]"
68,Russell Westbrook,1.0,9,"[1st Team: 2, 2nd Team: 5, 3rd Team: 2]"
12,Damian Lillard,1.0,7,"[1st Team: 1, 2nd Team: 4, 3rd Team: 2]"
6,Carmelo Anthony,1.0,6,"[1st Team: 0, 2nd Team: 2, 3rd Team: 4]"
26,Joel Embiid,1.0,5,"[1st Team: 1, 2nd Team: 4, 3rd Team: 0]"
64,Rudy Gobert,2.0,4,"[1st Team: 0, 2nd Team: 1, 3rd Team: 3]"
1,Anthony Davis,1.0,4,"[1st Team: 4, 2nd Team: 0, 3rd Team: 0]"
43,Klay Thompson,1.0,2,"[1st Team: 0, 2nd Team: 0, 3rd Team: 2]"
23,Draymond Green,1.0,2,"[1st Team: 0, 2nd Team: 1, 3rd Team: 1]"


In [20]:
# Count All-Star appearances for every player in all_player_stats_df.
all_star_list = allstar_df['All_Stars'].to_list()

seen_players = all_player_stats_df.Name.unique()

# For each player, the number of 'All_Stars' entries that contain their name.
# NOTE(review): if the entries are plain strings this is a substring match, so a
# name that is a prefix of another name would also be counted — confirm the
# format of the 'All_Stars' column.
allstar_appearance_counter_list = [
    sum(1 for entry in all_star_list if player in entry)
    for player in seen_players
]

# Keep only players with at least one appearance; both lists stay in the same order.
allstars_seen_list = [
    player
    for player, appearances in zip(seen_players, allstar_appearance_counter_list)
    if appearances != 0
]
allstars_seen_counter_list = [
    appearances for appearances in allstar_appearance_counter_list if appearances != 0
]

allstars_seen_df = pd.DataFrame({
    'Player': allstars_seen_list,
    'Appearances': allstars_seen_counter_list,
})

allstars_seen_df.to_csv('allstars_seen_df.csv')

allstars_seen_df.sort_values(by=['Appearances'], ascending=False)

Unnamed: 0,Player,Appearances
20,LeBron James,19
8,Kevin Durant,13
10,Carmelo Anthony,10
22,Russell Westbrook,9
21,Anthony Davis,8
5,Joe Johnson,7
9,Damian Lillard,7
4,Joel Embiid,6
14,Klay Thompson,5
15,Draymond Green,4


In [21]:
# Players seen in person who have won Defensive Player of the Year: join the
# per-player averages with the DPOY table on "Player", then discard the leftover
# index columns carried over from earlier reset_index / CSV round-trips.
leftover_index_columns = ['level_0', 'index', 'Unnamed: 0']
dpoy_seen_df = (
    pd.merge(player_stat_average_df, dpoy_df, on="Player")
    .drop(columns=leftover_index_columns)
)

# Persist the joined table (index is written as the first CSV column).
dpoy_seen_df.to_csv('dpoy_seen_df.csv')

# Display only: most DPOY awards first.
dpoy_seen_df.sort_values(by='X_DPOY', ascending=False)

Unnamed: 0,Player,Games,Team,FG,FGA,FG%,3P,3PA,3P%,FT,...,AST,STL,BLK,TOV,PF,PTS,+/-,GS,X_DPOY,Years
1,Rudy Gobert,2.0,UTA,3.5,5.0,0.7,0.0,0.0,0.0,3.5,...,1.0,0.5,2.0,0.5,4.0,10.5,-15.0,12.4,3,"[2021, 2019, 2018]"
0,Draymond Green,1.0,GSW,6.0,10.0,0.6,2.0,5.0,0.4,0.0,...,8.0,0.0,0.0,3.0,1.0,14.0,6.0,14.0,1,[2017]


## Games attended: stadiums visited and teams seen by league

In [22]:
# Per-league breakdown of the games attended: how many distinct home teams
# ("stadiums") were visited and how many distinct teams (home or visiting) were
# seen, each as a raw count and as a share of the league.
team_codes_df = pd.read_csv('team_codes.csv')
games_attended_df = pd.read_csv('badal_sports_attendance.csv')

LEAGUES = ['MLB', 'NBA', 'NFL', 'NHL', 'MLS']


def _non_missing(column):
    """Return the values of a team_codes_df column, skipping empty (NaN) cells."""
    return [x for x in column.to_list() if str(x) != 'nan']


def _classify_teams_by_league(teams, exact_match_leagues, mls_codes):
    """Group team names by league.

    Parameters
    ----------
    teams : iterable of str
        Team names as they appear in the attendance sheet.
    exact_match_leagues : dict
        League name -> list of team names. Leagues are tried in insertion
        order and the first list containing the team wins.
    mls_codes : list of str
        MLS identifiers. A team that matched no other league is counted as MLS
        when its name, with spaces replaced by hyphens, is a substring of at
        least one code.

    Returns
    -------
    dict
        League name -> list of matched teams, in input order. MLS entries are
        stored in their hyphenated form. Teams that match nothing are dropped.
    """
    by_league = {league: [] for league in exact_match_leagues}
    by_league['MLS'] = []
    for team in teams:
        for league, league_teams in exact_match_leagues.items():
            if team in league_teams:
                by_league[league].append(team)
                break
        else:
            hyphenated = team.replace(' ', '-')
            # any() stops at the first matching code, so a name that is a
            # substring of several MLS codes is still counted only once.
            if any(hyphenated in code for code in mls_codes):
                by_league['MLS'].append(hyphenated)
    return by_league


def _fraction(count, total):
    """Return count / total, or 0.0 when the league has no teams listed."""
    return count / total if total > 0 else 0.0


# dropna(): a blank Home/Visitor cell is not a team and would otherwise break
# the string handling in the MLS fallback.
home_teams_seen_list = games_attended_df['Home'].dropna().unique()
away_teams_seen_list = games_attended_df['Visitor'].dropna().unique()

# Home teams first, then every visitor not already listed (order preserved).
all_teams_seen_list = list(home_teams_seen_list)
for away_team in away_teams_seen_list:
    if away_team not in all_teams_seen_list:
        all_teams_seen_list.append(away_team)

mlb_teams_list = _non_missing(team_codes_df['MLB_Teams'])
nba_teams_list = _non_missing(team_codes_df['NBA_Teams'])
nfl_teams_list = _non_missing(team_codes_df['NFL_Teams'])
nhl_teams_list = _non_missing(team_codes_df['NHL_Teams'])
mls_teams_list = _non_missing(team_codes_df['MLS_Codes'])

exact_league_teams = {
    'MLB': mlb_teams_list,
    'NBA': nba_teams_list,
    'NFL': nfl_teams_list,
    'NHL': nhl_teams_list,
}

# Stadiums visited: only teams seen at home count.
home_by_league = _classify_teams_by_league(home_teams_seen_list, exact_league_teams, mls_teams_list)
home_mlb_teams_seen_list = home_by_league['MLB']
home_nba_teams_seen_list = home_by_league['NBA']
home_nfl_teams_seen_list = home_by_league['NFL']
home_nhl_teams_seen_list = home_by_league['NHL']
home_mls_teams_seen_list = home_by_league['MLS']

# Teams seen: home or visiting.
all_by_league = _classify_teams_by_league(all_teams_seen_list, exact_league_teams, mls_teams_list)
all_mlb_teams_seen_list = all_by_league['MLB']
all_nba_teams_seen_list = all_by_league['NBA']
all_nfl_teams_seen_list = all_by_league['NFL']
all_nhl_teams_seen_list = all_by_league['NHL']
all_mls_teams_seen_list = all_by_league['MLS']

# NOTE(review): the MLB total is one less than the number of listed MLB entries.
# The reason is not visible here (possibly a non-team entry in the column) -
# confirm against team_codes.csv.
mlb_league_total = len(mlb_teams_list) - 1

mlb_stadiums_visited = _fraction(len(home_mlb_teams_seen_list), mlb_league_total)
nba_stadiums_visited = _fraction(len(home_nba_teams_seen_list), len(nba_teams_list))
nfl_stadiums_visited = _fraction(len(home_nfl_teams_seen_list), len(nfl_teams_list))
nhl_stadiums_visited = _fraction(len(home_nhl_teams_seen_list), len(nhl_teams_list))
mls_stadiums_visited = _fraction(len(home_mls_teams_seen_list), len(mls_teams_list))

# NOTE(review): unlike %_Visited, the MLB share of teams seen divides by the full
# list length rather than mlb_league_total, so it does not equal
# Teams_Seen / League_Teams_Total in the table. Kept as-is; confirm which
# denominator is intended.
mlb_teams_seen = _fraction(len(all_mlb_teams_seen_list), len(mlb_teams_list))
nba_teams_seen = _fraction(len(all_nba_teams_seen_list), len(nba_teams_list))
nfl_teams_seen = _fraction(len(all_nfl_teams_seen_list), len(nfl_teams_list))
nhl_teams_seen = _fraction(len(all_nhl_teams_seen_list), len(nhl_teams_list))
mls_teams_seen = _fraction(len(all_mls_teams_seen_list), len(mls_teams_list))

games_dict = {
    'League': list(LEAGUES),
    'League_Teams_Total': [
        mlb_league_total,
        len(nba_teams_list),
        len(nfl_teams_list),
        len(nhl_teams_list),
        len(mls_teams_list),
    ],
    'Stadiums_Visited': [len(home_by_league[league]) for league in LEAGUES],
    '%_Visited': [
        mlb_stadiums_visited,
        nba_stadiums_visited,
        nfl_stadiums_visited,
        nhl_stadiums_visited,
        mls_stadiums_visited,
    ],
    'Teams_Seen': [len(all_by_league[league]) for league in LEAGUES],
    '%_Teams_Seen': [
        mlb_teams_seen,
        nba_teams_seen,
        nfl_teams_seen,
        nhl_teams_seen,
        mls_teams_seen,
    ],
}

games_breakdown_df = pd.DataFrame(games_dict)

# Render the fractions as percentage strings. Plain column assignment replaces
# the float column outright; writing strings through .loc[:, col] into a float
# column is deprecated upcasting in newer pandas.
for pct_column in ("%_Visited", "%_Teams_Seen"):
    games_breakdown_df[pct_column] = games_breakdown_df[pct_column].map('{:.1%}'.format)

# The DataFrame index is written as the first CSV column (index=False is not passed).
games_breakdown_df.to_csv('games_breakdown_df.csv')

# Display only: set_index returns a new frame, games_breakdown_df is unchanged.
games_breakdown_df.set_index('League')


Unnamed: 0_level_0,League_Teams_Total,Stadiums_Visited,%_Visited,Teams_Seen,%_Teams_Seen
League,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MLB,32,0,0.0%,0,0.0%
NBA,30,6,20.0%,12,40.0%
NFL,32,0,0.0%,0,0.0%
NHL,32,0,0.0%,0,0.0%
MLS,28,0,0.0%,0,0.0%
