In [202]:
# import python libraries and nba library
import numpy as np
import nba_api as nba
from matplotlib import pyplot as plt
import pandas as pd
import socket
import logging
import os
import glob

In [203]:
# Locate every CSV in the extraction data directory. The directory is derived
# from the current working directory by swapping "notebooks" for "data".
# os.path.join uses the platform's separator; a hard-coded "\\" only produces a
# valid path on Windows.
path = os.path.join(os.getcwd().replace("notebooks", "data"), "extraction-files")
# glob returns matches in arbitrary filesystem order; sorting keeps the order in
# which files are loaded (and therefore the combined row order) reproducible.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [204]:
# Read every extraction CSV listed in csv_files and stack them into one frame.
# All files are read first and concatenated once: calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration (quadratic cost).
extract_frames = [pd.read_csv(file) for file in csv_files]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no CSV files were found (the result the accumulate-in-a-loop form gave).
extract_df = pd.concat(extract_frames) if extract_frames else pd.DataFrame()

In [205]:
# Tidy the combined extract in one chain:
#   1. label the unnamed first column (read by pandas as "Unnamed: 0") "Season #"
#   2. discard the LEAGUE_ID, PLAYER_AGE and GS columns
#   3. remove duplicate rows that snuck in on import
extract_df = (
    extract_df
    .rename(columns={"Unnamed: 0": "Season #"})
    .drop(columns=["LEAGUE_ID", "PLAYER_AGE", "GS"])
    .drop_duplicates()
)

In [206]:
# Locate every CSV in the evergreen data directory. The directory is derived
# from the current working directory by swapping "notebooks" for "data".
# os.path.join uses the platform's separator; a hard-coded "\\" only produces a
# valid path on Windows.
path = os.path.join(os.getcwd().replace("notebooks", "data"), "evergreen-files")
# NOTE: this rebinds `path` and `csv_files`, which earlier held the extraction
# directory and its files. Sorting keeps the load order reproducible.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

In [217]:
# Read every evergreen CSV listed in csv_files and stack them into one frame.
# All files are read first and concatenated once: calling pd.concat inside the
# loop re-copies the accumulated frame on every iteration (quadratic cost).
evergreen_frames = [pd.read_csv(file) for file in csv_files]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no CSV files were found (the result the accumulate-in-a-loop form gave).
evergreen_df = pd.concat(evergreen_frames) if evergreen_frames else pd.DataFrame()

In [218]:
# Clean the evergreen frame: name the unnamed first column "Rank", de-duplicate
# while "Rank" is still present, then drop the rank and ratio columns.
evergreen_df = (
    evergreen_df
    .rename(columns={"Unnamed: 0": "Rank"})
    .drop_duplicates()
    .drop(columns=["Rank", "RANK", "EFF", "AST_TOV", "STL_TOV"])
)
# Add two constant columns at fixed positions (0, then 3 after the first insert).
# NOTE(review): the positions presumably mirror extract_df's column layout,
# since the two frames are later stacked by position - confirm.
evergreen_df.insert(loc=0, column="Season #", value="Now")
evergreen_df.insert(loc=3, column="SEASON_ID", value="2022-23")

In [219]:
# Keep only the extract rows whose SEASON_ID is not "2022-23".
# NOTE(review): presumably because evergreen_df supplies the 2022-23 rows - confirm.
extract_df = extract_df.loc[extract_df["SEASON_ID"].ne("2022-23")]

In [221]:
# Disabled (commented-out) MWAR formula: none of the lines in this cell execute.
# # MWAR Formula Coefficients:
# coFGM = 2
# coFGA = -1.2
# coFG3M = 1
# coFG3A = -0.4
# coFTM = 1
# coFTA = -1
# coDREB = 1.5
# coOREB = 2
# coAST = 2.5
# coTOV = -3
# coSTL = 2
# coBLK = 2
# coPF = -0.5
# coPTS = 1 

# totFGM = coFGM * full_df["FGM"]
# totFGA = coFGA * full_df["FGA"]
# totFG3M = coFG3M * full_df["FG3M"]
# totFG3A = coFG3A * full_df["FG3A"]
# totFTM = coFTM * full_df["FTM"]
# totFTA = coFTA * full_df["FTA"]
# totDREB = coDREB * full_df["DREB"]
# totOREB = coOREB * full_df["OREB"]
# totAST = coAST * full_df["AST"]
# totTOV = coTOV * full_df["TOV"]
# totSTL = coSTL * full_df["STL"]
# totBLK = coBLK * full_df["BLK"]
# totPF = coPF * full_df["PF"]
# totPTS = coPTS * full_df["PTS"]
# totMIN = full_df["MIN"] - 18

# full_df["MWAR"] = totFGM + totFGA + totFG3A + totFG3M + totFTM + totFTA + totDREB + totOREB + totAST + totTOV + totSTL + totBLK + totPF + totPTS + totMIN

In [222]:
# Stack the historical extract on top of the evergreen frame.
# The two frames are matched by column POSITION (their headers are not compared)
# and the combined frame takes its column names from evergreen_df.
if extract_df.shape[1] != evergreen_df.shape[1]:
    raise ValueError(
        "extract_df and evergreen_df must have the same number of columns "
        f"to be stacked by position ({extract_df.shape[1]} != {evergreen_df.shape[1]})"
    )
# Relabel a copy so extract_df itself keeps its own headers.
extract_aligned = extract_df.copy()
extract_aligned.columns = evergreen_df.columns
# pd.concat preserves per-column dtypes. Routing both frames through np.vstack
# collapsed them into a single object array, so every column of the result
# (including the numeric stats) came back with dtype `object`.
full_df = pd.concat([extract_aligned, evergreen_df], ignore_index=True)

In [223]:
# ---- Component terms, computed from the stat columns of full_df ----

# Shooting: attempts minus makes, weighted per shot type.
# NOTE(review): these terms carry positive weights and are added into MOWAR and
# MWAR below, so a larger attempts-minus-makes gap raises the score - confirm
# that this is intended.
FGMA = 1.25 * (full_df["FGA"] - full_df["FGM"])
FG3MA = 1.25 * (full_df["FG3A"] - full_df["FG3M"])
FTMA = 0.9 * (full_df["FTA"] - full_df["FTM"])
PTS = full_df["PTS"] * 1

# Rebounds (offensive weighted 3, defensive 2) and assists net of turnovers.
REB = 3 * full_df["OREB"] + 2 * full_df["DREB"]
ATR = (full_df["AST"] - full_df["TOV"]) * 3

# Blocks plus steals, and the personal-foul penalty.
STB = (full_df["BLK"] + full_df["STL"]) * 2
PF = -0.5 * full_df["PF"]

# Minutes relative to 18 per game played.
MIN = full_df["MIN"] - full_df["GP"] * 18

# ---- Composite scores, added to full_df in this order ----
composite_columns = {
    "MOWAR": FGMA + FG3MA + FTMA + PTS,
    "MDWAR": STB + PF,
    "MPWAR": ATR,
    "MIWAR": REB + MIN,
    "MWAR": FGMA + FG3MA + FTMA + REB + ATR + STB + PF + PTS + MIN,
}
for column_name, column_values in composite_columns.items():
    full_df[column_name] = column_values

# Rate versions of MWAR. These divide by the raw MIN and GP columns of full_df,
# not by the adjusted `MIN` term computed above.
full_df["MWARPM"] = full_df["MWAR"] / full_df["MIN"]
full_df["MWARPG"] = full_df["MWAR"] / full_df["GP"]

In [224]:
# full_2021_test = full_df.iloc[:,[2,3,8,-8,-7,-6,-5,-4,-3,-2,-1]][(full_df["SEASON_ID"] == "2022-23") & (full_df["GP"] > 20)].sort_values("MWARPM", ascending = False)

In [225]:
# Show every row ordered by MWAR per minute, highest first.
# The sorted frame is only displayed; full_df keeps its existing row order.
full_df.sort_values(by="MWARPM", ascending=False)

Unnamed: 0,Season #,PLAYER_ID,PLAYER,SEASON_ID,TEAM_ID,TEAM,GP,MIN,FGM,FGA,...,TOV,PF,PTS,MOWAR,MDWAR,MPWAR,MIWAR,MWAR,MWARPM,MWARPG
761,6,203507,Giannis Antetokounmpo,2019-20,1610612749,MIL,63,1917.0,685,1238,...,230,195,1857,3011.15,156.5,372,2635.0,6174.65,3.220996,98.010317
355,1,1629029,Luka Doncic,2019-20,1610612742,DAL,61,2047.0,581,1255,...,260,153,1759,3186.4,75.5,834,2173.0,6268.9,3.062482,102.768852
972,Now,1629029,Luka Doncic,2022-23,1610612742,DAL,27,989,305,611,...,98,78,890,1523.75,81.0,408,975,2987.75,3.020981,110.657407
763,8,203507,Giannis Antetokounmpo,2021-22,1610612749,MIL,67,2204.0,689,1245,...,219,212,2002,3102.45,220.0,507,2688.0,6517.45,2.957101,97.275373
980,Now,203507,Giannis Antetokounmpo,2022-23,1610612749,MIL,24,786,266,505,...,88,86,734,1201.2,45.0,120,937,2303.2,2.93028,95.966667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
513,4,1629117,Wenyen Gabriel,2021-22,1610612751,BKN,1,1.0,0,0,...,0,0,0,0.0,0.0,0,-15.0,-15.0,-15.0,-15.0
1446,Now,1628998,Cody Martin,2022-23,1610612766,CHA,1,1,0,0,...,0,0,0,0.0,0.0,0,-17,-17.0,-17.0,-17.0
1445,Now,1631205,Buddy Boeheim,2022-23,1610612765,DET,1,1,0,0,...,0,0,0,0.0,0.0,0,-17,-17.0,-17.0,-17.0
1453,Now,1630701,Michael Foster Jr.,2022-23,1610612755,PHI,1,1,0,0,...,0,0,0,0.0,0.0,0,-17,-17.0,-17.0,-17.0


In [226]:
# Write the consolidated frame to CSV. With to_csv's defaults the index is
# written as the first, unnamed column.
# NOTE(review): file_path is an absolute, machine-specific directory, unlike the
# input directories, which are derived from os.getcwd() - confirm before sharing.
file_path = "C:/Github Repo/Mario-Jupyter-Codespace/data/basketballProject/analysis-files/"
file_name = "consolidated_evergreen_stats"
file_extension = ".csv"
full_path = f"{file_path}{file_name}{file_extension}"
full_df.to_csv(path_or_buf=full_path)