In [1]:
# import python libraries and nba library
import numpy as np
import nba_api as nba
from matplotlib import pyplot as plt
import pandas as pd
import socket
import logging
import os
import glob

In [2]:
# Load every CSV in the historical-extraction directory into a single frame.
path = "C:/Github Repo/NBA-Analytics/data/basketballProject/historical-extraction-files/"
# Sorted so the row order of the combined frame does not depend on the
# arbitrary order in which the filesystem happens to list the files.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# Read each file once and concatenate in a single call; growing the frame
# inside a loop re-copies every previously loaded row on each iteration.
# index_col=1 makes each file's second column the row index.
frames = [pd.read_csv(file, index_col=1) for file in csv_files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory contains no CSV files.
extract_df = pd.concat(frames) if frames else pd.DataFrame()

In [3]:
# Load every CSV in the evergreen directory into a single frame.
path = "C:/Github Repo/NBA-Analytics/data/basketballProject/evergreen-files/"
# Sorted so the row order of the combined frame does not depend on the
# arbitrary order in which the filesystem happens to list the files.
csv_files = sorted(glob.glob(os.path.join(path, "*.csv")))

# Read each file once and concatenate in a single call; growing the frame
# inside a loop re-copies every previously loaded row on each iteration.
# index_col=1 makes each file's second column the row index.
frames = [pd.read_csv(file, index_col=1) for file in csv_files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory contains no CSV files.
evergreen_df = pd.concat(frames) if frames else pd.DataFrame()

In [4]:
# Tidy the historical extract: label the unnamed leading column, remove the
# columns that are not carried forward, then discard repeated rows.
extract_df = (
    extract_df
    .rename(columns={"Unnamed: 0": "Season #"})
    .drop(columns=["LEAGUE_ID", "GS"])
    .drop_duplicates()
)

# Tidy the evergreen data: discard repeated rows first (while "Unnamed: 0" is
# still present), then remove the columns that are not carried forward.
evergreen_df = (
    evergreen_df
    .drop_duplicates()
    .drop(columns=["Unnamed: 0", "RANK", "EFF", "AST_TOV", "STL_TOV"])
)

# Add constant-valued columns at fixed positions. The inserts run in this
# order, so each position refers to the frame as left by the previous insert.
for position, column, value in (
    (0, "Season #", -1),
    (3, "SEASON_ID", "2022-23"),
    (6, "Age", 0),
):
    evergreen_df.insert(position, column, value)

In [5]:
# Keep only the extract rows whose SEASON_ID is not "2022-23".
is_other_season = extract_df["SEASON_ID"].ne("2022-23")
extract_df = extract_df[is_other_season]

In [6]:
# Stack the historical extract on top of the evergreen rows.
# The two frames are matched by column POSITION (not by name) and the result
# takes the evergreen column labels, so the extract is relabelled first.
# Assigning the labels raises ValueError if the column counts differ.
extract_aligned = extract_df.copy()
extract_aligned.columns = evergreen_df.columns

# pd.concat keeps each column's dtype; going through np.vstack turns a frame
# with mixed text/numeric columns into one object array, so every column of
# the result would lose its numeric dtype. ignore_index=True yields a fresh
# 0..n-1 index, matching what building the frame from a raw array produced.
full_df = pd.concat([extract_aligned, evergreen_df], ignore_index=True)

In [7]:
# Compute the MWAR family of columns from the box-score columns of full_df.

# Columns the metric reads. They are converted to numeric dtypes up front so
# the arithmetic below is vectorised even if full_df holds them as object
# dtype; pd.to_numeric raises if a value cannot be parsed as a number.
stat_columns = [
    "FGA", "FGM", "FG3A", "FG3M", "FTA", "FTM", "OREB", "DREB",
    "AST", "TOV", "BLK", "STL", "PF", "PTS", "MIN", "GP",
]
stats = full_df[stat_columns].apply(pd.to_numeric)


def _rate_or_zero(total, denominator):
    """Return total / denominator element-wise, using 0 where denominator is 0.

    Only the rows whose denominator is exactly 0 are set to 0; every other row
    keeps its computed value (missing denominators stay missing).
    """
    nonzero_denominator = denominator.where(denominator != 0)  # 0 -> NaN
    return (total / nonzero_denominator).mask(denominator == 0, 0)


# Weighted component terms.
# NOTE(review): missed shots (attempts minus makes) carry a positive weight
# here while personal fouls carry a negative one — confirm the intended sign.
FGMA = (stats["FGA"] - stats["FGM"]) * 1.25
FG3MA = (stats["FG3A"] - stats["FG3M"]) * 1.25
FTMA = (stats["FTA"] - stats["FTM"]) * 0.9
REB = 3 * stats["OREB"] + 2 * stats["DREB"]
ATR = 3 * (stats["AST"] - stats["TOV"])
STB = 2 * (stats["BLK"] + stats["STL"])
PF = stats["PF"] * -0.5
PTS = stats["PTS"] * 1
# Minutes relative to 18 per game played. This adjusted value feeds the sums
# below; the per-minute rate further down divides by the raw MIN column.
MIN = stats["MIN"] - 18 * stats["GP"]

full_df["MOWAR"] = FGMA + FG3MA + FTMA + PTS
full_df["MDWAR"] = STB + PF
full_df["MPWAR"] = ATR
full_df["MIWAR"] = REB + MIN
full_df["MWAR"] = FGMA + FG3MA + FTMA + REB + ATR + STB + PF + PTS + MIN

# Per-minute and per-game rates. A bare try/except around the whole-column
# division would replace the ENTIRE column with 0 on any error (for example a
# single zero denominator in an object-dtype column); here only the rows with
# a zero denominator become 0.
full_df["MWARPM"] = _rate_or_zero(full_df["MWAR"], stats["MIN"])
full_df["MWARPG"] = _rate_or_zero(full_df["MWAR"], stats["GP"])

In [8]:
# Destination of the consolidated stats file: directory, base name, extension.
file_path = "C:/Github Repo/NBA-Analytics/data/basketballProject/consolidated-files/"
# Alternative relative location, kept disabled:
# file_path = "../../data/basketballProject/analysis-files/"
file_name = "consolidated_historical_stats"
file_extension = ".csv"

# Assemble the full path by plain concatenation and write full_df there as CSV.
full_path = "".join([file_path, file_name, file_extension])
full_df.to_csv(path_or_buf=full_path)