In [3]:
import pandas as pd
import numpy as np

# Earliest season (inclusive) kept from the season statistics.
FIRST_SEASON = 1980

# All-Star selections.
df_als = pd.read_csv("nbaallstargames.csv")

# Season statistics (semicolon-separated), restricted to FIRST_SEASON onwards.
# Rows whose Year is missing compare as False and are therefore dropped too.
df_stat = (
    pd.read_csv("correct_data.csv", sep=";")
    .loc[lambda seasons: seasons["Year"].ge(FIRST_SEASON)]
)

# Columns removed up front: team/bookkeeping fields, advanced metrics and raw
# shooting/rebounding totals that are not carried into the final table.
deletedColumns = [
    "Column1", "Tm", "GS", "PER", "FTr",
    "ORB%", "DRB%", "TRB%", "AST%", "STL%", "BLK%", "TOV%", "blanl",
    "OWS", "DWS", "WS", "WS/48", "blank2",
    "OBPM", "DBPM", "BPM", "VORP",
    "FG", "FGA", "3P", "3PA", "2P", "2PA", "eFG%",
    "FT", "FTA", "ORB", "DRB",
]

# Season totals turned into per-game averages: raw column -> new column name.
# The dict order is the order in which the new columns are appended.
per_game_names = {
    "PTS": "PointsPerGame",
    "TRB": "Rebounds",
    "AST": "Assists",
    "STL": "Steals",
    "BLK": "Blocks",
    "TOV": "Turnovers",
    "PF": "PersonalFouls",
}

trimmed_stats = df_stat.drop(columns=deletedColumns)
games_played = trimmed_stats["G"]

# Each average is total / games played, rounded to one decimal.
per_game_values = {
    new_name: (trimmed_stats[raw_name] / games_played).round(1)
    for raw_name, new_name in per_game_names.items()
}

df_stat = (
    trimmed_stats
    .assign(
        # MinutesPlayed is the season total cast to int; it is NOT divided by G.
        MinutesPlayed=trimmed_stats["MP"].astype(int),
        **per_game_values,
        # Age already exists, so it is converted in place and keeps its position.
        Age=trimmed_stats["Age"].astype(int),
    )
    # The raw totals are no longer needed once the derived columns exist.
    .drop(columns=["G", "PTS", "TRB", "AST", "STL", "BLK", "TOV", "PF", "MP"])
)

def _with_season_key(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame whose first column is a combined "Season" key.

    The key is "<Player>-<Year>" (for example "Georges Niang-2017"): Player is
    cast to str and Year to int and then str. The "Player" and "Year" columns
    the key is built from are dropped. The input frame is not modified.

    Raises:
        KeyError: if ``frame`` has no "Player" or "Year" column.
    """
    season_key = frame["Player"].astype(str) + "-" + frame["Year"].astype(int).astype(str)
    keyed = frame.assign(Season=season_key).drop(columns=["Year", "Player"])
    # assign() appends Season at the end (or overwrites an existing one in place);
    # pop + insert moves it to the first position.
    keyed.insert(0, "Season", keyed.pop("Season"))
    return keyed


# Replace Player + Year with a single leading "Season" column in both the
# Season Statistics and the All-Star data, so the two frames share one join key.
df_stat = _with_season_key(df_stat)
df_als = _with_season_key(df_als)

# Numerical encoding of the All-Star "Role".
role_mapping = {
    "Starter": 2,
    "Reserve": 1,
    "DNP": 1  # DNP should be treated the same as "Reserve"
}

# Left-join the All-Star "Role" onto every season row via "Season", encode it as
# AllStarStatus (0, 1 or 2) and then drop "Role", which the new column replaces.
# Seasons without a match in the All-Star data have a missing Role and get 0.
# NOTE(review): any Role value that is not a key of role_mapping also becomes 0.
# NOTE(review): the join is an exact string match on "<Player>-<Year>"; confirm
# that both files spell player names identically.
df_merged = (
    df_stat
    .merge(df_als[["Season", "Role"]], on="Season", how="left")
    .assign(
        AllStarStatus=lambda seasons: seasons["Role"].map(role_mapping).fillna(0).astype(int)
    )
    .drop(columns=["Role"])
)

df_merged.head()

['c:\\Users\\silvo\\AppData\\Local\\Programs\\Python\\Python312\\python312.zip', 'c:\\Users\\silvo\\AppData\\Local\\Programs\\Python\\Python312\\DLLs', 'c:\\Users\\silvo\\AppData\\Local\\Programs\\Python\\Python312\\Lib', 'c:\\Users\\silvo\\AppData\\Local\\Programs\\Python\\Python312', '', 'C:\\Users\\silvo\\AppData\\Roaming\\Python\\Python312\\site-packages', 'C:\\Users\\silvo\\AppData\\Roaming\\Python\\Python312\\site-packages\\win32', 'C:\\Users\\silvo\\AppData\\Roaming\\Python\\Python312\\site-packages\\win32\\lib', 'C:\\Users\\silvo\\AppData\\Roaming\\Python\\Python312\\site-packages\\Pythonwin', 'c:\\Users\\silvo\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages']


Unnamed: 0,Season,Pos,Age,TS%,3PAr,USG%,FG%,3P%,2P%,FT%,MinutesPlayed,PointsPerGame,Rebounds,Assists,Steals,Blocks,Turnovers,PersonalFouls,AllStarStatus
0,Georges Niang-2017,PF,23,0.285,0.333,20.2,0.25,0.083,0.333,1.0,93,0.9,0.7,0.2,0.1,0.0,0.2,0.3,0
1,Mike Tobey-2017,C,22,0.25,0.0,9.0,0.25,,0.25,,25,1.0,1.5,0.5,0.0,0.0,0.5,1.0,0
2,Isaiah Taylor-2017,PG,22,0.19,0.286,6.4,0.143,0.0,0.2,0.5,52,0.8,0.8,0.8,0.2,0.2,0.0,1.5,0
3,Chris McCullough-2017,PF,21,0.266,1.0,15.7,0.0,0.0,,0.5,8,0.5,1.0,0.0,0.5,0.0,0.5,0.0,0
4,Diamond Stone-2017,C,19,0.339,0.0,31.3,0.231,,0.231,1.0,24,1.4,0.9,0.0,0.0,0.1,0.3,0.7,0
