In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from scipy.stats import norm
import datetime
from datetime import date
import requests
from bs4 import BeautifulSoup
import os
import sys

import warnings
warnings.filterwarnings("ignore")

In [None]:
def ConvertStrToTime(time_str):
    """Normalize a 'MM:SS' minutes-played string to an 'HH:MM:SS' string.

    Unlike ``datetime.strptime(..., '%M:%S')``, minute values of 60 or more
    (e.g. a multi-overtime '63:12') are accepted and rolled over into hours.

    Parameters
    ----------
    time_str : str
        Playing time formatted as minutes and seconds separated by a colon.

    Returns
    -------
    str
        The same duration formatted as zero-padded 'HH:MM:SS'.

    Raises
    ------
    TypeError
        If ``time_str`` is not a string.
    ValueError
        If the string is not two colon-separated integers, or the seconds
        part is outside 0-59.
    """
    if not isinstance(time_str, str):
        raise TypeError(f"time_str must be a string, got {type(time_str).__name__}")

    # Tuple unpacking raises ValueError when there is not exactly one ':'
    minutes_part, seconds_part = time_str.split(':')
    total_minutes, seconds = int(minutes_part), int(seconds_part)
    if total_minutes < 0 or not 0 <= seconds < 60:
        raise ValueError(f"time data {time_str!r} does not match format 'MM:SS'")

    hours, minutes = divmod(total_minutes, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}'

def time_to_sec(t):
    """Return the total number of seconds represented by an 'H:M:S' string."""
    hours, minutes, seconds = (int(part) for part in t.split(':'))
    return seconds + 60 * minutes + 3600 * hours

### If you are running the script for a single player, uncomment the code in the cell directly below, update it with the desired values, and run the script from the start.

In [None]:
# player_name  = 'James Harden'
# opposing_team = 'MIL'
# player_prop = .5 + 4
# fd_odds = "O"
#  
# # Home Game
# home = 0

In [None]:
# The away flag is always the complement of the user-supplied home flag
away = 1 if home == 0 else 0

In [None]:
# Remember the starting working directory; the (commented-out) export cells
# further down chdir into output folders and restore this path afterwards.
fwd = os.getcwd()

## Data Extract 1: Extracting Player Data

In [None]:
# Desired Year Range for data extraction: by default the previous and the
# current calendar year are requested as season labels.
today = date.today()
current_year = today.strftime("%Y")
end_range = int(current_year) + 1
start_range = end_range - 2

# Special case: pull one extra (earlier) season for these players.
# NOTE(review): presumably this compensates for a recent season with little or
# no game data for them - confirm and keep the list up to date.
if player_name in ('Miles Bridges', 'Ja Morant'):
    years = list(range(start_range - 1, end_range))
else:
    years = list(range(start_range, end_range))

# Extract URL_Parameter/BBRef_PlayerID from mapping file
mapping = pd.read_excel('REFERENCE TABLE - Player URL Mapping 2023-2024 Season.xlsx')

# Names in the mapping file are matched against the lower-cased player name
player_rows = mapping[mapping['Name'] == player_name.lower()]

if player_rows.empty:
    # IndexError is kept as the exception type; the message indicates the
    # BulkUpload script relies on it to skip the row.
    raise IndexError("Main Script Error: Player not in reference file! If you are in the BulkUpload script, this row will get skipped.")

player_id = list(player_rows['URL_Param'])[0]
player_position = list(player_rows['Pos'])[0]

df_list = []

for year in years:
    # NOTE(review): the directory letter is hard-coded to 'l' for every player - confirm the site accepts this.
    url = f'https://www.basketball-reference.com/players/l/{player_id}/gamelog/{year}'
    try:
        data = requests.get(url) #retrieve data from basketball reference
        page = data.text #extract html

        soup = BeautifulSoup(page, 'html.parser') # parse html
        pgl_basic_table = soup.find(id="pgl_basic") #locate the pgl_basic table
        pgl_df = pd.read_html(str(pgl_basic_table))[0] #read it in to pandas
        df_list.append(pgl_df) # add to list of dfs
    except ValueError:
        # read_html raises ValueError when the page holds no game-log table
        # for that season; such seasons are skipped on purpose.
        continue

# Fail with an explicit message (same exception type pd.concat would raise)
# when no season produced any data.
if not df_list:
    raise ValueError(f"Main Script Error: No game log tables found for {player_name} in seasons {years}.")

player_data = pd.concat(df_list)

In [None]:
# Drop rows that merely repeat the column header (their 'Tm' cell is the literal 'Tm')
is_header_row = player_data['Tm'] == 'Tm'
player_data = player_data[~is_header_row]

## Data Extract 2: Extracting Team Data

In [None]:
# Every team code that appears in the player's scraped game logs
player_teams = list(player_data['Tm'].unique())

team_df_list = []

# One request per (season, team) pair, seasons outermost
season_team_pairs = [(season, club) for season in years for club in player_teams]

for year, team in season_team_pairs:
    url = f'https://www.basketball-reference.com/teams/{team}/{year}/gamelog/'

    # Fetch the page and parse its html
    data = requests.get(url)
    page = data.text
    soup = BeautifulSoup(page, 'html.parser')

    # Read the tgl_basic table into pandas and flatten its two-level header
    tgl_basic_table = soup.find(id="tgl_basic")
    df = pd.read_html(str(tgl_basic_table))[0]
    df.columns = df.columns.droplevel(0)

    # Tag every row with the team whose log this is
    df['Team'] = team
    team_df_list.append(df)

team_data = pd.concat(team_df_list)

In [None]:
# Keep real game rows only: discard repeated header rows and rows with no rank value
rank_col = team_data['Rk']
team_data = team_data[(rank_col != 'Rk') & rank_col.notna()]

In [None]:
# Update column naming conventions: columns past position 24 get a '.1'
# suffix so they no longer share a name with the earlier columns.
new_cols = [
    name + '.1' if position > 24 else name
    for position, name in enumerate(list(team_data))
]

team_data.columns = new_cols

## Data Extract 3: Extracting Opposing Team Data

In [None]:
# Request the opposing team's game log for the latest season in `years`
# instead of a hard-coded 2024, so the opponent data stays in step with the
# seasons pulled for the player.
opp_season = max(years)
opp_url = f'https://www.basketball-reference.com/teams/{opposing_team}/{opp_season}/gamelog/'

data = requests.get(opp_url) #retrieve data from basketball reference
page = data.text #extract html

soup = BeautifulSoup(page, 'html.parser') # parse html
opp_tgl_basic_table = soup.find(id="tgl_basic") #locate the tgl_basic table
opp_team = pd.read_html(str(opp_tgl_basic_table))[0] #read it in to pandas

opp_team.columns = opp_team.columns.droplevel(0) # Clean the multiindex columns
opp_team['Team'] = opposing_team

# Drop repeated header rows and rows with no rank value
opp_team = opp_team[opp_team['Rk'] != 'Rk']
opp_team = opp_team[~opp_team['Rk'].isna()]

In [None]:
# Update column naming conventions: columns past position 24 get a '.1'
# suffix so they no longer share a name with the earlier columns.
new_cols = [
    name + '.1' if position > 24 else name
    for position, name in enumerate(list(opp_team))
]

opp_team.columns = new_cols

## Player's Team Game Log

In [None]:
# Cleaning Team Game Log Data: name the venue column, then spell out its
# values ('@' means Away, a missing marker means Home).
team_data = team_data.rename(columns={'Unnamed: 3_level_1': 'Home/Away'})
venue = team_data['Home/Away']
team_data['Home/Away'] = venue.mask(venue == '@', 'Away').fillna('Home')

# Converting to proper data types
team_data['Date'] = pd.to_datetime(team_data['Date'])

# Defensive rebounds = total rebounds - offensive rebounds, first for the
# unsuffixed columns and then for the '.1' columns.
for suffix in ('', '.1'):
    total_col, offensive_col = 'TRB' + suffix, 'ORB' + suffix
    team_data[total_col] = team_data[total_col].astype('int')
    team_data[offensive_col] = team_data[offensive_col].astype('int')
    team_data['DRB' + suffix] = team_data[total_col] - team_data[offensive_col]

# Removing erroneous columns
unused_columns = ['Rk', 'G', 'Unnamed: 24_level_1']
team_log_df = team_data.drop(columns=unused_columns)

In [None]:
# Trimming Data to key columns. An explicit copy is taken so the conversion
# below writes to an independent frame rather than to a slice of team_log_df.
team_key_cols = ['Date', 'Team.1', 'FG', 'FGA', 'FTA', 'ORB', 'DRB', 'TOV', 'FG.1', 'FGA.1', 'FTA.1', 'ORB.1', 'DRB.1', 'TOV.1']
tldf2 = team_log_df[team_key_cols].copy()

# Convert everything possible into numbers to avoid computational string errors.
# Columns are replaced one by one (not written through .iloc) so the numeric
# dtype actually sticks; a write through .iloc on an object column sets the
# values in place on recent pandas and leaves the dtype as object.
for stat_col in team_key_cols[2:]:
    tldf2[stat_col] = pd.to_numeric(tldf2[stat_col], errors='ignore')

In [None]:
# Formula based on Basketball reference's calculation of possessions [https://www.basketball-reference.com/about/glossary.html#poss]
#   Poss = 0.5 * ((Tm FGA + 0.4 * Tm FTA - 1.07 * (Tm ORB / (Tm ORB + Opp DRB)) * (Tm FGA - Tm FG) + Tm TOV)
#               + (Opp FGA + 0.4 * Opp FTA - 1.07 * (Opp ORB / (Opp ORB + Tm DRB)) * (Opp FGA - Opp FG) + Opp TOV))
# Unsuffixed columns are the team's own stats; '.1' columns are the opponent's.
# Each side's offensive-rebound share is measured against the OTHER side's
# defensive rebounds, as the glossary formula specifies.
team_orb_share = tldf2['ORB'] / (tldf2['ORB'] + tldf2['DRB.1'])
opp_orb_share = tldf2['ORB.1'] / (tldf2['ORB.1'] + tldf2['DRB'])

team_side = tldf2['FGA'] + 0.4 * tldf2['FTA'] - 1.07 * team_orb_share * (tldf2['FGA'] - tldf2['FG']) + tldf2['TOV']
opp_side = tldf2['FGA.1'] + 0.4 * tldf2['FTA.1'] - 1.07 * opp_orb_share * (tldf2['FGA.1'] - tldf2['FG.1']) + tldf2['TOV.1']

tldf2['Poss'] = 0.5 * (team_side + opp_side)

## Opposing Team's Game Log

In [None]:
# Converting the rebound columns into numbers to avoid any string parsing issues
for rebound_col in ('TRB', 'ORB', 'TRB.1', 'ORB.1'):
    opp_team[rebound_col] = opp_team[rebound_col].astype('float')

# Cleaning Team Game Log Data: name the venue column, then spell out its
# values ('@' means Away, a missing marker means Home).
opp_team = opp_team.rename(columns={'Unnamed: 3_level_1': 'Home/Away'})
opp_venue_marker = opp_team['Home/Away']
opp_team['Home/Away'] = opp_venue_marker.mask(opp_venue_marker == '@', 'Away').fillna('Home')

# Converting to proper data types
opp_team['Date'] = pd.to_datetime(opp_team['Date'])

# Defensive rebounds = total rebounds - offensive rebounds, for both column sets
for suffix in ('', '.1'):
    opp_team['DRB' + suffix] = opp_team['TRB' + suffix] - opp_team['ORB' + suffix]

# Removing erroneous columns
unused_opp_columns = ['Rk', 'G', 'Unnamed: 24_level_1']
opp_team_log_df = opp_team.drop(columns=unused_opp_columns)

In [None]:
# Trimming Data to key columns. An explicit copy is taken so the conversion
# below writes to an independent frame rather than to a slice of opp_team_log_df.
opp_key_cols = ['Date', 'Home/Away', 'FG', 'FGA', 'FTA', 'ORB', 'DRB', 'TOV', 'FG.1', 'FGA.1', 'FTA.1', 'ORB.1', 'DRB.1', 'TOV.1']
otl_df2 = opp_team_log_df[opp_key_cols].copy()

# Convert everything possible into numbers to avoid computational string errors.
# Columns are replaced one by one (not written through .iloc) so the numeric
# dtype actually sticks; a write through .iloc on an object column sets the
# values in place on recent pandas and leaves the dtype as object.
for stat_col in opp_key_cols[2:]:
    otl_df2[stat_col] = pd.to_numeric(otl_df2[stat_col], errors='ignore')

In [None]:
# Formula based on Basketball reference's calculation of possessions [https://www.basketball-reference.com/about/glossary.html#poss]
#   Poss = 0.5 * ((Tm FGA + 0.4 * Tm FTA - 1.07 * (Tm ORB / (Tm ORB + Opp DRB)) * (Tm FGA - Tm FG) + Tm TOV)
#               + (Opp FGA + 0.4 * Opp FTA - 1.07 * (Opp ORB / (Opp ORB + Tm DRB)) * (Opp FGA - Opp FG) + Opp TOV))
# Here "Tm" is the opposing team whose log this is (unsuffixed columns) and
# "Opp" is whoever they played ('.1' columns). Each side's offensive-rebound
# share is measured against the OTHER side's defensive rebounds.
own_orb_share = otl_df2['ORB'] / (otl_df2['ORB'] + otl_df2['DRB.1'])
other_orb_share = otl_df2['ORB.1'] / (otl_df2['ORB.1'] + otl_df2['DRB'])

own_side = otl_df2['FGA'] + 0.4 * otl_df2['FTA'] - 1.07 * own_orb_share * (otl_df2['FGA'] - otl_df2['FG']) + otl_df2['TOV']
other_side = otl_df2['FGA.1'] + 0.4 * otl_df2['FTA.1'] - 1.07 * other_orb_share * (otl_df2['FGA.1'] - otl_df2['FG.1']) + otl_df2['TOV.1']

otl_df2['Poss'] = 0.5 * (own_side + other_side)

## Player Game Log

In [None]:
# Remove games the player did not appear in, plus repeated header rows
# (MP == 'MP') left over from concatenating multiple season tables.
# .copy() makes the result an independent frame so the column writes below
# are not applied to a slice of the original.
non_playing_markers = ['Inactive', 'Did Not Play', 'MP', 'Did Not Dress', 'Not With Team', 'Player Suspended']
player_data = player_data[~player_data['MP'].isin(non_playing_markers)].copy()

# Cleaning Player Data: 'MM:SS' string -> 'HH:MM:SS' string -> seconds -> minutes (float)
player_data['MP'] = player_data['MP'].apply(ConvertStrToTime)
player_data['MP'] = player_data['MP'].apply(time_to_sec)/60

player_data = player_data.rename(columns = {'Unnamed: 5':'Home/Away', 'Unnamed: 7': 'Outcome'})

# Use .loc instead of chained indexing (df[col][mask] = ...): chained writes
# may land on a temporary copy and be silently lost under pandas Copy-on-Write.
player_data.loc[player_data['Home/Away'] == '@', 'Home/Away'] = 'Away'
player_data.loc[player_data['Home/Away'].isna(), 'Home/Away'] = 'Home'

#Converting to proper data types
player_data['Date'] = pd.to_datetime(player_data['Date'])

# Removing erroneous columns
player_df = player_data.drop(columns = ['Rk', 'G'])

## Master Data (Player Game Log + Player's Team Game Log)

In [None]:
# Joined Player Game Log and His Team's Game Log (left join on game date and
# team code; overlapping stat names from the team log get a '_TL' suffix)
master_data = player_df.merge(tldf2, left_on=['Date', 'Tm'], right_on =['Date', 'Team.1'], how= 'left', suffixes = ("","_TL"))

# Convert everything possible into numbers to avoid computational string errors.
# Every column from position 7 onward is replaced one by one (not written
# through .iloc) so the numeric dtype actually sticks; a write through .iloc
# on an object column sets the values in place on recent pandas and leaves
# the dtype as object.
for stat_col in list(master_data.columns[7:]):
    master_data[stat_col] = pd.to_numeric(master_data[stat_col], errors='ignore')

In [None]:
# Keep only games with positive minutes played; a zero-minute row would
# put a zero in the denominator of the per-minute rebound rate (TRB / MP).
master_data = master_data[master_data['MP'] > 0]

In [None]:
# Available rebounds = missed field goals by both sides (attempts minus makes);
# rebounding rate = the player's total rebounds as a share of those misses.
combined_attempts = master_data['FGA_TL'] + master_data['FGA.1']
combined_makes = master_data['FG_TL'] + master_data['FG.1']
master_data['Available Rebounds'] = combined_attempts - combined_makes
master_data['Rebounding Rate'] = master_data['TRB'] / master_data['Available Rebounds']

In [None]:
# ORB% = player's offensive rebounds per own-team missed field goal
# DRB% = player's defensive rebounds per opponent missed field goal
own_team_misses = master_data['FGA_TL'] - master_data['FG_TL']
opponent_misses = master_data['FGA.1'] - master_data['FG.1']
master_data['ORB%'] = master_data['ORB'] / own_team_misses
master_data['DRB%'] = master_data['DRB'] / opponent_misses

In [None]:
# Total rebounds per minute played (TRB is stored back as float first)
trb_as_float = master_data['TRB'].astype('float')
master_data['TRB'] = trb_as_float
master_data['TRBPM'] = trb_as_float / master_data['MP']

In [None]:
# Rank games chronologically (1 = earliest) so "last N games" can be sliced by rank
chronological_rank = master_data['Date'].rank()
master_data['Date_Rank'] = chronological_rank

In [None]:
# Dummy Encoding Home and Away games to factor into regression model.
# Built as vectorized 0/1 integer columns instead of filling None-initialised
# columns through chained indexing (df[col][mask] = ...), which may write to
# a temporary copy and leaves the columns as object dtype.
master_data['Home'] = (master_data['Home/Away'] == 'Home').astype(int)
master_data['Away'] = (master_data['Home/Away'] == 'Away').astype(int)

## Regression Model

In [None]:
# Modelling frame: the target (TRB) sits in the first column, predictors follow
df = master_data[['TRB', 'MP', 'TRBPM', 'Rebounding Rate', 'ORB%', 'DRB%', 'Poss', 'Home', 'Away']]

y = df[['TRB']]
X = df.drop(columns=['TRB'])

# Fixed-seed 80/20 split; inspect these variables to compare model output with actual data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

model = LinearRegression().fit(X_train, y_train)

# Mean squared error on the held-out games
y_pred = model.predict(X_test)
squared_errors = (y_pred - y_test) ** 2
mse = np.mean(squared_errors)
# print("Mean Squared Error:", mse)

#### Visualizing Model Performance

In [None]:
# X_test['Predicted Rebounds'] = y_pred
# master_data.merge(X_test, on = ['MP', 'TRBPM', 'Rebounding Rate', 'ORB%', 'DRB%', 'Poss', 'Home', 'Away'])[['Date', 'Opp', 'TRB', 'Predicted Rebounds']]

## Model Execution

In [None]:
# Split on chronological rank: the 7 highest ranks are "recent", everything before is "historical"
recent_rank_floor = master_data['Date_Rank'].max() - 6
recent_data = master_data[master_data['Date_Rank'] >= recent_rank_floor]  # last 7 games
historical_data = master_data[master_data['Date_Rank'] < recent_rank_floor]  # all other games before the last 7

## Core Variable 1: Minutes Played (MP)

In [None]:
# Number of Monte Carlo draws generated for each simulated input variable
simulations = 250_000

In [None]:
# Blend weights: long-run history vs. recent form (recent minutes dominate)
overall_weight_mp = .3
recent_weight_mp = .7

# Historical median, recent-games mean, and the spread across all games
median_mp = historical_data['MP'].median()
recent_mp = recent_data['MP'].mean()
sd_mp = master_data['MP'].std()

# Calculate weighted average of overall performance and recent performance
weighted_avg_mp = (median_mp * overall_weight_mp) + (recent_mp * recent_weight_mp)

# Monte Carlo simulation for Minutes Played: uniform draws pushed through the
# normal inverse CDF centred on the weighted average
mp_uniform_draws = np.random.random(simulations)
proj_mp = norm.ppf(mp_uniform_draws, loc=weighted_avg_mp, scale=sd_mp)

In [None]:
# Projected minutes = median of the simulated minutes
mp_sim_medians = pd.DataFrame(proj_mp).median()
mp = mp_sim_medians.iloc[0]

## Core Variable 2: Total Rebounds Per Minute (TRBPM)

In [None]:
# Blend weights: long-run history vs. recent form (history dominates)
overall_weight_trb = .7
recent_weight_trb = .3

# Historical median TRBPM and the spread of TRBPM across all games
median_trbpm = historical_data['TRBPM'].median()
sd_trbpm = master_data['TRBPM'].std()

# Recent TRBPM is pooled: total rebounds over total minutes in the recent games
recent_trbpm = recent_data['TRB'].sum() / recent_data['MP'].sum()

weighted_avg_trbpm = (median_trbpm * overall_weight_trb) + (recent_trbpm * recent_weight_trb)

# Monte Carlo draws via the normal inverse CDF
trbpm_uniform_draws = np.random.random(simulations)
proj_trbpm = norm.ppf(trbpm_uniform_draws, loc=weighted_avg_trbpm, scale=sd_trbpm)

In [None]:
# Projected TRBPM = median of the simulated values
trbpm_sim_medians = pd.DataFrame(proj_trbpm).median()
trbpm = trbpm_sim_medians.iloc[0]

## Core Variable 3: Rebounding Rate (RR)

In [None]:
# Blend weights: long-run history vs. recent form (history dominates)
overall_weight_rr = .7
recent_weight_rr = .3

# Historical median rebounding rate and its spread across all games
median_rr = historical_data['Rebounding Rate'].median()
sd_rr = master_data['Rebounding Rate'].std()

# Recent rebounding rate is pooled: total rebounds over total available rebounds
recent_rr = recent_data['TRB'].sum() / recent_data['Available Rebounds'].sum()

weighted_avg_rr = (median_rr * overall_weight_rr) + (recent_rr * recent_weight_rr)

# Monte Carlo draws via the normal inverse CDF
rr_uniform_draws = np.random.random(simulations)
proj_rr = norm.ppf(rr_uniform_draws, loc=weighted_avg_rr, scale=sd_rr)

In [None]:
# Projected rebounding rate = median of the simulated values
rr_sim_medians = pd.DataFrame(proj_rr).median()
rr = rr_sim_medians.iloc[0]

## Core Variable 4: Offensive Rebounding Percentage (ORB%)

In [None]:
# Blend weights: long-run history vs. recent form (history dominates)
overall_weight_orbp = .7
recent_weight_orbp = .3

# Historical median ORB% and its spread across all games
median_orbp = historical_data['ORB%'].median()
sd_orbp = master_data['ORB%'].std()

# Recent ORB% is pooled: offensive rebounds over own-team missed field goals
recent_team_misses = recent_data['FGA_TL'].sum() - recent_data['FG_TL'].sum()
recent_orbp = recent_data['ORB'].sum() / recent_team_misses

weighted_avg_orbp = (median_orbp * overall_weight_orbp) + (recent_orbp * recent_weight_orbp)

# Monte Carlo draws via the normal inverse CDF
orbp_uniform_draws = np.random.random(simulations)
proj_orbp = norm.ppf(orbp_uniform_draws, loc=weighted_avg_orbp, scale=sd_orbp)

In [None]:
# Projected ORB% = median of the simulated values
orbp_sim_medians = pd.DataFrame(proj_orbp).median()
orbp = orbp_sim_medians.iloc[0]

## Core Variable 5: Defensive Rebounding Percentage (DRB%)

In [None]:
# Overall median and std_dev of DRB% (historical median, spread across all games)
median_drbp = historical_data['DRB%'].median()
sd_drbp = master_data['DRB%'].std()
overall_weight_drbp = .7

# Recent DRB% is pooled over the recent games using the same definition as the
# per-game 'DRB%' column: the player's defensive rebounds over the OPPONENT's
# missed field goals. (This previously reused the ORB% inputs - ORB, FGA_TL,
# FG_TL - so the recent component was really an offensive rebounding rate.)
recent_drbp = recent_data['DRB'].sum()/(recent_data['FGA.1'].sum() - recent_data['FG.1'].sum())
recent_weight_drbp = .3

# Weighted blend of long-run and recent DRB%
weighted_avg_drbp = (median_drbp * overall_weight_drbp) + (recent_drbp * recent_weight_drbp)

# Monte Carlo draws: uniform samples pushed through the normal inverse CDF
proj_drbp = norm.ppf(np.random.random(simulations), loc=weighted_avg_drbp, scale=sd_drbp)

In [None]:
# Projected DRB% = median of the simulated values
drbp_sim_medians = pd.DataFrame(proj_drbp).median()
drbp = drbp_sim_medians.iloc[0]

## Core Variable 6: Possessions/Pace

In [None]:
# Pick the venue split that matches the upcoming game instead of always
# assuming the player is away: home == 0 means the player's team is on the
# road (so the opponent is at home); any other value means the reverse.
player_venue = 'Away' if home == 0 else 'Home'
opp_venue = 'Home' if home == 0 else 'Away'

team_poss_sample = master_data['Poss'][master_data['Home/Away'] == player_venue]
opp_poss_sample = otl_df2['Poss'][otl_df2['Home/Away'] == opp_venue]

avg_team_poss = team_poss_sample.mean()
avg_opp_team_poss = opp_poss_sample.mean()

# Spread is taken over both samples pooled together
agg_sd_poss = pd.DataFrame(list(team_poss_sample) + list(opp_poss_sample)).std()[0]

# Average both teams' possessions to get an expected value for the matchup
expected_matchup_poss = (avg_team_poss + avg_opp_team_poss)/2

# Monte Carlo draws: uniform samples pushed through the normal inverse CDF
proj_poss = norm.ppf(np.random.random(simulations), loc=expected_matchup_poss, scale=agg_sd_poss)

In [None]:
# Projected possessions = median of the simulated values
poss_sim_medians = pd.DataFrame(proj_poss).median()
poss = poss_sim_medians.iloc[0]

## Core Variable 7: Home/Away Game
- Established above as it is entered manually by user.

## Using the regression set up to calculate predicted rebounds for all Monte Carlo simulations

In [None]:
# Assemble the simulated inputs column by column, in the same order as the
# regression's predictors, then append the constant venue flags.
mc_simulations = pd.DataFrame({
    'Sim_MP': proj_mp,
    'Sim_TRBPM': proj_trbpm,
    'Sim_RR': proj_rr,
    'Sim_ORBP': proj_orbp,
    'Sim_DRBP': proj_drbp,
    'Sim_Poss': proj_poss,
})
mc_simulations['Home'] = home
mc_simulations['Away'] = away

# The model was fitted on the training column names, and on at least one
# machine (Mac OS) prediction failed because the simulation column names could
# not be mapped to them. So: remember the "Sim_" names, temporarily switch to
# the training names (every column of `df` except the target), predict, and
# switch back.
og_col_names = list(mc_simulations.columns)
mc_simulations.columns = list(df)[1:]

# Make Predictions
mc_simulations['Predicted Rebounds'] = model.predict(mc_simulations)

# Restore the "Sim_" names (plus the new prediction column) for the rest of the script
mc_simulations.columns = og_col_names + ['Predicted Rebounds']

## Player Details

In [None]:
# Headline numbers: the median of every simulated input and of the prediction
sim_medians = mc_simulations[['Sim_MP', 'Sim_TRBPM', 'Sim_RR', 'Sim_ORBP', 'Sim_DRBP', 'Sim_Poss', 'Predicted Rebounds']].median()
mp = sim_medians['Sim_MP']
trbpm = sim_medians['Sim_TRBPM']
rr = sim_medians['Sim_RR']
orbp = sim_medians['Sim_ORBP']
drbp = sim_medians['Sim_DRBP']
poss = sim_medians['Sim_Poss']
median_predicted_reb = sim_medians['Predicted Rebounds']

# Share of simulations strictly above / strictly below the prop line
n_simulations = len(mc_simulations)
predicted_rebounds = mc_simulations['Predicted Rebounds']
over_probability = int((predicted_rebounds > player_prop).sum()) / n_simulations
under_probability = int((predicted_rebounds < player_prop).sum()) / n_simulations

### Integration of Analysis Process (From Master Dataset)

In [None]:
master_data['Player'] = player_name

# Last 15 games by chronological rank. .copy() makes this an independent
# frame so the helper columns below are not written to a slice of master_data.
last_15 = master_data[master_data['Date_Rank'] >= master_data['Date_Rank'].max() - 14].copy()

# Exceed Rate input: 1 when the player's rebounds beat the prop line, else 0.
# Built as a vectorized flag instead of chained indexing (df[col][mask] = ...),
# which may write to a temporary copy and be silently lost.
last_15['Exceed'] = (last_15['TRB'] > player_prop).astype(int)

# Minutes Test input: compare the simulated median minutes against the
# minutes actually played in each of the last 15 games.
median_sim_mp = mp
last_15['Abs_Diff'] = (last_15['MP'] - median_sim_mp).abs()

# Minutes Test Standardization: 1 when actual minutes were within 3 of the simulated median
last_15['Within_3'] = (last_15['Abs_Diff'] <= 3).astype(int)

# Validation/Analysis Columns: share of the last 15 games passing each check
exceed_rate = last_15['Exceed'].mean()
minutes_test = last_15['Within_3'].mean()

In [None]:
# One summary row for this player, kept as (column label, value) pairs so each
# label sits next to the value it describes.
row_fields = [
    ('Role', player_position),
    ('Player', player_name),
    ('MP', mp),
    ('TRBPM', trbpm),
    ('Possessions', poss),
    ('Player Prop', player_prop),
    ('Predicted Rebs', median_predicted_reb),
    ('RR', rr),
    ('ORBP%', orbp),
    ('DRBP%', drbp),
    ('O%', over_probability),
    ('U%', under_probability),
    ('FanDuel Favorite', fd_odds),
    ('Minutes Test', minutes_test),
    ('Exceed Rate', exceed_rate),
]
row_data = [value for _, value in row_fields]

# A single column of values, transposed into a single row, then labelled
row_df = pd.DataFrame(row_data).T
row_df.columns = [label for label, _ in row_fields]
row_df

### If looking to generate details file, uncomment code below

In [None]:
# Keep only the columns used in the details output (Possessions, RR, ORBP% and DRBP% are dropped)
row_df = row_df[['Role', 'Player', 'MP', 'TRBPM', 'Player Prop', 'Predicted Rebs', 'O%', 'U%', 'FanDuel Favorite', 'Minutes Test', 'Exceed Rate']]

# NOTE(review): the commented-out export below calls `row_df_export`, which is not
# defined anywhere in the visible notebook - presumably it should be `row_df`;
# confirm before uncommenting. The output folder is an absolute, user-specific path.
# detail_dir = r"C:\Users\loudr\Desktop\NBA Sports Betting Models\Python Exports\Detail Outputs" # Update here @Sammy
# os.chdir(detail_dir)
# row_df_export.to_excel(f'{player_name} Details.xlsx', index = False)
# print(f'File Saved to {detail_dir}!')

## Player Prop Map (Spread)

In [None]:
# Creating the Probability/Prop Map
val_list = []
probabilities = []

for val in list(np.arange(1, 20.5,  .5)):
    val_list.append(val)
    probabilities.append(len(mc_simulations[mc_simulations['Predicted Rebounds'] > val])/len(mc_simulations))

In [None]:
# Two-column table: candidate prop line and the simulated probability of going over it
data = {'Prop Value': val_list, 'Over %': probabilities}

prop_spread_df = pd.DataFrame(data)

# Round the prop lines to 2 decimals so they make clean column headers later
prop_spread_df['Prop Value'] = prop_spread_df['Prop Value'].round(2)

In [None]:
# Flip the table so each prop value becomes a column
prop_spread_df_transposed = prop_spread_df.T

# First transposed row (the prop values) becomes the header; the remaining
# row(s) become the data.
headers = prop_spread_df_transposed.iloc[0]
over_rows = prop_spread_df_transposed.iloc[1:].to_numpy()
prop_map_df = pd.DataFrame(over_rows, columns=headers)

In [None]:
# Bring in player name
prop_map_df['Player'] = player_name

# Rearrange columns to show player name first. The order is built from the
# column name rather than by rotating the last column to the front, so
# re-running this cell (when 'Player' is already first) does not push a prop
# column ahead of it.
cols = ['Player'] + [col for col in prop_map_df.columns if col != 'Player']
prop_map_df = prop_map_df[cols]

### If looking to generate prop map, uncomment code below

In [None]:
# prop_dir = r"C:\Users\loudr\Desktop\NBA Sports Betting Models\Python Exports\Prop Outputs" # Update here @Sammy
# os.chdir(prop_dir)
# prop_map_df.to_excel(f'{player_name} Prop.xlsx', index = False)
# print(f'File saved to {prop_dir}!')

In [None]:
# os.chdir(fwd)

### List of DataFrames being Exported in (2) - Execution_Script_BulkUpload...

In [None]:
# DataFrames picked up by the bulk-upload execution script:
# row_df
# prop_map_df
# master_data

# Rename map that turns master_data's column labels into lowercase snake_case names.
# The dict literal is broken into 5 rows for readability:
# Line 1 = logistical details (date, age, team, etc...)
# line 2 = player specific stat lines
# line 3 = team log stats
# line 4 = opposing team stats
# line 5 = calculated metrics derived from lines 2 - 4
new_col_names = {'Date':'date', 'Age':'age', 'Tm':'tm', 'Home/Away':'home_away', 'Opp':'opponent', 'Outcome':'outcome', 'GS':'gs',
'MP':'mp', 'FG':'fg', 'FGA':'fga', 'FG%':'fg_perc', '3P':'3p', '3PA':'3pa', '3P%':'3p_perc', 'FT':'ft', 'FTA':'fta', 'FT%':'ft_perc', 'ORB':'orb', 'DRB':'drb', 'TRB':'trb', 'AST':'ast', 'STL':'stl', 'BLK':'blk', 'TOV':'tov', 'PF':'pf', 'PTS':'pts', 'GmSc':'gmsc', '+/-':'plus_minus',
'Team.1':'team_tl', 'FG_TL':'fg_tl', 'FGA_TL':'fga_tl', 'FTA_TL':'fta_tl', 'ORB_TL':'orb_tl', 'DRB_TL':'drb_tl', 'TOV_TL':'tov_tl', 
'FG.1':'fg_opponent', 'FGA.1':'fga_opponent', 'FTA.1':'fta_opponent', 'ORB.1':'orb_opponent', 'DRB.1':'drb_opponent', 'TOV.1':'tov_opponent',
'Poss':'possessions', 'Available Rebounds':'avail_rebounds', 'Rebounding Rate':'rebound_rate', 'ORB%':'orb_perc', 'DRB%':'drb_perc', 'TRBPM':'total_rebs_per_min', 'Date_Rank':'date_rank', 'Home':'home_flag', 'Away':'away_flag', 'Player':'player_name'}

# Renamed copy of master_data; labels missing from the map are left unchanged by rename
master_data_df = master_data.rename(columns = new_col_names)