In [20]:
import pandas as pd
import numpy as np
from catnip.fla_redshift import FLA_Redshift
from sqlalchemy import null
from datetime import datetime

from prefect.blocks.system import Secret
from typing import Dict
from concurrent.futures import ThreadPoolExecutor

from sklearn.preprocessing import MinMaxScaler
from dataclasses import dataclass, field
import requests
import calendar
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import confusion_matrix as cm

In [21]:
def get_redshift_credentials() -> Dict:

    cred_dict = {
        "dbname": Secret.load("stellar-redshift-db-name").get(),
        "host": Secret.load("stellar-redshift-host").get(),
        "port": 5439,
        "user": Secret.load("stellar-redshift-user-name").get(),
        "password": Secret.load("stellar-redshift-password").get(),

        "aws_access_key_id": Secret.load("fla-s3-aws-access-key-id-east-1").get(),
        "aws_secret_access_key": Secret.load("fla-s3-aws-secret-access-key-east-1").get(),
        "bucket": Secret.load("fla-s3-bucket-name-east-1").get(),
        "subdirectory": "us-east-1",

        "verbose": False,
    }

    return cred_dict

with ThreadPoolExecutor(1) as pool:
    rs_creds = pool.submit(lambda: get_redshift_credentials()).result()

In [30]:
panthers_df = pd.read_csv("C:\\Users\\riffere\\Desktop\\panthers_results.csv")

In [32]:
panthers_df['weekday'] = panthers_df.apply(lambda row: datetime.strptime(row['event_date'], '%m/%d/%y').weekday(), axis =1)

In [34]:
# density
calendar_df = pd.DataFrame(columns = ['days_of_the_year'], data = 
    ['01-01','01-02','01-03','01-04','01-05','01-06','01-07','01-08','01-09','01-10','01-11',
    '01-12','01-13','01-14','01-15','01-16','01-17','01-18','01-19','01-20','01-21','01-22',
    '01-23','01-24','01-25','01-26','01-27','01-28','01-29','01-30','01-31',
    '02-01','02-02','02-03','02-04','02-05','02-06','02-07','02-08','02-09','02-10','02-11',
    '02-12','02-13','02-14','02-15','02-16','02-17','02-18','02-19','02-20','02-21','02-22',
    '02-23','02-24','02-25','02-26','02-27','02-28',
    '03-01','03-02','03-03','03-04','03-05','03-06','03-07','03-08','03-09','03-10','03-11',
    '03-12','03-13','03-14','03-15','03-16','03-17','03-18','03-19','03-20','03-21','03-22',
    '03-23','03-24','03-25','03-26','03-27','03-28','03-29','03-30','03-31',
    '04-01','04-02','04-03','04-04','04-05','04-06','04-07','04-08','04-09','04-10','04-11',
    '04-12','04-13','04-14','04-15','04-16','04-17','04-18','04-19','04-20','04-21','04-22',
    '04-23','04-24','04-25','04-26','04-27','04-28','04-29','04-30',
    '05-01','05-02','05-03','05-04','05-05','05-06','05-07','05-08','05-09','05-10','05-11',
    '05-12','05-13','05-14','05-15','05-16','05-17','05-18','05-19','05-20','05-21','05-22',
    '05-23','05-24','05-25','05-26','05-27','05-28','05-29','05-30','05-31',
    '06-01','06-02','06-03','06-04','06-05','06-06','06-07','06-08','06-09','06-10','06-11',
    '06-12','06-13','06-14','06-15','06-16','06-17','06-18','06-19','06-20','06-21','06-22',
    '06-23','06-24','06-25','06-26','06-27','06-28','06-29','06-30',
    '07-01','07-02','07-03','07-04','07-05','07-06','07-07','07-08','07-09','07-10','07-11',
    '07-12','07-13','07-14','07-15','07-16','07-17','07-18','07-19','07-20','07-21','07-22',
    '07-23','07-24','07-25','07-26','07-27','07-28','07-29','07-30','07-31',
    '08-01','08-02','08-03','08-04','08-05','08-06','08-07','08-08','08-09','08-10','08-11',
    '08-12','08-13','08-14','08-15','08-16','08-17','08-18','08-19','08-20','08-21','08-22',
    '08-23','08-24','08-25','08-26','08-27','08-28','08-29','08-30','08-31',
    '09-01','09-02','09-03','09-04','09-05','09-06','09-07','09-08','09-09','09-10','09-11',
    '09-12','09-13','09-14','09-15','09-16','09-17','09-18','09-19','09-20','09-21','09-22',
    '09-23','09-24','09-25','09-26','09-27','09-28','09-29','09-30',
    '10-01','10-02','10-03','10-04','10-05','10-06','10-07','10-08','10-09','10-10','10-11',
    '10-12','10-13','10-14','10-15','10-16','10-17','10-18','10-19','10-20','10-21','10-22',
    '10-23','10-24','10-25','10-26','10-27','10-28','10-29','10-30','10-31',
    '11-01','11-02','11-03','11-04','11-05','11-06','11-07','11-08','11-09','11-10','11-11',
    '11-12','11-13','11-14','11-15','11-16','11-17','11-18','11-19','11-20','11-21','11-22',
    '11-23','11-24','11-25','11-26','11-27','11-28','11-29','11-30',
    '12-01','12-02','12-03','12-04','12-05','12-06','12-07','12-08','12-09','12-10','12-11',
    '12-12','12-13','12-14','12-15','12-16','12-17','12-18','12-19','12-20','12-21','12-22',
    '12-23','12-24','12-25','12-26','12-27','12-28','12-29','12-30','12-31'])

def get_range(date : datetime.date, calendar_df_active):
    month = date[0:2]
    day = date[3:5]
    year = int(date[6:])
    if calendar.isleap(year):
        leap_day = pd.DataFrame(columns = ['days_of_the_year'], data = ['02-29'])
        calendar_df_active = pd.concat([calendar_df_active[0:59], leap_day, calendar_df_active[59:]], ignore_index= True)
    date = str(str(month) + '-' + str(day))
    if date < '01-04':
        a = calendar_df_active[:calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]+4]
        left = 7 - len(a)
        b = calendar_df_active[(365-left):].values
        total = np.concatenate((a,b), axis = 0)
    elif date > '12-28':
        b = calendar_df_active[calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]-3:]
        left = 7 - len(b)
        a = calendar_df_active[0:(left)].values
        total = np.concatenate((b,a), axis = 0)
    else:
        total = calendar_df_active[calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]-3:
                calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]+4].values
    val = 0
    for i in total:
        month = i[0][0:2]
        day = i[0][3:]
        date = str(month + '/' + day + '/'+str(year))
        if date in event_dates:
            val += 1
    return val

In [35]:
event_dates = panthers_df['event_date'].values
panthers_df['fla_density'] = panthers_df.apply(lambda row: get_range(row['event_date'], calendar_df),axis = 1)

In [36]:
def get_back(date, calendar_df_active):
    month = date[0:2]
    day = date[3:5]
    year = int(date[6:])
    if calendar.isleap(year):
        leap_day = pd.DataFrame(columns = ['days_of_the_year'], data = ['02-29'])
        calendar_df_active = pd.concat([calendar_df_active[0:59], leap_day, calendar_df_active[59:]], ignore_index= True)
    date = str(str(month) + '-' + str(day))
    if date == '01-01':
        total = ['12-31','01-01', '01-02']
    elif date == '12-31':
        total = ['12-30', '12-31', '01-01']
    else:
        total = calendar_df_active[calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]-1:
                calendar_df_active.index[calendar_df_active['days_of_the_year'] == date].tolist()[0]+2].values
    new_list = []
    for i in total:
        month = i[0][0:2]
        day = i[0][3:]
        date = str(month + '/' + day + '/'+str(year))
        new_list.append(date)
    if new_list[0] in event_dates:
        val = 2
    elif new_list[2] in event_dates:
        val = 1
    else:
        val = 0
    return val

In [37]:
panthers_df['fla_back_to_back'] = panthers_df.apply(lambda row: get_back(row['event_date'], calendar_df),axis = 1)

In [38]:
panthers_df['game_loc'] = panthers_df.apply(lambda row: row['opponent'] if row['side'] == 'Away' else 'FLA', axis = 1)
panthers_df['prev_game_loc'] = panthers_df.game_loc.shift(1)
panthers_df.at[0, 'prev_game_loc'] = 'FLA'

In [39]:
panthers_df

Unnamed: 0,season,event_date,id,side,team_name,opponent,result,weekday,fla_density,fla_back_to_back,game_loc,prev_game_loc
0,20212022,10/14/21,2021020010,Home,FLA,PIT,Win,3,2,0,FLA,FLA
1,20212022,10/16/21,2021020025,Home,FLA,NYI,Win,5,3,0,FLA,FLA
2,20212022,10/19/21,2021020042,Away,FLA,TBL,Win,1,3,0,TBL,FLA
3,20212022,10/21/21,2021020055,Home,FLA,COL,Win,3,3,0,FLA,TBL
4,20212022,10/23/21,2021020070,Away,FLA,PHI,Win,5,3,0,PHI,FLA
...,...,...,...,...,...,...,...,...,...,...,...,...
323,20242025,04/08/25,2024021237,Home,FLA,TOR,,1,4,0,FLA,DET
324,20242025,04/10/25,2024021251,Home,FLA,DET,,3,3,0,FLA,FLA
325,20242025,04/12/25,2024021268,Home,FLA,BUF,,5,4,0,FLA,FLA
326,20242025,04/14/25,2024021287,Home,FLA,NYR,,0,3,1,FLA,FLA


In [40]:
nhl_distances = pd.read_csv('C:\\Users\\riffere\\Desktop\\NHL_Distance.csv')
nhl_distances = nhl_distances.set_index('Unnamed: 0')
panthers_df['distance_between'] = panthers_df.apply(lambda row: nhl_distances.at[row['game_loc'], row['prev_game_loc']], axis = 1)

In [41]:
overall_df =  pd.read_csv("C:\\Users\\riffere\\Desktop\\season_team_points.csv")

In [42]:
teams = overall_df['team_name'].unique()

rolling_points_pctg = pd.DataFrame()

for team in teams:
    temp = overall_df[overall_df['team_name'] == team]
    rol_avg = temp['point_pct'].rolling(3, closed='left', min_periods = 1).mean()
    temp['last_3_pp_avg'] = rol_avg
    rolling_points_pctg = pd.concat([rolling_points_pctg, temp], axis = 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['last_3_pp_avg'] = rol_avg
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['last_3_pp_avg'] = rol_avg
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['last_3_pp_avg'] = rol_avg
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [43]:
opp_schedules =  pd.read_csv("C:\\Users\\riffere\\Desktop\\entire_nhl_schedule_historical.csv")

In [44]:
opp_schedules_final = pd.DataFrame()
for team in opp_schedules['team_name'].unique():

    temp = opp_schedules[opp_schedules['team_name'] == team]
    event_dates = temp['event_date'].values
    temp['opp_density'] = temp.apply(lambda row: get_range(row['event_date'], calendar_df),axis = 1)

    temp['opp_back_to_back'] = temp.apply(lambda row: get_back(row['event_date'], calendar_df),axis = 1)

    opp_schedules_final = pd.concat([opp_schedules_final, temp], axis = 0)

opp_schedules_final = opp_schedules_final[['game_id','event_date','opp_density','opp_back_to_back', 'team_name']].rename(columns = {'game_id':'id', 'team_name' : 'opponent'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['opp_density'] = temp.apply(lambda row: get_range(row['event_date'], calendar_df),axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['opp_back_to_back'] = temp.apply(lambda row: get_back(row['event_date'], calendar_df),axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['o

In [45]:
panthers_df_final = panthers_df.merge(opp_schedules_final, how = 'left', on = ['id', 'event_date', 'opponent'])

panthers_df_final = panthers_df.merge(rolling_points_pctg, how = 'left', on = ['id', 'event_date', 'opponent'])

dist = panthers_df_final[['distance_between']]
scaler = MinMaxScaler()
scaled = scaler.fit_transform(dist)
panthers_df_final['scaled_distance'] = scaled

panthers_df_final

Unnamed: 0,season,event_date,id,side,team_name,opponent,result,weekday,fla_density,fla_back_to_back,game_loc,prev_game_loc,distance_between,opp_density,opp_back_to_back,scaled_distance
0,20212022,10/14/21,2021020010,Home,FLA,PIT,Win,3,2,0,FLA,FLA,0.00,3,0,0.000000
1,20212022,10/16/21,2021020025,Home,FLA,NYI,Win,5,3,0,FLA,FLA,0.00,3,0,0.000000
2,20212022,10/19/21,2021020042,Away,FLA,TBL,Win,1,3,0,TBL,FLA,289.30,2,0,0.066388
3,20212022,10/21/21,2021020055,Home,FLA,COL,Win,3,3,0,FLA,TBL,289.30,3,0,0.066388
4,20212022,10/23/21,2021020070,Away,FLA,PHI,Win,5,3,0,PHI,FLA,1601.07,2,0,0.367409
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323,20242025,04/08/25,2024021237,Home,FLA,TOR,,1,4,0,FLA,DET,1816.57,3,1,0.416862
324,20242025,04/10/25,2024021251,Home,FLA,DET,,3,3,0,FLA,FLA,0.00,3,1,0.000000
325,20242025,04/12/25,2024021268,Home,FLA,BUF,,5,4,0,FLA,FLA,0.00,4,1,0.000000
326,20242025,04/14/25,2024021287,Home,FLA,NYR,,0,3,1,FLA,FLA,0.00,3,0,0.000000


In [42]:
x = panthers_df_final[['side', 'fla_back_to_back', 'fla_density', 'scaled_distance', 'opp_pp_3_seasons', 'opp_back_to_back', 'opp_density']]
x

KeyError: "['loc', 'opp_pp_3_seasons'] not in index"