In [26]:
import pandas as pd
import numpy as np
import os
import datetime
import random
from functools import reduce

from collect import Collect
from processing import Processing

pd.options.mode.chained_assignment = None

In [2]:
# load total_pbp
# Builds the notebook-level play-by-play frame `DF` that Main.__init__ reuses.
P = Processing("./")
DF = P.get_total_pbp()
# Cast to str first so missing locations become 'nan' and .str.contains
# never yields NaN in the boolean mask below.
DF['location'] = DF['location'].astype(str)
# Drop every play whose location mentions 'Overtime'.
DF = DF.loc[~DF['location'].str.contains('Overtime')]
# Attach each game's 'wy' label; the inner merge also drops plays whose key
# is not present in P.gd.
DF = P.gd[['key', 'wy']].merge(DF, on=['key'])
# NOTE(review): presumably adds week/year/datetime columns parsed from 'wy',
# like Main.add_datetime_columns -- confirm against processing.Processing.
DF: pd.DataFrame = P.add_datetime_columns(DF)
DF['epa'] = DF['epa'].astype(float)
DF['epb'] = DF['epb'].astype(float)
# Vectorized EPA - EPB: same values as the former row-wise apply, without a
# Python-level loop, and it also works when the frame is empty.
DF['epa_added'] = DF['epa'] - DF['epb']
# Keep only plays where both EPA and EPB were available.
DF = DF.loc[DF['epa_added'].notna()]

In [64]:
class Main:
    """
    Builds EPA-based feature tables (CSV files) from the total play-by-play frame.

    Each feature function writes "<features_dir>/<name>.csv" and is skipped when
    that file already exists.
    """
    def __init__(self, _dir):
        """
        Args:
            _dir (str): base directory, expected to end with a path separator
                because all sub-paths are built by string concatenation.
        """
        self._dir = _dir
        self.data_dir = self._dir + "data/"
        self.features_dir = self._dir + "data/features/"
        self.game_data_dir = self._dir + "../data/"
        self.position_data_dir = self.game_data_dir + "positionData/"
        # frames
        self.gd = pd.read_csv("%s.csv" % (self.game_data_dir + "gameData"))
        # Reuse the notebook-level DF when the load cell has been run and produced
        # rows; otherwise (DF missing or empty) rebuild it instead of raising NameError.
        preloaded = globals().get('DF')
        if isinstance(preloaded, pd.DataFrame) and not preloaded.empty:
            self.df: pd.DataFrame = preloaded
        else:
            self.df: pd.DataFrame = self.get_df()
        # features
        self.feature_funcs = [
            self.possession_epas_feature, self.player_epas_feature
        ]
        # info
        self.datetime_cols = ['week', 'year', 'datetime']
        self.skill_positions = ['qb', 'rb', 'wr', 'te']
        self.epa_entity_cols = { # pid is in col for valid epa
            'qb': ['pid_PASSER', 'pid_RUSHER', 'pid_PENALIZER', 'pid_FUMBLER'],
            'rb': ['pid_RUSHER', 'pid_RECEIVER', 'pid_RETURNER', 'pid_PENALIZER', 'pid_FUMBLER'],
            'wr': ['pid_RUSHER', 'pid_RECEIVER', 'pid_RETURNER', 'pid_PENALIZER', 'pid_FUMBLER'],
            'te': ['pid_RECEIVER', 'pid_PENALIZER', 'pid_FUMBLER'],
        }
        return
    # helpers
    def save_frame(self, df: pd.DataFrame, name: str):
        """
        Write df to "<name>.csv" without the index.
        Args:
            df (pd.DataFrame): frame to persist
            name (str): target path WITHOUT the ".csv" extension
        """
        # Create the target directory on first use so a fresh checkout does not fail.
        parent = os.path.dirname(name)
        if parent:
            os.makedirs(parent, exist_ok=True)
        df.to_csv("%s.csv" % name, index=False)
        return
    def _feature_exists(self, fn: str) -> bool:
        """Return True when "<features_dir>/<fn>.csv" is already on disk."""
        # isfile (unlike os.listdir) does not raise when features_dir is missing.
        return os.path.isfile("%s.csv" % (self.features_dir + fn))
    def print_progress_bar(self, iteration, total, prefix = 'Progress', suffix = 'Complete', decimals = 1, length = 50, fill = '█', printEnd = "\r"):
        """
        Call in a loop to create terminal progress bar
        @params:
            iteration   - Required  : current iteration (Int)
            total       - Required  : total iterations (Int)
            prefix      - Optional  : prefix string (Str)
            suffix      - Optional  : suffix string (Str)
            decimals    - Optional  : positive number of decimals in percent complete (Int)
            length      - Optional  : character length of bar (Int)
            fill        - Optional  : bar fill character (Str)
            printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
        """
        if total:
            fraction = iteration / float(total)
            filledLength = int(length * iteration // total)
        else:
            # Nothing to iterate over: draw a full bar instead of dividing by zero.
            fraction = 1.0
            filledLength = length
        percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
        bar = fill * filledLength + '-' * (length - filledLength)
        print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
        # Print New Line on Complete
        if iteration == total:
            print()
        return
    def get_datetime(self, week: int, year: int):
        """
        Return the Monday of the given week as a datetime.
        Weeks follow strptime's %W convention (week 1 starts on the first Monday of the year).
        """
        return datetime.datetime.strptime(f'{year}-W{week}-1', "%Y-W%W-%w")
    def add_datetime_columns(self, df: pd.DataFrame):
        """
        Add 'week', 'year' and 'datetime' columns parsed from the 'wy' column.
        Args:
            df (pd.DataFrame): frame with a 'wy' column formatted "<week> | <year>";
                it is modified in place.
        Returns:
            pd.DataFrame: the same frame, for chaining
        """
        # Split each label once instead of once per derived column.
        parts = [wy.split(" | ") for wy in df['wy'].values]
        df['week'] = [int(p[0]) for p in parts]
        df['year'] = [int(p[1]) for p in parts]
        df['datetime'] = [self.get_datetime(week, year) for week, year in df[['week', 'year']].values]
        return df
    # END helpers
    # getters/setters
    def get_df(self):
        """
        PBP total
        Loads the total play-by-play frame, drops plays whose location mentions
        'Overtime', attaches 'wy' plus datetime columns and computes
        'epa_added' = 'epa' - 'epb' (rows where that is NaN are dropped).
        Returns:
            pd.DataFrame: merge DataFrame
        """
        df = Processing("./").get_total_pbp()
        # str cast turns missing locations into 'nan' so the mask below has no NaN
        df['location'] = df['location'].astype(str)
        df = df.loc[~df['location'].str.contains('Overtime')]
        df = self.gd[['key', 'wy']].merge(df, on=['key'])
        df: pd.DataFrame = self.add_datetime_columns(df)
        df['epa'] = df['epa'].astype(float)
        df['epb'] = df['epb'].astype(float)
        # vectorized; same values as a row-wise apply and safe on an empty frame
        df['epa_added'] = df['epa'] - df['epb']
        df = df.loc[df['epa_added'].notna()]
        return df
    # END getters/setters
    def update_all(self):
        """
        Write new tables, clean tables, update allTables
        Update/create names, possessions, and entities
        """
        collect = Collect("./")
        collect.updateTables()
        processing = Processing("./")
        processing.update()
        return
    def possession_epas_feature(self, df: pd.DataFrame):
        """
        Get the per-play mean of EPA added (EPA - EPB) and of EPA for the home and
        away team of each game, and save it as possession_epas.csv.
        The home team is taken from the last three characters of the game key.
        Games that do not have exactly two possessing teams, one of which is the
        home team, are reported and skipped.
        Args:
            df (pd.DataFrame): total_pbp
        """
        fn = "possession_epas"
        if self._feature_exists(fn):
            print(f"{fn} already created.")
            return
        print(f"Creating {fn}...")
        # Select the numeric columns BEFORE aggregating: averaging the whole frame
        # fails on non-numeric columns in pandas >= 2.0 and wastes work otherwise.
        means = (
            df.groupby(by=['key', 'possession'])[['epa_added', 'epa']]
            .mean()
            .reset_index()
            .rename(columns={'possession': 'abbr'})
        )
        out_cols = ['key', 'home_abbr', 'away_abbr', 'home_epa_added', 'away_epa_added', 'home_total_epa', 'away_total_epa']
        records = []
        # Group per game rather than pairing consecutive rows, so one malformed
        # game cannot shift the pairing of every game after it.
        for key, teams in means.groupby('key'):
            home_abbr = key[-3:].upper()
            abbrs = teams['abbr'].tolist()
            if len(abbrs) != 2 or home_abbr not in abbrs:
                print(f"Skipping {key}: expected {home_abbr} and one opponent, found {abbrs}.")
                continue
            home = teams.loc[teams['abbr'] == home_abbr].iloc[0]
            away = teams.loc[teams['abbr'] != home_abbr].iloc[0]
            records.append([
                key, home_abbr, away['abbr'],
                home['epa_added'], away['epa_added'],
                home['epa'], away['epa'],
            ])
        # Build the frame once instead of appending row by row.
        new_df = pd.DataFrame(records, columns=out_cols)
        new_df = new_df.merge(self.gd[['key', 'wy']], on=['key'])
        new_df = new_df[['key', 'wy', 'home_abbr', 'away_abbr', 'home_epa_added', 'away_epa_added', 'home_total_epa', 'away_total_epa']]
        self.save_frame(new_df, (self.features_dir + fn))
        return
    def _player_epas(self, df: pd.DataFrame, plays_by_game: dict, pid, key, target_cols: list):
        """
        Mean ('epa_added', 'epa') over the plays of game `key` in which `pid`
        appears in any of `target_cols`; [nan, nan] when there is no such play.
        """
        missing = [np.nan, np.nan]
        positions = plays_by_game.get(key)
        # A blank/non-string id would match nothing meaningful (an empty pattern
        # matches every play), so treat it as "no plays".
        if positions is None or not isinstance(pid, str) or not pid:
            return missing
        game = df.iloc[positions]
        involved = np.zeros(len(game), dtype=bool)
        for col in target_cols:
            # Literal match: ids are identifiers, not regular expressions.
            hits = game[col].fillna('').str.contains(pid, regex=False, na=False)
            involved |= hits.to_numpy(dtype=bool)
        stats = game.loc[involved, ['epa_added', 'epa']].values
        return list(np.mean(stats, axis=0)) if len(stats) != 0 else missing
    def player_epas_feature(self, df: pd.DataFrame):
        """
        For every skill position, get the mean EPA added + mean EPA over the plays
        of each game in which the player appears in one of the position's
        epa_entity_cols, and save it as "<position>_epas.csv".
        Player/game rows come from "<position_data_dir>/<POSITION>Data.csv",
        restricted to week 1 of 2011 onwards; rows without any matching play are dropped.
        Args:
            df (pd.DataFrame): total_pbp
        """
        plays_by_game = None
        for position in self.skill_positions:
            cols = [f'{position}_epa_added', f'{position}_total_epa']
            target_cols = self.epa_entity_cols[position]
            fn = f"{position}_epas"
            if self._feature_exists(fn):
                print(f"{fn} already created.")
                continue # skip position and move to next
            print(f"Creating {fn}...")
            if plays_by_game is None:
                # key -> row positions, computed once instead of rescanning the
                # whole frame for every player row.
                plays_by_game = df.groupby('key', sort=False).indices
            cd = pd.read_csv("%s.csv" % (self.position_data_dir + (position.upper() + "Data")))
            # copy so add_datetime_columns does not write into a slice of the raw frame
            cd = cd[['p_id', 'wy', 'game_key']].copy()
            cd = self.add_datetime_columns(cd)
            cd = cd.loc[cd['datetime']>=self.get_datetime(1, 2011)].reset_index(drop=True)
            epas = [
                self._player_epas(df, plays_by_game, pid, key, target_cols)
                for pid, key in zip(cd['p_id'], cd['game_key'])
            ]
            cd[cols] = pd.DataFrame(epas, columns=cols, index=cd.index, dtype=float)
            cd = cd.drop(columns=self.datetime_cols)
            cd.columns = ['p_id', 'wy', 'key'] + cols
            cd = cd.dropna()
            self.save_frame(cd, (self.features_dir + fn))
        return
    def build_features(self):
        """Run every function in self.feature_funcs on self.df."""
        df = self.df
        for func in self.feature_funcs:
            func(df)
        return
    
# END / Main

###################

# !!! Tables go from 2011 - 2023 week 19 !!!
if __name__ == '__main__':
    # Instantiate against the current directory; __init__ reads gameData.csv and
    # reuses the notebook-level DF when it is non-empty.
    m = Main("./")
    # Runs every function in Main.feature_funcs; a feature whose CSV already
    # exists in the features directory is reported and skipped.
    m.build_features()

possession_epas already created.
Creating qb_epas...
Creating rb_epas...
Creating wr_epas...
Creating te_epas...
