In [1]:
import os
import pandas as pd

In [2]:
# Move into the data directory first, then read the player statistics table
# by its bare file name.
# NOTE(review): os.chdir changes the working directory for the whole kernel,
# so every relative path used afterwards resolves against '../data'.
os.chdir('../data')
table_name = 'temptable.csv'
df = pd.read_csv(table_name)

In [3]:
# Peek at five randomly chosen rows of the loaded table.
df.sample(n=5)

Unnamed: 0,player_stats_id,season_id,team_id,player_id,games,points,goals,assists,penalty,p_m,...,player_height,player_weight,player_site_id,player_age,player_name,player_unicode_name,player_khl_id,player_nhl_id,season_year,season_type
1087,1157,53,71,9093,80,31,14,17,33,-6,...,188.0,93.0,9097,35.0,Kyle Brodziak,kyle brodziak,,8470803,2007,regular
16350,17426,29,77,13519,1,0,0,0,0,0,...,186.0,86.0,13525,44.0,Steve Washburn,steve washburn,,8459499,1999,regular
12065,12959,11,76,31894,28,5,1,4,75,-4,...,183.0,93.0,31881,51.0,Rich Pilon,rich pilon,,8450389,1993,regular
16070,17136,35,72,8739,14,6,3,3,23,2,...,,205.0,8742,44.0,Jamie Langenbrunner,jamie langenbrunner,,8459457,2001,regular
1379,1473,5,83,70461,54,19,8,11,39,-3,...,180.0,91.0,70402,62.0,Lucien Deblois ...,"lucien deblois a.k.a. ""lucien deblois""",,8446353,1991,regular


In [4]:
class DataFrameTransformer:
    """Applies a sequence of transform functions to a DataFrame.

    A transform is any callable that takes the current frame and returns the
    frame to hand to the next transform: ``transform(df) -> df``.
    Transforms run in the order in which they were registered.
    """

    def __init__(self, df):
        """Store the frame to transform and start with no transforms."""
        self.df = df
        self.transforms = []

    def add_transform(self, transform):
        """Register a single transform at the end of the pipeline."""
        self.transforms.append(transform)

    def add_transforms(self, transforms):
        """Register every transform from an iterable, keeping its order."""
        for transform in transforms:
            self.add_transform(transform)

    def fit(self):
        """Run all registered transforms and return the resulting frame.

        The result is also stored back on ``self.df``, so calling ``fit``
        again applies the transforms on top of the previous result.
        """
        # Iterate over the transforms registered on this instance. The
        # previous code read a module-level name `transforms`, which ignored
        # add_transform()/add_transforms() and raised NameError when no such
        # global existed.
        for transform in self.transforms:
            self.df = transform(self.df)
        return self.df

In [5]:
# Example: add a column with the total number of points for each player.

def add_total_points(df):
    """Add a ``total_points`` column: the sum of ``points`` per ``player_id``.

    Every row receives the total of its own player. The column is written
    onto the frame that was passed in, and that same object is returned.
    """
    per_player_total = df.groupby('player_id')['points'].transform('sum')
    df['total_points'] = per_player_total
    return df

# ...

In [6]:
# To add a feature to the transformer, write a function and add it to the `transforms` list below.

transforms = [add_total_points]
transformer = DataFrameTransformer(df)
transformer.add_transforms(transforms)
# fit() runs the pipeline and returns the resulting frame.
ext_df = transformer.fit()

In [7]:
# Spot-check the new column on five random rows.
ext_df.loc[:, ['player_id', 'total_points']].sample(n=5)

Unnamed: 0,player_id,total_points
12426,11584,403
22952,3975,23
488,9430,76
11117,15062,54
6380,19551,254


## Features for teams

In [8]:
# Read the per-team statistics table from the data directory.
# NOTE(review): this path is relative to the current working directory, which
# an earlier cell already moved with os.chdir('../data'); the call only lands
# in the same folder again if that folder is itself named 'data' — confirm.
os.chdir('../data')
table_name = 'team_stats.csv'
team_stats_df = pd.read_csv(table_name)

In [9]:
# Peek at five randomly chosen rows of the team statistics.
team_stats_df.sample(n=5)

Unnamed: 0,id,team_id,season_id,games,points,goals_scored,goals_missed,position_in_championship,position_in_conference,playoff_fact,position_in_division,nhl_id
20630,22102,200,92,38,45.0,91,84,7,4.0,False,3.0,
24537,26735,192,92,38,50.0,94,76,4,2.0,False,1.0,
33606,5754,46,91,35,,1,97,8,,True,,
34138,14312,724,91,36,,3,94,10,,True,,
9722,10419,1570,91,38,,2,71,22,,True,,


In [17]:
import numpy as np

# Feature transforms built from team statistics.
#
# Every public function below has the signature ``f(df, team_stats_df) -> df``
# and adds exactly one ``feature_*`` column to the returned frame.


def _join_group_stat(df, team_stats_df, key, column, agg, feature):
    """Left-join one aggregated ``team_stats_df`` statistic onto ``df``.

    Args:
        df: frame to extend; must contain the ``key`` column.
        team_stats_df: frame holding the ``key`` and ``column`` columns.
        key: column to group ``team_stats_df`` by and to join ``df`` on.
        column: ``team_stats_df`` column to aggregate.
        agg: aggregation name understood by pandas (e.g. ``'mean'``, ``'sum'``).
        feature: name of the column added to the result.

    Returns:
        A new frame with ``df``'s rows and index plus the ``feature`` column
        as its last column. Rows whose key has no match get NaN.
    """
    # Name the aggregated Series up front instead of renaming "the last
    # column" after the join.
    stat = team_stats_df.groupby(key)[column].agg(agg).rename(feature)
    # Drop a stale copy first so that re-running a transform replaces the
    # feature instead of creating two columns with the same name.
    return df.drop(columns=feature, errors='ignore').join(stat, on=key)


# transform methods for seasons
def add_avg_games_for_season(df, team_stats_df):
    """Add ``feature_mean_games_count_season``: mean ``games`` per ``season_id``."""
    return _join_group_stat(df, team_stats_df, 'season_id', 'games', 'mean',
                            'feature_mean_games_count_season')


def add_avg_points_for_season(df, team_stats_df):
    """Add ``feature_mean_points_season``: mean ``points`` per ``season_id``."""
    return _join_group_stat(df, team_stats_df, 'season_id', 'points', 'mean',
                            'feature_mean_points_season')


def add_sum_points_for_season(df, team_stats_df):
    """Add ``feature_sum_points_season``: total ``points`` per ``season_id``."""
    return _join_group_stat(df, team_stats_df, 'season_id', 'points', 'sum',
                            'feature_sum_points_season')


def add_team_count_for_season(df, team_stats_df):
    """Add ``feature_team_count_season``: distinct ``team_id`` count per season.

    NOTE(review): unlike the other transforms this one uses ``DataFrame.merge``
    with its default inner join, so rows of ``df`` whose ``season_id`` is
    absent from ``team_stats_df`` are dropped and the index is rebuilt. That
    behaviour is kept as it was; confirm whether a left join is wanted.
    """
    feature = 'feature_team_count_season'
    # dropna=False keeps the count equal to len(unique()), which treats a
    # missing team_id as one more distinct value.
    counts = (team_stats_df.groupby('season_id')['team_id']
              .nunique(dropna=False)
              .rename(feature))
    return df.drop(columns=feature, errors='ignore').merge(counts, on='season_id')


# transform methods for teams
def add_avg_games_for_team(df, team_stats_df):
    """Add ``feature_mean_games_count_team``: mean ``games`` per ``team_id``."""
    return _join_group_stat(df, team_stats_df, 'team_id', 'games', 'mean',
                            'feature_mean_games_count_team')


def add_avg_points_for_team(df, team_stats_df):
    """Add ``feature_mean_points_team``: mean ``points`` per ``team_id``."""
    return _join_group_stat(df, team_stats_df, 'team_id', 'points', 'mean',
                            'feature_mean_points_team')


def add_sum_points_for_team(df, team_stats_df):
    """Add ``feature_sum_points_team``: total ``points`` per ``team_id``."""
    return _join_group_stat(df, team_stats_df, 'team_id', 'points', 'sum',
                            'feature_sum_points_team')


def add_avg_goals_scored_for_team(df, team_stats_df):
    """Add ``feature_mean_goals_scored_team``: mean ``goals_scored`` per ``team_id``."""
    return _join_group_stat(df, team_stats_df, 'team_id', 'goals_scored', 'mean',
                            'feature_mean_goals_scored_team')


def add_avg_points_per_game_for_team(df, team_stats_df):
    """Add ``feature_mean_points_per_game_team``.

    The value is ``feature_mean_points_team / feature_mean_games_count_team``,
    so ``add_avg_points_for_team`` and ``add_avg_games_for_team`` must have run
    first (KeyError otherwise). ``team_stats_df`` is unused; it is accepted
    only to match the common transform signature. The column is written onto
    ``df`` itself, which is also returned.
    """
    df['feature_mean_points_per_game_team'] = (
        df['feature_mean_points_team'] / df['feature_mean_games_count_team']
    )
    return df


def add_avg_goals_scored_per_game_for_team(df, team_stats_df):
    """Add ``feature_mean_goals_scored_per_game_team``.

    The value is ``feature_mean_goals_scored_team /
    feature_mean_games_count_team``, so ``add_avg_goals_scored_for_team`` and
    ``add_avg_games_for_team`` must have run first (KeyError otherwise).
    ``team_stats_df`` is unused. The column is written onto ``df`` itself,
    which is also returned.

    The result used to be stored under ``feature_mean_points_per_game_team``,
    which mislabelled a goals ratio as a points ratio and collided with the
    column of ``add_avg_points_per_game_for_team``.
    """
    df['feature_mean_goals_scored_per_game_team'] = (
        df['feature_mean_goals_scored_team'] / df['feature_mean_games_count_team']
    )
    return df

In [18]:
class DataFrameTransformer:
    """Applies a sequence of transform functions to a DataFrame.

    A transform is any callable ``transform(df, team_stats_df) -> df``: it
    receives the current frame plus the team statistics frame and returns the
    frame to hand to the next transform. Transforms run in the order in which
    they were registered.
    """

    def __init__(self, df, team_stats_df):
        """Store the frame to transform, the team statistics, and no transforms."""
        self.df = df
        self.team_stats_df = team_stats_df
        self.transforms = []

    def add_transform(self, transform):
        """Register a single transform at the end of the pipeline."""
        self.transforms.append(transform)

    def add_transforms(self, transforms):
        """Register every transform from an iterable, keeping its order."""
        for transform in transforms:
            self.add_transform(transform)

    def fit(self):
        """Run all registered transforms and return the resulting frame.

        The result is also stored back on ``self.df``, so calling ``fit``
        again applies the transforms on top of the previous result.
        """
        # Iterate over the transforms registered on this instance. The
        # previous code read a module-level name `transforms`, which ignored
        # add_transform()/add_transforms() and raised NameError when no such
        # global existed.
        for transform in self.transforms:
            self.df = transform(self.df, self.team_stats_df)
        return self.df

In [19]:
# Feature pipeline, one transform per line and applied in this order:
# season-level features first, then team-level ones.
# Every function named here must already be defined by an earlier cell;
# a fresh kernel raises NameError for any that is missing.
transforms = [
    add_avg_games_for_season,
    add_avg_points_for_season,
    add_sum_points_for_season,
    add_team_count_for_season,
    add_avg_games_for_team,
    add_avg_points_for_team,
    add_sum_points_for_team,
    # presumably reads columns produced by the entries above — confirm
    add_avg_points_per_game_for_team,
]
transformer = DataFrameTransformer(df, team_stats_df)
transformer.add_transforms(transforms)
ext_df = transformer.fit()

In [20]:
# Display the extended frame; the saved output shows only the first and last
# rows with an elided middle.
ext_df

Unnamed: 0,player_stats_id,season_id,team_id,player_id,games,points,goals,assists,penalty,p_m,...,season_type,total_points,feature_mean_games_count_season,feature_mean_points_season,feature_sum_points_season,feature_team_count_season,feature_mean_games_count_team,feature_mean_points_team,feature_sum_points_team,feature_mean_points_per_game_team
0,0,41,1479,8517,21,6,0,6,12,1,...,regular,619,82.0,87.370370,2359.0,27,79.360000,86.280000,2157.0,1.087198
1,18,41,78,9665,20,4,2,2,4,-9,...,regular,11,82.0,87.370370,2359.0,27,79.571429,91.142857,2552.0,1.145422
2,38,41,82,10673,2,0,0,0,0,0,...,regular,5,82.0,87.370370,2359.0,27,80.111111,78.833333,1419.0,0.984050
3,48,41,86,8658,69,19,2,17,12,8,...,regular,132,82.0,87.370370,2359.0,27,79.555556,85.629630,2312.0,1.076350
4,54,41,77,8833,1,0,0,0,0,0,...,regular,71,82.0,87.370370,2359.0,27,79.571429,89.892857,2517.0,1.129713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24214,25752,35,82,5517,81,8,2,6,90,-29,...,regular,16,82.0,86.555556,2337.0,27,80.111111,78.833333,1419.0,0.984050
24215,25764,35,76,69981,2,0,0,0,5,-1,...,regular,4,82.0,86.555556,2337.0,27,79.571429,76.107143,2131.0,0.956463
24216,25767,35,64,3734,3,0,0,0,0,0,...,regular,0,82.0,86.555556,2337.0,27,80.521739,90.565217,2083.0,1.124730
24217,25771,35,69,8634,62,24,5,19,32,-6,...,regular,333,82.0,86.555556,2337.0,27,80.111111,87.500000,1575.0,1.092233
