In [163]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import pyplot
from sklearn.preprocessing import power_transform
from scipy import stats

# Load the fully processed feature matrix (path is relative to the notebook).
df = pd.read_csv(filepath_or_buffer='../data/processed/X_full.csv')

def yeo_johnson(series):
    """Apply scikit-learn's Yeo-Johnson power transform to one feature.

    Parameters
    ----------
    series : array-like
        Numeric values; they are reshaped into a single column before the
        transform is fitted.

    Returns
    -------
    numpy.ndarray
        The transformed values as returned by ``power_transform`` for the
        single-column input.
    """
    column = np.asarray(series).reshape(-1, 1)
    transformed = power_transform(column, method='yeo-johnson')
    return transformed

def box_johnson(series):
    """Apply scikit-learn's Box-Cox power transform to one feature.

    Despite the name, this uses ``method='box-cox'``. Per the scikit-learn
    documentation, Box-Cox requires strictly positive input data.

    Parameters
    ----------
    series : array-like
        Numeric values; they are reshaped into a single column before the
        transform is fitted.

    Returns
    -------
    numpy.ndarray
        The transformed values as returned by ``power_transform`` for the
        single-column input.
    """
    return power_transform(
        np.asarray(series).reshape(-1, 1),
        method='box-cox',
    )

def stats_boxcox(series):
    """Box-Cox transform the positive entries of ``series`` using SciPy.

    The Box-Cox lambda is fitted by ``scipy.stats.boxcox`` on the strictly
    positive values only. Zeros are mapped to ``-1 / lambda`` (the limit of
    the Box-Cox formula as the input approaches 0, which holds for
    ``lambda > 0``). Negative or NaN entries cannot be transformed and are
    returned as NaN.

    Parameters
    ----------
    series : array-like
        Numeric values; integer input is accepted and converted to float.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 1)`` holding the transformed values.

    Raises
    ------
    ValueError
        If ``series`` contains no strictly positive value (there is nothing
        to fit lambda on).
    """
    # Work in float so the transformed values are never truncated when the
    # input happens to be an integer column.
    arr = np.asarray(series, dtype=float).reshape(-1, 1)
    positive = arr > 0
    if not positive.any():
        raise ValueError(
            "stats_boxcox requires at least one strictly positive value"
        )

    bcdata, lam = stats.boxcox(arr[positive])

    # Pre-fill with NaN so entries that are neither positive nor zero are
    # explicit missing values rather than uninitialized memory.
    x = np.full_like(arr, np.nan)
    x[positive] = bcdata
    x[arr == 0] = -1 / lam
    return x

# Fold freeze time into the remaining round time (per the original note:
# add 1m55s to the round when the row is in freeze time).
# NOTE(review): `add_round_time` is not defined in this cell; it must already
# exist in the kernel. Confirm it is defined in an earlier cell so that
# Restart & Run All succeeds.
df['round_status_time_left'] = df.apply(add_round_time, axis=1)
df = df.drop(columns=['round_status_FreezeTime', 'round_status_Normal'])

# Bound the remaining round time to the interval [0.1, 175].
df['round_status_time_left'] = df['round_status_time_left'].clip(lower=0.1, upper=175)

# Remove every column whose name contains 'map'.
df = df.drop(columns=df.columns[df.columns.str.contains('map')])

# Make the listed features more Gaussian-like, one column at a time
# (each column gets its own Yeo-Johnson fit).
cols = [
    'ct_money', 't_money',
    'ct_health', 't_health',
    'ct_armor', 't_armor',
    'ct_helmets', 't_helmets',
    'ct_defuse_kits',
    'ct_players', 't_players',
]
for col in cols:
    df[col] = yeo_johnson(df[col])

In [164]:
# Display the column index of `df` as it stands after the preprocessing cell.
df.columns

Index(['round_status_time_left', 'ct_score', 't_score',
       'round_status_BombPlanted', 'ct_health', 't_health', 'ct_armor',
       't_armor', 'ct_money', 't_money', 'ct_helmets', 't_helmets',
       'ct_defuse_kits', 'ct_players', 't_players', 'ct_weapon_Ak47',
       't_weapon_Ak47', 'ct_weapon_Aug', 't_weapon_Aug', 'ct_weapon_Awp',
       't_weapon_Awp', 'ct_weapon_Bizon', 't_weapon_Bizon',
       'ct_weapon_Cz75Auto', 't_weapon_Cz75Auto', 'ct_weapon_Elite',
       't_weapon_Elite', 'ct_weapon_Famas', 't_weapon_Famas',
       'ct_weapon_G3sg1', 't_weapon_G3sg1', 'ct_weapon_GalilAr',
       't_weapon_GalilAr', 'ct_weapon_Glock', 't_weapon_Glock',
       'ct_weapon_M249', 't_weapon_M249', 'ct_weapon_M4a1S', 't_weapon_M4a1S',
       'ct_weapon_M4a4', 't_weapon_M4a4', 'ct_weapon_Mac10', 't_weapon_Mac10',
       'ct_weapon_Mag7', 't_weapon_Mag7', 'ct_weapon_Mp5sd', 't_weapon_Mp5sd',
       'ct_weapon_Mp7', 't_weapon_Mp7', 'ct_weapon_Mp9', 't_weapon_Mp9',
       'ct_weapon_Negev', 't_w