In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn import model_selection
from sklearn.metrics import accuracy_score
import xgboost as xgb
import warnings
import matplotlib.pyplot as plt

# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' may also hide useful pandas / scikit-learn
# warnings raised by later cells — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [9]:
def _read_indexed_csv(path):
    """Read a CSV file and use its first column as the DataFrame index."""
    return pd.read_csv(path, index_col=0)


# Player-level statistics, one table per (split, side).
train_home_player_statistics_df = _read_indexed_csv('./train_home_player_statistics_df.csv')
train_away_player_statistics_df = _read_indexed_csv('./train_away_player_statistics_df.csv')
test_home_player_statistics_df = _read_indexed_csv('./test_home_player_statistics_df.csv')
test_away_player_statistics_df = _read_indexed_csv('./test_away_player_statistics_df.csv')

# Training targets.
train_scores = _read_indexed_csv('./Y_train.csv')

Let's explore the available features:

In [6]:
# Column names of the away-player test table.
features = test_away_player_statistics_df.keys()

# Names containing "season" and names containing "5" (a name may match both).
seasons = [ft for ft in features if "season" in ft]
matches = [ft for ft in features if "5" in ft]

# List the "5" columns first, then the "season" columns.
for name in matches + seasons:
    print(name)

print(len(seasons), " , ", len(matches))

PLAYER_ACCURATE_CROSSES_5_last_match_sum
PLAYER_ACCURATE_PASSES_5_last_match_sum
PLAYER_AERIALS_WON_5_last_match_sum
PLAYER_ASSISTS_5_last_match_sum
PLAYER_BIG_CHANCES_CREATED_5_last_match_sum
PLAYER_BIG_CHANCES_MISSED_5_last_match_sum
PLAYER_BLOCKED_SHOTS_5_last_match_sum
PLAYER_CAPTAIN_5_last_match_sum
PLAYER_CLEARANCES_5_last_match_sum
PLAYER_CLEARANCE_OFFLINE_5_last_match_sum
PLAYER_DISPOSSESSED_5_last_match_sum
PLAYER_DRIBBLED_ATTEMPTS_5_last_match_sum
PLAYER_DRIBBLED_PAST_5_last_match_sum
PLAYER_DUELS_LOST_5_last_match_sum
PLAYER_DUELS_WON_5_last_match_sum
PLAYER_ERROR_LEAD_TO_GOAL_5_last_match_sum
PLAYER_FOULS_5_last_match_sum
PLAYER_FOULS_DRAWN_5_last_match_sum
PLAYER_GOALKEEPER_GOALS_CONCEDED_5_last_match_sum
PLAYER_GOALS_5_last_match_sum
PLAYER_GOALS_CONCEDED_5_last_match_sum
PLAYER_HIT_WOODWORK_5_last_match_sum
PLAYER_INTERCEPTIONS_5_last_match_sum
PLAYER_KEY_PASSES_5_last_match_sum
PLAYER_MINUTES_PLAYED_5_last_match_sum
PLAYER_OFFSIDES_5_last_match_sum
PLAYER_OWN_GOALS_5_la

In [7]:
# Dump the away table first, then the home table.
for player_stats in (train_away_player_statistics_df, train_home_player_statistics_df):
    print(player_stats)

        LEAGUE            TEAM_NAME    POSITION            PLAYER_NAME  \
ID                                                                       
0      Ligue 1  Olympique Marseille    defender           Lucas Perrin   
0      Ligue 1  Olympique Marseille  midfielder        Kevin Strootman   
0      Ligue 1  Olympique Marseille  goalkeeper            Yohann Pelé   
0      Ligue 1  Olympique Marseille    defender   Abdallah Ali Mohamed   
0      Ligue 1  Olympique Marseille         NaN       Nemanja Radonjic   
...        ...                  ...         ...                    ...   
12302  La Liga         FC Barcelona  goalkeeper  Marc-André ter Stegen   
12302  La Liga         FC Barcelona  midfielder           Alex Collado   
12302  La Liga         FC Barcelona    attacker           Lionel Messi   
12302  La Liga         FC Barcelona    attacker      Antoine Griezmann   
12302  La Liga         FC Barcelona  midfielder              Ansu Fati   

       PLAYER_ACCURATE_CROSSES_season

The problem: a lot of data is missing.

In [72]:
positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Sum the away players' statistics per (position, match ID).
df = (
    train_away_player_statistics_df
    .reset_index()
    .groupby(["POSITION", "ID"], as_index=False)
    .sum()
)
gb = df.set_index("ID").groupby("POSITION")

# A match counts as complete when its ID appears under all four positions.
ids_by_position = [gb.get_group(pos).index for pos in positions]
m1 = np.intersect1d(ids_by_position[0], ids_by_position[1])
m2 = np.intersect1d(ids_by_position[2], ids_by_position[3])
print(np.intersect1d(m1, m2).shape[0])

5785


Fewer than 6k of the 12k matches have a complete away team (i.e. at least one player with a known position in each of the four positions). To tackle this problem, let's predict a player's missing position from the rest of their statistics:

In [33]:
from sklearn.preprocessing import LabelEncoder
from sklearn import linear_model

# Number of labelled players per position (home table, then away table).
print(train_home_player_statistics_df.groupby("POSITION").count().reset_index().iloc[:,:4])
print(train_away_player_statistics_df.groupby("POSITION").count().reset_index().iloc[:,:4])

# Keep only the players whose position is known. Explicit copies make sure the
# label encoding below never writes into a slice of the source tables.
train_home_player = train_home_player_statistics_df.loc[train_home_player_statistics_df["POSITION"].notna()].copy()
train_away_player = train_away_player_statistics_df.loc[train_away_player_statistics_df["POSITION"].notna()].copy()

# Fit the encoder ONCE on both tables so that home and away players are
# guaranteed to share the same label -> integer mapping (refitting per table
# would silently shift the codes if one table lacked a position).
le = LabelEncoder()
le.fit(pd.concat((train_home_player["POSITION"], train_away_player["POSITION"])))
train_home_player["POSITION"] = le.transform(train_home_player["POSITION"])
train_away_player["POSITION"] = le.transform(train_away_player["POSITION"])

# Integer code -> position name, taken from the encoder instead of hard-coded.
encoding = list(le.classes_)

train_player_data = pd.concat((train_home_player, train_away_player))
train_y = train_player_data["POSITION"]
# Drop the first four columns; the remaining ones are used as features.
train_player_data = train_player_data.iloc[:,4:]

X_train, X_test, y_train, y_test = model_selection.train_test_split(train_player_data, train_y, train_size=0.8, random_state=42)

lin_model = linear_model.LogisticRegression(C=1.0)

# Missing statistics are treated as zero for both fitting and prediction.
lin_model.fit(X_train.fillna(0.0), y_train)

y_pred = lin_model.predict(X_test.fillna(0.0))
predictions = y_pred
# accuracy_score expects (y_true, y_pred).
print("test : ", np.round(accuracy_score(y_test, predictions),4))

     POSITION  LEAGUE  TEAM_NAME  PLAYER_NAME
0    attacker   23691      23691        23691
1    defender   67746      67746        67746
2  goalkeeper   24298      24298        24298
3  midfielder   78524      78524        78524
     POSITION  LEAGUE  TEAM_NAME  PLAYER_NAME
0    attacker   23591      23591        23591
1    defender   67662      67662        67662
2  goalkeeper   24358      24358        24358
3  midfielder   78266      78266        78266
test :  0.7927


We get $\sim 80\%$ accuracy! That is not bad considering the simplicity of the model. Now let's see what we gained with this model.

In [74]:
# Fill in the missing POSITION values of both training tables in place.
for player_stats in (train_home_player_statistics_df, train_away_player_statistics_df):
    missing_position = player_stats.isna()["POSITION"]
    # Predict a label for every row, then keep only the rows that need one.
    predicted_codes = lin_model.predict(player_stats.iloc[:, 4:].replace({np.nan: 0.0}))
    predicted_labels = np.array(encoding)[predicted_codes]
    player_stats.loc[missing_position, "POSITION"] = predicted_labels[missing_position]

In [75]:
# Report, for the home then the away table, whether any POSITION is still missing.
for player_stats in (train_home_player_statistics_df, train_away_player_statistics_df):
    print(player_stats["POSITION"].isna().any())

False
False


In [76]:
positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Same completeness count as before, now on the imputed away table.
df = train_away_player_statistics_df.reset_index().groupby(["POSITION", "ID"], as_index=False).sum()
gb = df.set_index("ID").groupby("POSITION")

attacker_ids, goalkeeper_ids, midfielder_ids, defender_ids = (
    gb.get_group(pos).index for pos in positions
)
m1 = np.intersect1d(attacker_ids, goalkeeper_ids)
m2 = np.intersect1d(midfielder_ids, defender_ids)
complete_match_ids = np.intersect1d(m1, m2)
print(complete_match_ids.shape[0])

10834


## What about the public test data?

In [77]:
positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Per-(position, match ID) totals of the away players in the test set.
df = (
    test_away_player_statistics_df
    .reset_index()
    .groupby(["POSITION", "ID"], as_index=False)
    .sum()
)
gb = df.set_index("ID").groupby("POSITION")

# Intersect the match IDs pairwise, then together.
first_ids, second_ids, third_ids, fourth_ids = [gb.get_group(pos).index for pos in positions]
m1 = np.intersect1d(first_ids, second_ids)
m2 = np.intersect1d(third_ids, fourth_ids)
print(np.intersect1d(m1, m2).shape[0])

18939


In [78]:
# Rows of the away test table whose POSITION is missing.
missing_position = test_away_player_statistics_df.isna()["POSITION"]

# Predict from every column after the first one, with NaN replaced by 0.
predicted_codes = lin_model.predict(test_away_player_statistics_df.iloc[:, 1:].replace({np.nan: 0.0}))
predicted_labels = np.array(encoding)[predicted_codes]

# Only the missing rows receive a predicted label.
test_away_player_statistics_df.loc[missing_position, "POSITION"] = predicted_labels[missing_position]
print(test_away_player_statistics_df["POSITION"].isna().any())

False


In [79]:
positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Recount the complete away teams of the test set after imputation.
df = test_away_player_statistics_df.reset_index().groupby(["POSITION", "ID"], as_index=False).sum()
gb = df.set_index("ID").groupby("POSITION")

match_ids = {pos: gb.get_group(pos).index for pos in positions}
m1 = np.intersect1d(match_ids["attacker"], match_ids["goalkeeper"])
m2 = np.intersect1d(match_ids["midfielder"], match_ids["defender"])
print(np.intersect1d(m1, m2).shape[0])

23943


We recover 5k of the 24k matches!

In [86]:
import pickle

# Persist the fitted position classifier. The context manager guarantees the
# file is flushed and closed before it is read back.
with open("pos_model", "wb") as model_file:
    pickle.dump(lin_model, model_file)

# Round-trip: reload the model that was just written.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open("pos_model", "rb") as model_file:
    new_model = pickle.load(model_file)

## Feature selection

In [67]:
positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Per-(position, match ID) totals of the away players.
df = train_away_player_statistics_df.reset_index().groupby(["POSITION", "ID"], as_index=False).sum()
gb = df.set_index("ID").groupby("POSITION")

# Collect every column that is identically zero within a position group.
# NOTE(review): a column is flagged as soon as it is all-zero for ONE position,
# even if it carries signal for another — confirm this is the intent.
useless_fts = [
    column
    for position_frame in (gb.get_group(pos) for pos in positions)
    for column in position_frame.keys()
    if (position_frame[column] == 0.0).all()
]

# The same column may be flagged for several positions; show each name once.
print(np.unique(useless_fts))

['PLAYER_CAPTAIN_5_last_match_average' 'PLAYER_CAPTAIN_5_last_match_std'
 'PLAYER_CAPTAIN_season_average' 'PLAYER_CAPTAIN_season_std'
 'PLAYER_CLEARANCE_OFFLINE_5_last_match_average'
 'PLAYER_CLEARANCE_OFFLINE_5_last_match_std'
 'PLAYER_CLEARANCE_OFFLINE_5_last_match_sum'
 'PLAYER_LONG_BALLS_5_last_match_average'
 'PLAYER_LONG_BALLS_5_last_match_std' 'PLAYER_LONG_BALLS_5_last_match_sum'
 'PLAYER_LONG_BALLS_WON_5_last_match_average'
 'PLAYER_LONG_BALLS_WON_5_last_match_std'
 'PLAYER_LONG_BALLS_WON_5_last_match_sum'
 'PLAYER_LONG_BALLS_WON_season_average' 'PLAYER_LONG_BALLS_WON_season_std'
 'PLAYER_LONG_BALLS_WON_season_sum' 'PLAYER_LONG_BALLS_season_average'
 'PLAYER_LONG_BALLS_season_std' 'PLAYER_LONG_BALLS_season_sum'
 'PLAYER_PENALTIES_SAVED_5_last_match_average'
 'PLAYER_PENALTIES_SAVED_5_last_match_std'
 'PLAYER_PENALTIES_SAVED_5_last_match_sum'
 'PLAYER_PENALTIES_SAVED_season_average'
 'PLAYER_PENALTIES_SAVED_season_std' 'PLAYER_PENALTIES_SAVED_season_sum'
 'PLAYER_PENALTIES_WON_5

In [10]:
import pickle

# Reload the position classifier saved earlier.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open("pos_model", "rb") as model_file:
    lin_model_position = pickle.load(model_file)
# Integer class code -> position name, in the order used by the classifier.
encoding = ["attacker", "defender", "goalkeeper", "midfielder"]


# Fill in the missing POSITION values of both training tables in place.
for player_stats in (train_home_player_statistics_df, train_away_player_statistics_df):
    missing_position = player_stats.isna()["POSITION"]
    predicted_labels = np.array(encoding)[lin_model_position.predict(player_stats.iloc[:,4:].replace({np.nan:0.0}))]
    player_stats.loc[missing_position,"POSITION"] = predicted_labels[missing_position]

positions = ["attacker", "goalkeeper", "midfielder", "defender"]

# Away side: per-(position, match ID) totals and the IDs having all four positions.
df = train_away_player_statistics_df.reset_index().groupby(["POSITION", "ID"], as_index=False).sum()
gb_away = df.set_index("ID").groupby("POSITION")
m1 = np.intersect1d(gb_away.get_group(positions[0]).index, gb_away.get_group(positions[1]).index)
m2 = np.intersect1d(gb_away.get_group(positions[2]).index, gb_away.get_group(positions[3]).index)
away_m = np.intersect1d(m1, m2)

# Home side: same computation.
df = train_home_player_statistics_df.reset_index().groupby(["POSITION", "ID"], as_index=False).sum()
gb_home = df.set_index("ID").groupby("POSITION")
m1 = np.intersect1d(gb_home.get_group(positions[0]).index, gb_home.get_group(positions[1]).index)
m2 = np.intersect1d(gb_home.get_group(positions[2]).index, gb_home.get_group(positions[3]).index)
home_m = np.intersect1d(m1, m2)

# Match IDs for which BOTH teams have all four positions.
m = np.intersect1d(away_m, home_m)

train_player_data = []
# One feature name per line. Strip only the line terminator (so a final line
# without a trailing newline keeps its last character) and skip blank lines,
# which would otherwise make `drop` raise.
# NOTE(review): "lines.txt" is not written by any cell visible here — confirm
# how it is produced.
with open("lines.txt", "r") as features_file:
    useless_features = [line.rstrip("\n") for line in features_file]
useless_features = [ft for ft in useless_features if ft]
print(useless_features)
for pos in positions:
    df_home_pos = gb_home.get_group(pos).drop(useless_features, axis=1)
    df_away_pos = gb_away.get_group(pos).drop(useless_features, axis=1)
    df_home_pos.columns = 'HOME_' + df_home_pos.columns
    df_away_pos.columns = 'AWAY_' + df_away_pos.columns
    # The first column of each frame is dropped before joining home and away
    # on the match ID (presumably the POSITION label — confirm).
    train_player_data.append(df_home_pos.iloc[:,1:].join(df_away_pos.iloc[:,1:]))

['PLAYER_CAPTAIN_5_last_match_average', 'PLAYER_CAPTAIN_5_last_match_std', 'PLAYER_CAPTAIN_season_average', 'PLAYER_CAPTAIN_season_std', 'PLAYER_CLEARANCE_OFFLINE_5_last_match_average', 'PLAYER_CLEARANCE_OFFLINE_5_last_match_std', 'PLAYER_CLEARANCE_OFFLINE_5_last_match_sum', 'PLAYER_LONG_BALLS_5_last_match_average', 'PLAYER_LONG_BALLS_5_last_match_std', 'PLAYER_LONG_BALLS_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_5_last_match_average', 'PLAYER_LONG_BALLS_WON_5_last_match_std', 'PLAYER_LONG_BALLS_WON_5_last_match_sum', 'PLAYER_LONG_BALLS_WON_season_average', 'PLAYER_LONG_BALLS_WON_season_std', 'PLAYER_LONG_BALLS_WON_season_sum', 'PLAYER_LONG_BALLS_season_average', 'PLAYER_LONG_BALLS_season_std', 'PLAYER_LONG_BALLS_season_sum', 'PLAYER_PENALTIES_SAVED_5_last_match_average', 'PLAYER_PENALTIES_SAVED_5_last_match_std', 'PLAYER_PENALTIES_SAVED_5_last_match_sum', 'PLAYER_PENALTIES_SAVED_season_average', 'PLAYER_PENALTIES_SAVED_season_std', 'PLAYER_PENALTIES_SAVED_season_sum', 'PLAYER_PENALTIE

In [11]:
from sklearn import feature_selection

# Tag each per-position block with its role. The order matches `positions`
# (attacker, goalkeeper, midfielder, defender) used to build train_player_data.
position_prefixes = ["ATTACK_", "GOALKEEP_", "MIDFIELD_", "DEFEND_"]
for position_frame, prefix in zip(train_player_data, position_prefixes):
    # Guard so that re-running this cell does not stack the prefix twice.
    if not position_frame.columns.str.startswith(prefix).all():
        position_frame.columns = prefix + position_frame.columns

train_home_team_statistics_df = pd.read_csv('./train_home_team_statistics_df.csv', index_col=0)
train_away_team_statistics_df = pd.read_csv('./train_away_team_statistics_df.csv', index_col=0)
train_home_team_statistics_df.columns = 'HOME_' + train_home_team_statistics_df.columns
train_away_team_statistics_df.columns = 'AWAY_' + train_away_team_statistics_df.columns

# `m` holds match ID *labels*, so every table is selected with .loc — the same
# way the player tables and train_scores are. (Positional .iloc[m] only agrees
# with this when the index happens to be exactly 0..N-1.) The first two team
# columns are then dropped positionally.
X_train = (
    train_home_team_statistics_df.loc[m].iloc[:,2:]
    .join(train_away_team_statistics_df.loc[m].iloc[:,2:])
    .join(train_player_data[0].loc[m,:])
    .join(train_player_data[1].loc[m,:])
    .join(train_player_data[2].loc[m,:])
    .join(train_player_data[3].loc[m,:])
)

# One class per match: column index of the largest entry in each train_scores row.
y_outcome = np.argmax(train_scores.loc[m,:].values, axis=1)

_, p_values = feature_selection.chi2(X_train.replace({np.nan:0.0}).values, y_outcome)

# Features whose chi-squared p-value exceeds 1e-4.
print(X_train.columns[p_values > 10**(-4)])

Index(['HOME_TEAM_REDCARDS_season_sum', 'HOME_TEAM_SUBSTITUTIONS_season_sum',
       'HOME_TEAM_REDCARDS_season_average',
       'HOME_TEAM_SUBSTITUTIONS_season_average',
       'HOME_TEAM_BALL_POSSESSION_season_std',
       'HOME_TEAM_YELLOWCARDS_season_std', 'HOME_TEAM_REDCARDS_season_std',
       'HOME_TEAM_SUBSTITUTIONS_season_std', 'HOME_TEAM_BALL_SAFE_season_std',
       'HOME_TEAM_INJURIES_season_std',
       ...
       'DEFEND_AWAY_PLAYER_SAVES_season_std',
       'DEFEND_AWAY_PLAYER_DUELS_LOST_5_last_match_sum',
       'DEFEND_AWAY_PLAYER_TACKLES_5_last_match_sum',
       'DEFEND_AWAY_PLAYER_TOTAL_DUELS_5_last_match_sum',
       'DEFEND_AWAY_PLAYER_DUELS_LOST_5_last_match_average',
       'DEFEND_AWAY_PLAYER_FOULS_5_last_match_average',
       'DEFEND_AWAY_PLAYER_TACKLES_5_last_match_average',
       'DEFEND_AWAY_PLAYER_TOTAL_DUELS_5_last_match_average',
       'DEFEND_AWAY_PLAYER_INTERCEPTIONS_5_last_match_std',
       'DEFEND_AWAY_PLAYER_TOTAL_DUELS_5_last_match_std'],
     

In [12]:
# Print the assembled feature matrix (pandas abbreviates large frames when printing).
print(X_train)

       HOME_TEAM_SHOTS_TOTAL_season_sum  HOME_TEAM_SHOTS_INSIDEBOX_season_sum  \
ID                                                                              
0                                   3.0                                   2.0   
1                                   6.0                                   8.0   
2                                   4.0                                   2.0   
3                                   7.0                                   5.0   
4                                   3.0                                   3.0   
...                                 ...                                   ...   
12296                               0.0                                   0.0   
12297                               6.0                                   NaN   
12299                               4.0                                   2.0   
12300                               4.0                                   3.0   
12302                       

In [13]:
# One class per match: column index of the largest entry in each train_scores row.
y_outcome = np.argmax(train_scores.loc[m,:].values, axis=1)

# mutual_info_classif is randomised; fixing the seed (same value as the
# train/test split) makes the ranking reproducible across runs.
res = feature_selection.mutual_info_classif(X_train.replace({np.nan:0.0}).values, y_outcome, random_state=42)
# Feature indices sorted by decreasing mutual information.
I = np.argsort(res)[::-1]

for i in I:
    print(X_train.columns[i], " : ", res[i])

HOME_TEAM_SHOTS_INSIDEBOX_season_sum  :  0.026778042417448633
HOME_TEAM_SHOTS_TOTAL_5_last_match_sum  :  0.02601384572117249
HOME_TEAM_GAME_LOST_season_sum  :  0.02575322477429398
HOME_TEAM_SHOTS_INSIDEBOX_season_average  :  0.025566188274749102
HOME_TEAM_GAME_WON_5_last_match_sum  :  0.025286318273428865
AWAY_TEAM_SHOTS_TOTAL_5_last_match_average  :  0.024092583803607903
HOME_TEAM_GOALS_season_average  :  0.023849945652694604
HOME_TEAM_PASSES_season_sum  :  0.023480996419447786
AWAY_TEAM_SUCCESSFUL_PASSES_PERCENTAGE_season_average  :  0.02346679701542098
AWAY_TEAM_PASSES_season_average  :  0.023462519797532888
AWAY_TEAM_PASSES_5_last_match_average  :  0.023382991398056108
HOME_TEAM_ATTACKS_season_average  :  0.02335805017134085
HOME_TEAM_SHOTS_INSIDEBOX_5_last_match_sum  :  0.02313850603192824
HOME_TEAM_SHOTS_ON_TARGET_season_sum  :  0.0231108299839895
AWAY_TEAM_SHOTS_INSIDEBOX_season_average  :  0.023101933572565958
HOME_TEAM_CORNERS_season_sum  :  0.022604177378910295
HOME_TEAM_SHOT

In [16]:
# Names of the 512 highest-ranked features, best first.
for feature_name in X_train.columns[I[:512]]:
    print(feature_name)


HOME_TEAM_SHOTS_INSIDEBOX_season_sum
HOME_TEAM_SHOTS_TOTAL_5_last_match_sum
HOME_TEAM_GAME_LOST_season_sum
HOME_TEAM_SHOTS_INSIDEBOX_season_average
HOME_TEAM_GAME_WON_5_last_match_sum
AWAY_TEAM_SHOTS_TOTAL_5_last_match_average
HOME_TEAM_GOALS_season_average
HOME_TEAM_PASSES_season_sum
AWAY_TEAM_SUCCESSFUL_PASSES_PERCENTAGE_season_average
AWAY_TEAM_PASSES_season_average
AWAY_TEAM_PASSES_5_last_match_average
HOME_TEAM_ATTACKS_season_average
HOME_TEAM_SHOTS_INSIDEBOX_5_last_match_sum
HOME_TEAM_SHOTS_ON_TARGET_season_sum
AWAY_TEAM_SHOTS_INSIDEBOX_season_average
HOME_TEAM_CORNERS_season_sum
HOME_TEAM_SHOTS_TOTAL_season_average
DEFEND_AWAY_PLAYER_CLEARANCES_season_average
AWAY_TEAM_CORNERS_season_sum
HOME_TEAM_SUCCESSFUL_PASSES_5_last_match_average
AWAY_TEAM_DANGEROUS_ATTACKS_season_sum
HOME_TEAM_SHOTS_ON_TARGET_5_last_match_sum
AWAY_TEAM_OFFSIDES_season_average
ATTACK_AWAY_PLAYER_TOTAL_DUELS_5_last_match_average
AWAY_TEAM_PASSES_season_sum
AWAY_TEAM_ATTACKS_season_sum
AWAY_TEAM_BALL_POSSESS

## Team selection

In [2]:
def _load_prefixed(path, prefix):
    """Read a CSV indexed by its first column and prepend `prefix` to every column name."""
    frame = pd.read_csv(path, index_col=0)
    frame.columns = prefix + frame.columns
    return frame


train_home_team_statistics_df = _load_prefixed('./train_home_team_statistics_df.csv', "HOME_")
train_away_team_statistics_df = _load_prefixed('./train_away_team_statistics_df.csv', "AWAY_")
test_home_team_statistics_df = _load_prefixed('./test_home_team_statistics_df.csv', "HOME_")
test_away_team_statistics_df = _load_prefixed('./test_away_team_statistics_df.csv', "AWAY_")

# Train tables lose their first two columns before the join; test tables are joined as-is.
train_team_data = train_home_team_statistics_df.iloc[:, 2:].join(
    train_away_team_statistics_df.iloc[:, 2:]
)
test_team_data = test_home_team_statistics_df.join(test_away_team_statistics_df)

train_scores = pd.read_csv('./Y_train.csv', index_col=0)

In [3]:
from sklearn import feature_selection

# One class per match: column index of the largest entry in each train_scores row.
y_outcome = np.argmax(train_scores.values, axis=1)

# mutual_info_classif is randomised; a fixed seed makes the ranking reproducible.
res = feature_selection.mutual_info_classif(train_team_data.replace({np.nan:0.0}).values, y_outcome, random_state=42)
# Feature indices sorted by decreasing mutual information.
I = np.argsort(res)[::-1]

for i in I:
    print(train_team_data.columns[i], " : ", res[i])

HOME_TEAM_GAME_WON_season_sum  :  0.028371067521388404
HOME_TEAM_BALL_POSSESSION_season_average  :  0.024109690734844147
HOME_TEAM_SHOTS_ON_TARGET_5_last_match_average  :  0.021274569662818044
AWAY_TEAM_SHOTS_ON_TARGET_season_average  :  0.021271645346246704
AWAY_TEAM_GAME_LOST_season_average  :  0.020599529337402167
AWAY_TEAM_CORNERS_season_average  :  0.020069344424401336
HOME_TEAM_PASSES_season_average  :  0.019734326679457848
AWAY_TEAM_ATTACKS_season_average  :  0.019149746069186735
HOME_TEAM_SHOTS_ON_TARGET_season_average  :  0.018985161382693017
AWAY_TEAM_GAME_LOST_season_sum  :  0.01871635299966168
AWAY_TEAM_PASSES_season_sum  :  0.017811480739181595
HOME_TEAM_SHOTS_ON_TARGET_5_last_match_sum  :  0.01780026405863344
AWAY_TEAM_BALL_POSSESSION_season_average  :  0.017647548854283546
AWAY_TEAM_SHOTS_TOTAL_season_sum  :  0.01752917494587103
HOME_TEAM_GAME_LOST_5_last_match_average  :  0.017459933952080675
AWAY_TEAM_SHOTS_INSIDEBOX_season_average  :  0.017433582433757033
AWAY_TEAM_PA

In [6]:
# Keep the ranked indices whose mutual information is strictly positive.
informative = I[res[I] > 0.0]
for feature_name in train_team_data.columns[informative]:
    print(feature_name)

HOME_TEAM_GAME_WON_season_sum
HOME_TEAM_BALL_POSSESSION_season_average
HOME_TEAM_SHOTS_ON_TARGET_5_last_match_average
AWAY_TEAM_SHOTS_ON_TARGET_season_average
AWAY_TEAM_GAME_LOST_season_average
AWAY_TEAM_CORNERS_season_average
HOME_TEAM_PASSES_season_average
AWAY_TEAM_ATTACKS_season_average
HOME_TEAM_SHOTS_ON_TARGET_season_average
AWAY_TEAM_GAME_LOST_season_sum
AWAY_TEAM_PASSES_season_sum
HOME_TEAM_SHOTS_ON_TARGET_5_last_match_sum
AWAY_TEAM_BALL_POSSESSION_season_average
AWAY_TEAM_SHOTS_TOTAL_season_sum
HOME_TEAM_GAME_LOST_5_last_match_average
AWAY_TEAM_SHOTS_INSIDEBOX_season_average
AWAY_TEAM_PASSES_season_average
HOME_TEAM_SHOTS_TOTAL_season_average
AWAY_TEAM_SUCCESSFUL_PASSES_season_sum
AWAY_TEAM_DANGEROUS_ATTACKS_season_average
AWAY_TEAM_CORNERS_season_sum
HOME_TEAM_GOALS_season_sum
AWAY_TEAM_GAME_WON_season_sum
AWAY_TEAM_GOALS_season_sum
AWAY_TEAM_PASSES_5_last_match_sum
HOME_TEAM_PASSES_5_last_match_sum
AWAY_TEAM_DANGEROUS_ATTACKS_5_last_match_average
AWAY_TEAM_SUBSTITUTIONS_seas