# **Feature Engineering: QRT Challenge**

### **Imports**

In [39]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn import model_selection
from sklearn.metrics import accuracy_score
import xgboost as xgb
import warnings
import matplotlib.pyplot as plt
import itertools
from itertools import chain


warnings.filterwarnings('ignore')

### **Number of selected features**

In [40]:
top_k = 80

### **Data Cleaning**

In [41]:
train_home_team_statistics_df = pd.read_csv('./Train_Data/train_home_team_statistics_df.csv', index_col=0)
train_away_team_statistics_df = pd.read_csv('./Train_Data/train_away_team_statistics_df.csv', index_col=0)

train_home_player_statistics_df = pd.read_csv('./Train_Data/train_home_player_statistics_df.csv', index_col=0)
train_away_player_statistics_df = pd.read_csv('./Train_Data/train_away_player_statistics_df.csv', index_col=0)

### **Data Preprocessing**

In [42]:
from sklearn.preprocessing import MinMaxScaler

# Fill NaN values with a specified value or strategy
train_home_team_statistics_df.fillna(0, inplace=True)  # Fill NaN with 0 for example
train_home_player_statistics_df.fillna(0, inplace=True)  # Fill NaN with 0 for example
train_away_team_statistics_df.fillna(0, inplace=True)  # Fill NaN with 0 for example
train_away_player_statistics_df.fillna(0, inplace=True)  # Fill NaN with 0 for example

# Perform min-max scaling on numeric columns
scaler = MinMaxScaler()

# Assuming numeric_cols contains the column names of numeric features
numeric_cols_home_team = train_home_team_statistics_df.select_dtypes(include=['float64', 'int64']).columns.tolist()
numeric_cols_home_player = train_home_player_statistics_df.select_dtypes(include=['float64', 'int64']).columns.tolist()
numeric_cols_away_team = train_away_team_statistics_df.select_dtypes(include=['float64', 'int64']).columns.tolist()
numeric_cols_away_player = train_away_player_statistics_df.select_dtypes(include=['float64', 'int64']).columns.tolist()

# Fit scaler on train data and transform both train and test data
train_home_team_statistics_df[numeric_cols_home_team] = scaler.fit_transform(train_home_team_statistics_df[numeric_cols_home_team])
train_home_player_statistics_df[numeric_cols_home_player] = scaler.fit_transform(train_home_player_statistics_df[numeric_cols_home_player])
train_away_team_statistics_df[numeric_cols_away_team] = scaler.fit_transform(train_away_team_statistics_df[numeric_cols_away_team])
train_away_player_statistics_df[numeric_cols_away_player] = scaler.fit_transform(train_away_player_statistics_df[numeric_cols_away_player])

In [43]:
# Reset index and consider the original index as a column
train_home_team_statistics_df.reset_index(inplace=True)
train_away_team_statistics_df.reset_index(inplace=True)
train_home_player_statistics_df.reset_index(inplace=True)
train_away_player_statistics_df.reset_index(inplace=True)

## **Integration of the players data**

In [44]:
# Function to calculate player score
def calculate_player_score(player_features, score_features):
    return sum(player_features[feature] * score_features[feature] for feature in score_features)

In [45]:
# Define functions to rank players within each subset
def rank_players(df, position, num_players=1):
    

    subset = pd.concat([df[df['POSITION'] == position], df[df['POSITION'] == 0]])


    if position == "goalkeeper":
        score_features = {"PLAYER_SAVES_season_sum":1,
            "PLAYER_SAVES_INSIDE_BOX_season_sum":1,
            "PLAYER_PUNCHES_season_sum":1,
            "PLAYER_GOALKEEPER_GOALS_CONCEDED_season_sum":-1,
            "PLAYER_SAVES_INSIDE_BOX_season_average":1,
            "PLAYER_PUNCHES_season_average":1,
            "PLAYER_SAVES_INSIDE_BOX_5_last_match_sum":1,
            "PLAYER_PUNCHES_5_last_match_sum":1}

    
    if position == "defender":
        score_features = {"PLAYER_BLOCKED_SHOTS_season_sum":1,
            "PLAYER_CLEARANCES_season_sum":1,
            "PLAYER_INTERCEPTIONS_season_sum":1,
            "PLAYER_TACKLES_season_sum":1,
            "PLAYER_SHOTS_BLOCKED_season_sum":1,
            "PLAYER_CLEARANCE_OFFLINE_season_sum":1,
            "PLAYER_DISPOSSESSED_season_sum":-1,
            "PLAYER_DRIBBLED_PAST_season_sum":-1,
            "PLAYER_DUELS_LOST_season_sum":-1,
            "PLAYER_TOTAL_DUELS_season_sum":1}

    if position == "midfielder":
        score_features = {"PLAYER_ACCURATE_PASSES_season_sum":1,
            "PLAYER_ASSISTS_season_sum":1,
            "PLAYER_BIG_CHANCES_CREATED_season_sum":1,
            "PLAYER_INTERCEPTIONS_season_sum":1,
            "PLAYER_TACKLES_season_sum":1,
            "PLAYER_KEY_PASSES_season_sum":1,
            "PLAYER_SUCCESSFUL_DRIBBLES_season_sum":1,
            "PLAYER_TOTAL_CROSSES_season_sum":1,
            "PLAYER_SHOTS_ON_TARGET_season_sum":1,
            "PLAYER_YELLOWCARDS_season_sum":-1}

    if position == "attacker":
        score_features = {"PLAYER_GOALS_season_sum":1,
            "PLAYER_ASSISTS_season_sum":1,
            "PLAYER_BIG_CHANCES_CREATED_season_sum":1,
            "PLAYER_SHOTS_ON_TARGET_season_sum":1,
            "PLAYER_SHOTS_TOTAL_season_sum":1,
            "PLAYER_SUCCESSFUL_DRIBBLES_season_sum":1,
            "PLAYER_BIG_CHANCES_MISSED_season_sum":-1,
            "PLAYER_ACCURATE_CROSSES_season_sum":1,
            "PLAYER_KEY_PASSES_season_sum":1,
            "PLAYER_SHOTS_OFF_TARGET_season_sum":-1}


    player_scores = {}
    player_index = 0

    print(subset)
    for player_id, player_features in subset.iterrows():
        score = calculate_player_score(player_features, score_features)
        player_scores[player_index] = score
        player_index += 1


    # Sort players based on their scores (descending order)
    sorted_players = sorted(player_scores.items(), key=lambda x: x[1], reverse=True)

    top_k_players = []
    
    for k in range(num_players):
        player_id = sorted_players[k][0]
        player = subset.iloc[player_id]
        player_features = []
        for f in score_features:
            player_features.append(player[f])
        top_k_players.append(player_features)

    features = [f"{feature}_{position}_{i}" for i in range(num_players) for feature in score_features]

    return top_k_players, features

In [52]:
def add_player_stats(team_stats_df, player_stats_df):
  """
  Adds player statistics to each row in a team statistics dataframe based on matching league.

  Args:
      team_stats_df (pd.DataFrame): Dataframe containing team statistics.
      player_stats_df (pd.DataFrame): Dataframe containing player statistics.

  Returns:
      pd.DataFrame: Updated team statistics dataframe with added player stats column.
  """


  # Iterate through each team statistics row
  for index, team_stats_row in team_stats_df.iterrows():
    # Get the league value from the current team statistics row
    league = team_stats_row["LEAGUE"]
    team = team_stats_row["TEAM_NAME"]
    

    # Filter player stats for matching league and convert to dictionary
    # filtered_player_stats = player_stats_df[player_stats_df["LEAGUE"] == league]
    # filtered_player_stats = filtered_player_stats[filtered_player_stats["TEAM_NAME"] == team]
    filtered_player_stats = player_stats_df[player_stats_df["ID"] == index]
    
    # Get the starting 11 of the team
    goalkeeper, gk_features = rank_players(filtered_player_stats, "goalkeeper", num_players=1)
    defenders , df_features = rank_players(filtered_player_stats, "defender", num_players=4)
    midfielders, md_features = rank_players(filtered_player_stats, "midfielder", num_players=4)
    attackers, att_features = rank_players(filtered_player_stats, "attacker", num_players=2)



    features = goalkeeper + defenders + midfielders + attackers
    features = list(chain.from_iterable(features))

    
    features_names = gk_features + df_features + md_features + att_features


    # Add filtered player stats to the current team stats row
    for f_ind in range(len(features_names)):
      team_stats_df.at[index, features_names[f_ind]] = features[f_ind]


  return team_stats_df

In [None]:
train_home_team_statistics_df = add_player_stats(train_home_team_statistics_df, train_home_player_statistics_df)
train_away_team_statistics_df = add_player_stats(train_away_team_statistics_df, train_away_player_statistics_df)

In [None]:
train_scores = pd.read_csv('./Y_train.csv', index_col=0)

train_home = train_home_team_statistics_df.iloc[:,2:]
train_away = train_away_team_statistics_df.iloc[:,2:]

train_home.columns = 'HOME_' + train_home.columns
train_away.columns = 'AWAY_' + train_away.columns

train_data =  pd.concat([train_home,train_away],join='inner',axis=1)
train_scores = train_scores.loc[train_data.index]

train_data = train_data.replace({np.inf:np.nan,-np.inf:np.nan})

In [None]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(train_data, train_scores, train_size=0.8, random_state=42)
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(X_train, y_train, train_size=0.8, random_state=42)

### **Variance Threshold**

In [None]:
# using sklearn variancethreshold to find constant features
from sklearn.feature_selection import VarianceThreshold
sel = VarianceThreshold(threshold=0)
sel.fit(X_train)  # fit finds the features with zero variance

In [None]:
# print the constant features
print(
    len([
        x for x in X_train.columns
        if x not in X_train.columns[sel.get_support()]
    ]))

10


### **Statistical Methods**

In [None]:
from sklearn.feature_selection import SelectPercentile, chi2

selected_feature_indices = SelectPercentile(chi2, percentile=top_k).fit(X_train, y_train).get_support(indices=True)
selected_feature_names = X_train.columns[selected_feature_indices]

print("Selected feature names:")
for feature_name in selected_feature_names:
    print(feature_name)

Selected feature names:
HOME_TEAM_SHOTS_TOTAL_season_sum
HOME_TEAM_SHOTS_INSIDEBOX_season_sum
HOME_TEAM_SHOTS_OFF_TARGET_season_sum
HOME_TEAM_SHOTS_ON_TARGET_season_sum
HOME_TEAM_SHOTS_OUTSIDEBOX_season_sum
HOME_TEAM_PASSES_season_sum
HOME_TEAM_SUCCESSFUL_PASSES_season_sum
HOME_TEAM_SAVES_season_sum
HOME_TEAM_CORNERS_season_sum
HOME_TEAM_FOULS_season_sum
HOME_TEAM_YELLOWCARDS_season_sum
HOME_TEAM_OFFSIDES_season_sum
HOME_TEAM_ATTACKS_season_sum
HOME_TEAM_PENALTIES_season_sum
HOME_TEAM_BALL_SAFE_season_sum
HOME_TEAM_DANGEROUS_ATTACKS_season_sum
HOME_TEAM_INJURIES_season_sum
HOME_TEAM_GOALS_season_sum
HOME_TEAM_GAME_WON_season_sum
HOME_TEAM_GAME_DRAW_season_sum
HOME_TEAM_GAME_LOST_season_sum
HOME_TEAM_SHOTS_TOTAL_season_average
HOME_TEAM_SHOTS_INSIDEBOX_season_average
HOME_TEAM_SHOTS_OFF_TARGET_season_average
HOME_TEAM_SHOTS_ON_TARGET_season_average
HOME_TEAM_SHOTS_OUTSIDEBOX_season_average
HOME_TEAM_PASSES_season_average
HOME_TEAM_SUCCESSFUL_PASSES_season_average
HOME_TEAM_SUCCESSFUL_PA

### **Sequential Feature Selection**

In [None]:
# step forward feature selection

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

sfs1 = SFS(RandomForestRegressor(), 
           k_features=top_k, 
           forward=True, 
           floating=False, 
           verbose=2,
           scoring='r2',
           n_jobs = 4,
           cv=3)

sfs1 = sfs1.fit(np.array(X_train), y_train)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    6.5s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:   23.9s
[Parallel(n_jobs=4)]: Done 357 tasks      | elapsed:  1.1min
[Parallel(n_jobs=4)]: Done 640 tasks      | elapsed:  1.9min
[Parallel(n_jobs=4)]: Done 708 out of 708 | elapsed:  2.2min finished

[2024-03-11 17:37:30] Features: 1/80 -- score: 0.017018094458630586[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:    8.0s
[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:   38.3s
[Parallel(n_jobs=4)]: Done 357 tasks      | elapsed:  2.1min
[Parallel(n_jobs=4)]: Done 640 tasks      | elapsed:  3.7min
[Parallel(n_jobs=4)]: Done 707 out of 707 | elapsed:  4.2min finished

[2024-03-11 17:41:43] Features: 2/80 -- score: 0.021064518270692122[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n

In [None]:
sfs1.k_feature_idx_

### **Correlation**

In [None]:
# Create a new column 'target' with the mapped values
train_scores['target'] = np.where(train_scores['HOME_WINS'] == 1, 1,
                                  np.where(train_scores['DRAW'] == 1, 0, -1))

# Drop the original columns
train_scores.drop(columns=['HOME_WINS', 'DRAW', 'AWAY_WINS'], inplace=True)

# Now merge train_data and train_scores
X = pd.concat([train_data, train_scores], axis=1)

In [None]:
correlation_matrix = X.corr()
target_correlation = correlation_matrix['target'].abs().sort_values(ascending=False)
top_k_features = target_correlation.index[:top_k]  # Choose an appropriate value for k

selected_features_corr = X[top_k_features]

In [None]:
top_k_features

Index(['target', 'HOME_TEAM_GAME_WON_season_sum',
       'HOME_TEAM_GAME_WON_season_average',
       'HOME_TEAM_SHOTS_ON_TARGET_season_average',
       'HOME_TEAM_GOALS_season_sum', 'HOME_TEAM_SHOTS_ON_TARGET_season_sum',
       'HOME_TEAM_GOALS_season_average',
       'AWAY_TEAM_SHOTS_ON_TARGET_season_average',
       'HOME_TEAM_BALL_POSSESSION_season_average',
       'HOME_TEAM_SHOTS_TOTAL_season_average',
       'AWAY_TEAM_GAME_WON_season_average',
       'AWAY_TEAM_SHOTS_ON_TARGET_season_sum',
       'AWAY_TEAM_GAME_LOST_season_average', 'AWAY_TEAM_GAME_LOST_season_sum',
       'AWAY_TEAM_GAME_WON_season_sum',
       'HOME_TEAM_SHOTS_INSIDEBOX_season_average',
       'HOME_TEAM_SHOTS_TOTAL_season_sum',
       'HOME_TEAM_GAME_LOST_season_average', 'HOME_TEAM_GAME_LOST_season_sum',
       'AWAY_TEAM_GOALS_season_average',
       'AWAY_TEAM_BALL_POSSESSION_season_average',
       'AWAY_TEAM_GOALS_season_sum', 'HOME_TEAM_DANGEROUS_ATTACKS_season_sum',
       'HOME_TEAM_DANGEROUS_ATTACK

### **Mutual Information Regressor**

In [None]:
X_train_one_col, X_test_one_col, y_train_one_col, y_test_one_col = model_selection.train_test_split(train_data, train_scores, train_size=0.8, random_state=42)
X_train_one_col, X_valid_one_col, y_train_one_col, y_valid_one_col = model_selection.train_test_split(X_train_one_col, y_train_one_col, train_size=0.8, random_state=42)

In [None]:
from sklearn.feature_selection import mutual_info_regression

mi_scores = mutual_info_regression(X_train_one_col, y_train_one_col)
top_k_features_mi = X_train_one_col.columns[mi_scores.argsort()[-top_k:][::-1]]

selected_features_mi = X_train_one_col[top_k_features_mi]

In [None]:
top_k_features_mi

Index(['HOME_TEAM_SHOTS_ON_TARGET_season_average',
       'HOME_TEAM_GAME_LOST_season_sum',
       'HOME_PLAYER_SHOTS_ON_TARGET_season_sum_midfielder_4',
       'HOME_PLAYER_SHOTS_BLOCKED_season_sum_defender_1',
       'AWAY_PLAYER_BLOCKED_SHOTS_season_sum_defender_5',
       'HOME_PLAYER_ASSISTS_season_sum_attacker_3',
       'AWAY_TEAM_PASSES_season_average',
       'AWAY_PLAYER_BIG_CHANCES_CREATED_season_sum_attacker_0',
       'HOME_PLAYER_KEY_PASSES_season_sum_midfielder_3',
       'HOME_TEAM_SUCCESSFUL_PASSES_5_last_match_average',
       'AWAY_PLAYER_SAVES_season_sum_goalkeeper_1',
       'HOME_PLAYER_TACKLES_season_sum_defender_2',
       'AWAY_PLAYER_ASSISTS_season_sum_attacker_2',
       'HOME_TEAM_SHOTS_TOTAL_season_average',
       'AWAY_PLAYER_INTERCEPTIONS_season_sum_defender_4',
       'HOME_TEAM_ATTACKS_season_average',
       'HOME_TEAM_DANGEROUS_ATTACKS_5_last_match_sum',
       'AWAY_PLAYER_BIG_CHANCES_CREATED_season_sum_midfielder_6',
       'HOME_PLAYER_SAVES_seaso

In [None]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
# Load iris data

# Create an SelectKBest object to select features with two best ANOVA F-Values
fvalue_selector = SelectKBest(f_classif, k=top_k)

# Apply the SelectKBest object to the features and target
X_kbest = fvalue_selector.fit_transform(X_train_one_col, y_train_one_col)

# Get boolean mask of selected features
selected_features_mask = fvalue_selector.get_support()

# Get indices of selected features
selected_features_indices = [i for i, selected in enumerate(selected_features_mask) if selected]

# Get names of selected features
selected_feature_names = X_train.columns[selected_feature_indices]

# Print selected feature names
print("Selected features:", selected_feature_names)

Selected features: Index(['HOME_TEAM_SHOTS_TOTAL_season_sum',
       'HOME_TEAM_SHOTS_INSIDEBOX_season_sum',
       'HOME_TEAM_SHOTS_OFF_TARGET_season_sum',
       'HOME_TEAM_SHOTS_ON_TARGET_season_sum',
       'HOME_TEAM_SHOTS_OUTSIDEBOX_season_sum', 'HOME_TEAM_PASSES_season_sum',
       'HOME_TEAM_SUCCESSFUL_PASSES_season_sum', 'HOME_TEAM_SAVES_season_sum',
       'HOME_TEAM_CORNERS_season_sum', 'HOME_TEAM_FOULS_season_sum',
       ...
       'AWAY_PLAYER_KEY_PASSES_season_sum_attacker_3',
       'AWAY_PLAYER_GOALS_season_sum_attacker_4',
       'AWAY_PLAYER_ASSISTS_season_sum_attacker_4',
       'AWAY_PLAYER_BIG_CHANCES_CREATED_season_sum_attacker_4',
       'AWAY_PLAYER_SHOTS_ON_TARGET_season_sum_attacker_4',
       'AWAY_PLAYER_SHOTS_TOTAL_season_sum_attacker_4',
       'AWAY_PLAYER_SUCCESSFUL_DRIBBLES_season_sum_attacker_4',
       'AWAY_PLAYER_BIG_CHANCES_MISSED_season_sum_attacker_4',
       'AWAY_PLAYER_ACCURATE_CROSSES_season_sum_attacker_4',
       'AWAY_PLAYER_KEY_PASSES_se

### **Lasso Regularization**

In [None]:
from sklearn.linear_model import Lasso

lasso = Lasso(alpha=0.01)  # Choose an appropriate alpha value
lasso.fit(X_train_one_col, y_train_one_col)
selected_features_lasso = X_train_one_col.columns[lasso.coef_ != 0]

In [None]:
selected_features_lasso

Index(['HOME_TEAM_SAVES_season_sum', 'HOME_TEAM_CORNERS_season_sum',
       'HOME_TEAM_DANGEROUS_ATTACKS_season_sum', 'HOME_TEAM_GOALS_season_sum',
       'HOME_TEAM_GAME_WON_season_sum', 'HOME_TEAM_SHOTS_TOTAL_season_average',
       'HOME_TEAM_SHOTS_INSIDEBOX_season_average',
       'HOME_TEAM_BALL_POSSESSION_season_average',
       'HOME_TEAM_ATTACKS_season_average',
       'HOME_TEAM_GAME_LOST_season_average',
       'HOME_TEAM_CORNERS_5_last_match_average',
       'HOME_PLAYER_ASSISTS_season_sum_midfielder_0',
       'HOME_PLAYER_BIG_CHANCES_CREATED_season_sum_midfielder_3',
       'HOME_PLAYER_SHOTS_ON_TARGET_season_sum_midfielder_5',
       'HOME_PLAYER_BIG_CHANCES_CREATED_season_sum_attacker_1',
       'HOME_PLAYER_GOALS_season_sum_attacker_4',
       'HOME_PLAYER_ASSISTS_season_sum_attacker_4',
       'AWAY_TEAM_SHOTS_INSIDEBOX_season_sum', 'AWAY_TEAM_SAVES_season_sum',
       'AWAY_TEAM_CORNERS_season_sum', 'AWAY_TEAM_ATTACKS_season_sum',
       'AWAY_TEAM_GAME_LOST_season_su

In [None]:
train_home_team_statistics_df

Unnamed: 0_level_0,LEAGUE,TEAM_NAME,TEAM_SHOTS_TOTAL_season_sum,TEAM_SHOTS_INSIDEBOX_season_sum,TEAM_SHOTS_OFF_TARGET_season_sum,TEAM_SHOTS_ON_TARGET_season_sum,TEAM_SHOTS_OUTSIDEBOX_season_sum,TEAM_PASSES_season_sum,TEAM_SUCCESSFUL_PASSES_season_sum,TEAM_SAVES_season_sum,...,PLAYER_GOALS_season_sum_attacker_4,PLAYER_ASSISTS_season_sum_attacker_4,PLAYER_BIG_CHANCES_CREATED_season_sum_attacker_4,PLAYER_SHOTS_ON_TARGET_season_sum_attacker_4,PLAYER_SHOTS_TOTAL_season_sum_attacker_4,PLAYER_SUCCESSFUL_DRIBBLES_season_sum_attacker_4,PLAYER_BIG_CHANCES_MISSED_season_sum_attacker_4,PLAYER_ACCURATE_CROSSES_season_sum_attacker_4,PLAYER_KEY_PASSES_season_sum_attacker_4,PLAYER_SHOTS_OFF_TARGET_season_sum_attacker_4
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Ligue 1,Toulouse,0.3,0.2,0.5,0.2,0.1,0.2,0.2,0.5,...,0.00,0.00,0.40,0.00,0.25,0.00,0.00,1.00,0.81,0.0
1,Ligue 2,Brest,0.6,0.8,0.3,0.6,0.5,0.8,0.7,1.0,...,1.00,1.00,0.63,1.00,0.88,0.35,0.66,0.09,0.42,0.0
2,Serie A,Sampdoria,0.4,0.2,0.5,0.2,0.8,0.1,0.1,0.2,...,1.00,0.75,0.36,0.59,0.55,0.16,0.80,0.06,0.34,0.0
3,League One,Coventry City,0.7,0.5,0.5,0.6,0.6,0.9,0.9,0.2,...,0.12,0.16,0.37,0.31,0.34,0.27,0.14,0.43,0.58,0.0
4,Premier League,Wolverhampton Wanderers,0.3,0.3,0.2,0.3,0.4,0.4,0.3,0.4,...,0.30,0.27,0.35,0.46,0.55,0.59,0.08,1.00,0.61,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12298,League One,Southend United,0.4,0.2,0.3,0.4,0.7,0.4,0.4,0.3,...,0.45,0.00,0.00,0.50,0.55,0.44,0.00,0.04,0.15,0.0
12299,Liga Portugal,Boavista,0.4,0.2,0.3,0.1,0.5,0.1,0.1,0.9,...,0.21,0.50,0.30,0.34,0.59,0.37,0.20,0.29,0.65,0.0
12300,Bundesliga,Schalke 04,0.4,0.3,0.5,0.3,0.5,0.1,0.1,0.6,...,0.03,0.05,0.20,0.14,0.30,0.53,0.04,0.35,0.47,0.0
12301,League One,Fleetwood Town,0.2,0.0,0.1,0.1,0.0,0.0,0.0,0.0,...,0.71,0.21,0.53,0.50,0.60,0.15,0.40,0.03,0.41,0.0


In [None]:
import csv 

# Specify the file name
csv_file = 'train_home_statistics_df.csv'
csv_file_2 = 'train_away_statistics_df.csv'

# Writing data to CSV file
train_home_team_statistics_df.to_csv(csv_file, index=True)
train_away_team_statistics_df.to_csv(csv_file_2, index=True)


print(f'Data saved to {csv_file}')
print(f'Data saved to {csv_file_2}')

Data saved to train_home_statistics_df.csv
Data saved to train_away_statistics_df.csv
