In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3

# Open the SQLite database file that holds the tables queried below.
cnx = sqlite3.connect('database.sqlite')

# Load the three tables completely into module-level DataFrames; the functions
# in this file read these globals directly instead of querying the database.
player_data = pd.read_sql("SELECT * FROM Player;", cnx)
player_stats_data = pd.read_sql("SELECT * FROM Player_Attributes;", cnx)
match_data = pd.read_sql("SELECT * FROM Match;", cnx)


def get_data(team_api_id):
    """Build the per-match feature table for one team's home matches.

    The lineups of every match in which ``team_api_id`` is the home team are
    fetched with ``_get_all_player``; matches with at least one missing lineup
    slot are dropped. Each helper then derives one or two feature columns per
    remaining match, and the results are concatenated side by side.

    Args:
        team_api_id: Value matched against ``home_team_api_id`` in the
            module-level ``match_data`` table.

    Returns:
        A DataFrame with one row per retained match. The helpers number their
        rows 0..n-1, so the original ``match_data`` index is not preserved.
        NOTE: helpers that are called once for the home side and once for the
        away side emit the same column name both times (e.g. two
        ``Overall_Weight`` columns); the home column always comes first.
    """
    home_columns = [f'home_player_{slot}' for slot in range(1, 12)]
    away_columns = [f'away_player_{slot}' for slot in range(1, 12)]

    # Only complete lineups are usable: every helper averages over all slots.
    df_all_player = _get_all_player(team_api_id).dropna()
    df_home_team_player = df_all_player[home_columns]
    df_away_team_player = df_all_player[away_columns]

    # Slot 1 is treated as the goalkeeper and slot 11 as the striker.
    df_goalkeeper_home = df_home_team_player['home_player_1']
    df_goalkeeper_away = df_away_team_player['away_player_1']
    df_striker_home = df_home_team_player['home_player_11']
    df_striker_away = df_away_team_player['away_player_11']

    # The list order defines the column order of the returned table.
    feature_frames = [
        _get_average_rating_teams(df_home_team_player, df_away_team_player),
        _get_average_weight(df_home_team_player),
        _get_average_weight(df_away_team_player),
        _get_average_height(df_home_team_player),
        _get_average_height(df_away_team_player),
        _get_goalkeeper_stats(df_goalkeeper_home),
        _get_goalkeeper_stats(df_goalkeeper_away),
        _get_attacker_stats(df_striker_home),
        _get_attacker_stats(df_striker_away),
        _get_strongest_foot(df_home_team_player),
        _get_strongest_foot(df_away_team_player),
        _get_average_age(df_home_team_player),
        _get_average_age(df_away_team_player),
    ]
    return pd.concat(feature_frames, axis=1, join='outer')


def _get_all_player(team_api_id):
    """Return the 22 lineup columns of every match hosted by ``team_api_id``.

    Reads the module-level ``match_data`` table. The result keeps the original
    row labels and lists the home slots 1-11 first, then the away slots 1-11.
    """
    lineup_columns = [
        f'{side}_player_{slot}'
        for side in ('home', 'away')
        for slot in range(1, 12)
    ]
    is_home_match = match_data['home_team_api_id'] == team_api_id
    return match_data.loc[is_home_match, lineup_columns]

def _get_average_rating_teams(df_home_team_player, df_away_team_player):
    """Compute the mean overall rating of the home and away lineup per match.

    A player's rating is the mean of all of that player's ``overall_rating``
    entries in the module-level ``player_stats_data`` table. A lineup's rating
    is the mean of its players' ratings.

    Args:
        df_home_team_player: One row per match, one column per home lineup
            slot, holding ``player_api_id`` values.
        df_away_team_player: Same layout for the away side.

    Returns:
        A DataFrame with the columns ``Overall_Rating_Home`` and
        ``Overall_Rating_Away``, indexed 0..n-1 in input row order. A lineup
        containing a player without any rating yields NaN.
    """
    # Average each player's rating history once, instead of re-filtering the
    # whole attribute table for every slot of every match.
    mean_rating_by_player = (
        player_stats_data.groupby('player_api_id')['overall_rating'].mean()
    )

    def _lineup_mean(df_lineups, column_name):
        # Work on the raw values so slots are addressed by position, not by
        # (string) column label.
        player_ids = df_lineups.to_numpy()
        ratings = (
            pd.Series(player_ids.ravel())
            .map(mean_rating_by_player)
            .to_numpy(dtype=float)
            .reshape(player_ids.shape)
        )
        # ndarray.sum propagates NaN, so an unrated player makes the whole
        # lineup NaN rather than being silently skipped.
        return pd.DataFrame(
            {column_name: ratings.sum(axis=1) / player_ids.shape[1]}
        )

    return pd.concat(
        [
            _lineup_mean(df_home_team_player, 'Overall_Rating_Home'),
            _lineup_mean(df_away_team_player, 'Overall_Rating_Away'),
        ],
        axis=1,
        join='outer',
    )


def _get_average_weight(dataframe):
    """Compute the mean player weight of each lineup.

    Weights come from the ``weight`` column of the module-level
    ``player_data`` table, looked up by ``player_api_id``.

    Args:
        dataframe: One row per match, one column per lineup slot, holding
            ``player_api_id`` values.

    Returns:
        A DataFrame with the single column ``Overall_Weight``, indexed 0..n-1
        in input row order. A lineup containing a player with no weight entry
        yields NaN.
    """
    # Build the id -> weight lookup once. The mean keeps the result defined
    # even if a player id appears on more than one row.
    mean_weight_by_player = player_data.groupby('player_api_id')['weight'].mean()

    # Address slots by position via the raw values, not by column label.
    player_ids = dataframe.to_numpy()
    weights = (
        pd.Series(player_ids.ravel())
        .map(mean_weight_by_player)
        .to_numpy(dtype=float)
        .reshape(player_ids.shape)
    )
    # ndarray.sum propagates NaN, so an unknown player gives a NaN lineup.
    return pd.DataFrame(
        {'Overall_Weight': weights.sum(axis=1) / player_ids.shape[1]}
    )


def _get_average_height(dataframe):
    """Compute the mean player height of each lineup.

    Heights come from the ``height`` column of the module-level
    ``player_data`` table, looked up by ``player_api_id``.

    Args:
        dataframe: One row per match, one column per lineup slot, holding
            ``player_api_id`` values.

    Returns:
        A DataFrame with the single column ``Overall_Height``, indexed 0..n-1
        in input row order. A lineup containing a player with no height entry
        yields NaN.
    """
    # Build the id -> height lookup once. The mean keeps the result defined
    # even if a player id appears on more than one row.
    mean_height_by_player = player_data.groupby('player_api_id')['height'].mean()

    # Address slots by position via the raw values, not by column label.
    player_ids = dataframe.to_numpy()
    heights = (
        pd.Series(player_ids.ravel())
        .map(mean_height_by_player)
        .to_numpy(dtype=float)
        .reshape(player_ids.shape)
    )
    # ndarray.sum propagates NaN, so an unknown player gives a NaN lineup.
    return pd.DataFrame(
        {'Overall_Height': heights.sum(axis=1) / player_ids.shape[1]}
    )


def _get_average_age(dataframe, reference_year=2023):
    """Compute the mean player age of each lineup.

    A player's age is ``reference_year`` minus the birth year, where the birth
    year is the first four characters of the player's ``birthday`` string in
    the module-level ``player_data`` table. If a player id appears on several
    rows, the first row is used.

    Args:
        dataframe: One row per match, one column per lineup slot, holding
            ``player_api_id`` values.
        reference_year: Year against which ages are measured. Defaults to
            2023, the value that used to be hard-coded.

    Returns:
        A DataFrame with the single column ``Mean_Age``, indexed 0..n-1 in
        input row order. A lineup containing a player with no usable birthday
        yields NaN, matching how the other lineup helpers treat unknown
        players.
    """
    first_birthday = (
        player_data[['player_api_id', 'birthday']]
        .drop_duplicates(subset='player_api_id')
        .set_index('player_api_id')['birthday']
    )
    # Unparseable birthdays become NaN so that one bad row of an unrelated
    # player cannot break the whole computation.
    birth_year = pd.to_numeric(first_birthday.str[:4], errors='coerce')
    age_by_player = reference_year - birth_year

    # Address slots by position via the raw values, not by column label.
    player_ids = dataframe.to_numpy()
    ages = (
        pd.Series(player_ids.ravel())
        .map(age_by_player)
        .to_numpy(dtype=float)
        .reshape(player_ids.shape)
    )
    # ndarray.sum propagates NaN, so an unknown player gives a NaN lineup.
    return pd.DataFrame({'Mean_Age': ages.sum(axis=1) / player_ids.shape[1]})
    

def _get_strongest_foot(dataframe):
    """Count right- and left-footed players in each lineup.

    A player's foot is the ``preferred_foot`` value of that player's first row
    in the module-level ``player_stats_data`` table.

    Args:
        dataframe: One row per match, one column per lineup slot, holding
            ``player_api_id`` values.

    Returns:
        A DataFrame with the integer columns ``Prefer_Right`` and
        ``Prefer_Left``, indexed 0..n-1 in input row order. Players whose
        first recorded foot is neither ``'right'`` nor ``'left'``, or who are
        absent from the attribute table, are counted in neither column.
    """
    # Keep only the first recorded foot per player, built once for all rows.
    first_foot_by_player = (
        player_stats_data[['player_api_id', 'preferred_foot']]
        .drop_duplicates(subset='player_api_id')
        .set_index('player_api_id')['preferred_foot']
    )

    # Address slots by position via the raw values, not by column label.
    player_ids = dataframe.to_numpy()
    feet = (
        pd.Series(player_ids.ravel())
        .map(first_foot_by_player)
        .to_numpy(dtype=object)
        .reshape(player_ids.shape)
    )
    return pd.DataFrame({
        'Prefer_Right': (feet == 'right').sum(axis=1),
        'Prefer_Left': (feet == 'left').sum(axis=1),
    })


def _get_attacker_stats(dataframe):
    """Score each striker as the mean of six shooting attributes.

    For every player id yielded by iterating ``dataframe``, each of the six
    attributes is averaged over all of that player's rows in the module-level
    ``player_stats_data`` table; the six averages are then averaged again.

    Returns:
        A DataFrame with the single column ``Striker_Stats_Mean``, indexed
        0..n-1 in iteration order.
    """
    shooting_columns = [
        'finishing',
        'free_kick_accuracy',
        'heading_accuracy',
        'long_shots',
        'shot_power',
        'volleys',
    ]
    scores = pd.DataFrame(columns=['Striker_Stats_Mean'])
    for position, striker_id in enumerate(dataframe):
        is_striker = player_stats_data['player_api_id'] == striker_id
        striker_rows = player_stats_data.loc[is_striker, shooting_columns]
        attribute_means = [striker_rows[name].mean() for name in shooting_columns]
        scores.loc[position] = sum(attribute_means) / 6
    return scores


def _get_goalkeeper_stats(dataframe):
    """Score each goalkeeper as the mean of the five ``gk_*`` attributes.

    For every player id yielded by iterating ``dataframe``, each of the five
    attributes is averaged over all of that player's rows in the module-level
    ``player_stats_data`` table; the five averages are then averaged again.

    Returns:
        A DataFrame with the single column ``Goalkeeper_Stats_Mean``, indexed
        0..n-1 in iteration order.
    """
    keeper_columns = [
        'gk_diving',
        'gk_handling',
        'gk_kicking',
        'gk_positioning',
        'gk_reflexes',
    ]
    scores = pd.DataFrame(columns=['Goalkeeper_Stats_Mean'])
    for position, keeper_id in enumerate(dataframe):
        is_keeper = player_stats_data['player_api_id'] == keeper_id
        keeper_rows = player_stats_data.loc[is_keeper, keeper_columns]
        attribute_means = [keeper_rows[name].mean() for name in keeper_columns]
        scores.loc[position] = sum(attribute_means) / 5
    return scores


def _set_label(dataframe):
    """Label every match as 'Win', 'Lose' or 'Draw' from the home side's view.

    Args:
        dataframe: Frame with the columns ``'Heimtore'`` (home goals) and
            ``'Auswärtstore'`` (away goals).

    Returns:
        A DataFrame with the single column ``Label``, indexed by the row
        position (0..n-1) of the match in ``dataframe``. Rows whose scores
        cannot be compared (e.g. a NaN score) get no entry, so their position
        is missing from the result index.
    """
    positions = []
    labels = []
    # zip pairs the two scores by position, so the result is correct whatever
    # index ``dataframe`` carries (e.g. after filtering or dropna).
    score_pairs = zip(dataframe['Heimtore'], dataframe['Auswärtstore'])
    for position, (home, away) in enumerate(score_pairs):
        if home > away:
            label = 'Win'
        elif home < away:
            label = 'Lose'
        elif home == away:
            label = 'Draw'
        else:
            # Incomparable scores (NaN): leave this match unlabelled.
            continue
        positions.append(position)
        labels.append(label)
    return pd.DataFrame({'Label': labels}, index=positions, columns=['Label'])

# Example run: build and print the feature table for the team with API id 8634.
print(get_data(8634))

     Overall_Rating_Home  Overall_Rating_Away  Overall_Weight  Overall_Weight  \
0              83.119733            72.897088      173.000000      167.636364   
1              82.156813            78.570763      170.454545      165.090909   
2              82.751995            80.897191      170.818182      170.090909   
3              80.204252            73.141484      171.454545      172.454545   
4              83.936311            73.459458      170.000000      168.909091   
..                   ...                  ...             ...             ...   
126            83.322252            68.772891      165.909091      164.181818   
127            78.812084            68.688699      162.181818      167.454545   
128            80.730499            69.121916      168.363636      159.090909   
129            79.194322            71.340261      167.545455      164.272727   
130            79.770107            68.141458      165.909091      158.181818   

     Overall_Height  Overal