In [205]:
import pandas as pd
import numpy as np
from add_rankings import add_rankings
from sklearn import linear_model

In [175]:
def calculate_team_stats(path):
    df = pd.read_excel(path)

    required_columns = ['Club', '2FG', '3FG', 'FT', 'Fouls(Rv)', 'PIR']
    if not all(column in df.columns for column in required_columns):
        raise ValueError(f"The file must contain required columns")
    
    df[['2pt_made','2pt_attempted']] = df['2FG'].str.split('/', expand = True).astype(int)
    df[['3pt_made','3pt_attempted']] = df['3FG'].str.split('/', expand = True).astype(int)
    df[['ft_made','ft_attempted']] = df['FT'].str.split('/', expand = True).astype(int)

    df['Fouls(Rv)'] = df['Fouls(Rv)'].astype(int)
    df['PIR'] = df['PIR'].astype(int)

    team_stats = df.groupby('Club').agg({
        '2pt_made': 'sum',
        '3pt_made': 'sum',
        'ft_made': 'sum',
        'Fouls(Rv)':'sum',
        'PIR': 'sum'
    }).reset_index()

    return team_stats


In [176]:
path = 'data/Euroleague_20_21.xlsx'
team_stats = calculate_team_stats(path)

In [177]:
team_stats

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR
0,AX Armani Exchange Milan,797,387,589,819,3514
1,Alba Berlin,647,342,354,664,2863
2,Anadolu Efes,795,418,609,836,3908
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183
4,CSKA Moscow,759,388,599,847,3516
5,Crvena Zvezda Mts Belgrade,587,292,470,680,2469
6,FC Barcelona,855,324,598,866,3566
7,FC Bayern Munich,831,281,561,801,3201
8,Fenerbahce Beko Istanbul,781,301,415,685,3106
9,Khimki Moscow Region,620,315,449,630,2626


In [178]:
team_stats['score'] = 47.19 + 0.15 * team_stats['Fouls(Rv)'] + 0.78 * team_stats['2pt_made'] + 1.39 * team_stats['3pt_made'] + 0.54 * team_stats['ft_made']

In [179]:
team_stats

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
1,Alba Berlin,647,342,354,664,2863,1317.99
2,Anadolu Efes,795,418,609,836,3908,1702.57
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71
4,CSKA Moscow,759,388,599,847,3516,1629.04
5,Crvena Zvezda Mts Belgrade,587,292,470,680,2469,1266.73
6,FC Barcelona,855,324,598,866,3566,1617.27
7,FC Bayern Munich,831,281,561,801,3201,1509.05
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
9,Khimki Moscow Region,620,315,449,630,2626,1305.6


In [180]:
team_stats = team_stats.sort_values(by=['score']).iloc[::-1]

In [181]:
team_stats 

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
2,Anadolu Efes,795,418,609,836,3908,1702.57
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
4,CSKA Moscow,759,388,599,847,3516,1629.04
6,FC Barcelona,855,324,598,866,3566,1617.27
14,Real Madrid,720,400,458,734,3429,1522.21
7,FC Bayern Munich,831,281,561,801,3201,1509.05
17,Zenit St Petersburg,705,358,537,803,3266,1505.14
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
15,Valencia Basket,668,318,493,697,3075,1381.02
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71


In [182]:
team_stats_pir = team_stats.sort_values(by=['PIR']).iloc[::-1]

In [183]:
team_stats_pir

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
2,Anadolu Efes,795,418,609,836,3908,1702.57
6,FC Barcelona,855,324,598,866,3566,1617.27
4,CSKA Moscow,759,388,599,847,3516,1629.04
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
14,Real Madrid,720,400,458,734,3429,1522.21
17,Zenit St Petersburg,705,358,537,803,3266,1505.14
7,FC Bayern Munich,831,281,561,801,3201,1509.05
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
15,Valencia Basket,668,318,493,697,3075,1381.02


In [184]:
data = add_rankings('data/euroleague_teams.csv')

In [185]:
training_data = data[data['ranking'].notna()]

In [186]:
training_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,total_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking
216,E2016_BAM,E2016,BAM,30.0,1214.3,2397,602,1098,280,697,...,31.57,18.23,5.47,12.50,2.73,2.57,21.93,18.93,86.13,13.0
217,E2016_BAR,E2016,BAR,30.0,1205.0,2141,496,1022,271,716,...,33.83,17.23,7.07,14.77,2.03,2.13,18.43,19.83,79.67,11.0
218,E2016_BAS,E2016,BAS,33.0,1319.8,2705,719,1349,270,763,...,36.55,18.39,6.70,13.70,3.03,2.82,21.58,20.64,90.85,8.0
219,E2016_CSK,E2016,CSK,35.0,1409.9,3063,745,1351,305,756,...,33.46,19.86,7.11,14.31,3.23,3.09,22.60,22.80,99.69,1.0
220,E2016_DAR,E2016,DAR,34.0,1365.0,2691,646,1322,304,826,...,34.88,13.97,6.15,11.09,2.47,3.82,20.32,20.76,81.71,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,E2022_RED,E2022,RED,34.0,1375.0,2626,657,1205,281,832,...,32.76,16.91,6.65,12.53,1.65,2.65,21.47,20.12,82.35,11.0
332,E2022_TEL,E2022,TEL,39.0,1565.0,3262,841,1521,348,1000,...,35.69,16.38,6.62,11.44,2.90,2.13,22.44,19.26,90.74,5.0
333,E2022_ULK,E2022,ULK,39.0,1575.0,3229,814,1469,372,997,...,33.49,17.97,6.49,11.77,1.59,2.46,20.79,20.41,90.13,7.0
334,E2022_VIR,E2022,VIR,34.0,1365.0,2653,628,1151,301,805,...,30.79,19.44,7.12,14.53,2.09,2.47,20.88,20.06,85.65,14.0


In [187]:
test_data = data[data['season_team_id'].str.startswith('E2023')]

In [188]:
test_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,total_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking
336,E2023_ASV,E2023,ASV,34.0,1370.0,2674,744,1388,244,723,...,33.97,17.82,6.03,12.97,1.62,3.29,18.41,19.35,85.82,
337,E2023_BAR,E2023,BAR,39.0,1565.0,3163,856,1568,328,912,...,35.85,19.31,6.41,12.79,2.28,1.92,19.85,18.95,91.59,
338,E2023_BAS,E2023,BAS,39.0,1565.0,3300,752,1351,424,1143,...,34.97,18.97,5.51,12.46,2.21,3.38,18.18,18.79,93.05,
339,E2023_BER,E2023,BER,34.0,1360.0,2591,641,1260,302,860,...,32.44,16.44,7.0,15.09,2.15,3.5,18.32,18.18,77.76,
340,E2023_IST,E2023,IST,35.0,1425.0,2992,747,1346,348,940,...,32.54,17.63,7.03,10.34,3.14,2.14,18.31,18.51,96.51,
341,E2023_MAD,E2023,MAD,39.0,1590.0,3459,820,1414,407,1076,...,36.44,20.59,6.51,12.03,3.62,1.56,18.9,20.08,107.56,
342,E2023_MCO,E2023,MCO,39.0,1575.0,3189,849,1601,305,872,...,33.87,16.05,6.87,9.97,1.82,2.21,19.62,21.56,91.36,
343,E2023_MIL,E2023,MIL,34.0,1370.0,2659,615,1116,341,930,...,32.12,16.29,7.0,12.44,2.12,1.74,19.56,18.97,85.47,
344,E2023_MUN,E2023,MUN,34.0,1385.0,2674,649,1215,336,938,...,35.41,16.09,6.06,13.03,2.62,2.47,20.65,18.79,84.38,
345,E2023_OLY,E2023,OLY,41.0,1660.0,3229,772,1396,387,1030,...,33.2,19.02,7.17,12.15,2.61,2.41,18.93,19.61,91.24,


In [189]:
model = linear_model.LinearRegression()

In [190]:
training_data_ranking = training_data['ranking']

In [191]:
training_data['two_points_percentage'] = training_data['two_points_made_per_game'] / training_data['two_points_attempted_per_game']
training_data['three_points_percentage'] = training_data['three_points_made_per_game'] / training_data['three_points_attempted_per_game']
training_data['free_throws_percentage'] = training_data['free_throws_made_per_game'] / training_data['free_throws_attempted_per_game']
parameters = ['two_points_percentage','three_points_percentage','free_throws_percentage','offensive_rebounds_per_game','defensive_rebounds_per_game','assists_per_game','steals_per_game','turnovers_per_game','fouls_received_per_game']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_data['two_points_percentage'] = training_data['two_points_made_per_game'] / training_data['two_points_attempted_per_game']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  training_data['three_points_percentage'] = training_data['three_points_made_per_game'] / training_data['three_points_attempted_per_game']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [192]:
training_data = training_data[parameters]

In [193]:
training_data

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
216,0.548361,0.401636,0.783100,7.43,24.13,18.23,5.47,12.50,18.93
217,0.485178,0.378299,0.736842,9.40,24.43,17.23,7.07,14.77,19.83
218,0.533023,0.353806,0.763085,11.18,25.36,18.39,6.70,13.70,20.64
219,0.551554,0.403241,0.821319,9.46,24.00,19.86,7.11,14.31,22.80
220,0.488683,0.368053,0.733231,11.68,23.21,13.97,6.15,11.09,20.76
...,...,...,...,...,...,...,...,...,...
331,0.545147,0.337556,0.775155,10.41,22.35,16.91,6.65,12.53,20.12
332,0.552821,0.347894,0.792845,11.90,23.79,16.38,6.62,11.44,19.26
333,0.554022,0.373239,0.722836,11.38,22.10,17.97,6.49,11.77,20.41
334,0.545643,0.373733,0.793122,7.97,22.82,19.44,7.12,14.53,20.06


In [194]:
model.fit(training_data,training_data_ranking)

In [195]:
test_data['two_points_percentage'] = test_data['two_points_made_per_game'] / test_data['two_points_attempted_per_game']
test_data['three_points_percentage'] = test_data['three_points_made_per_game'] / test_data['three_points_attempted_per_game']
test_data['free_throws_percentage'] = test_data['free_throws_made_per_game'] / test_data['free_throws_attempted_per_game']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['two_points_percentage'] = test_data['two_points_made_per_game'] / test_data['two_points_attempted_per_game']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['three_points_percentage'] = test_data['three_points_made_per_game'] / test_data['three_points_attempted_per_game']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexi

In [196]:
test_data = test_data[parameters]

In [197]:
test_data

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
336,0.536012,0.337723,0.773465,10.29,23.68,17.82,6.03,12.97,19.35
337,0.545884,0.359709,0.725894,11.54,24.31,19.31,6.41,12.79,18.95
338,0.556582,0.370863,0.761905,10.49,24.49,18.97,5.51,12.46,18.79
339,0.508635,0.351127,0.791583,10.82,21.62,16.44,7.0,15.09,18.18
340,0.554862,0.370067,0.812148,10.29,22.26,17.63,7.03,10.34,18.51
341,0.579978,0.378398,0.814559,9.82,26.62,20.59,6.51,12.03,20.08
342,0.530329,0.349732,0.748227,11.03,22.85,16.05,6.87,9.97,21.56
343,0.551188,0.366728,0.776333,8.88,23.24,16.29,7.0,12.44,18.97
344,0.534135,0.358101,0.797935,10.71,24.71,16.09,6.06,13.03,18.79
345,0.55301,0.375796,0.729869,9.68,23.51,19.02,7.17,12.15,19.61


In [198]:
rankings_2023_predicted = model.predict(test_data)

In [199]:
test_data

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
336,0.536012,0.337723,0.773465,10.29,23.68,17.82,6.03,12.97,19.35
337,0.545884,0.359709,0.725894,11.54,24.31,19.31,6.41,12.79,18.95
338,0.556582,0.370863,0.761905,10.49,24.49,18.97,5.51,12.46,18.79
339,0.508635,0.351127,0.791583,10.82,21.62,16.44,7.0,15.09,18.18
340,0.554862,0.370067,0.812148,10.29,22.26,17.63,7.03,10.34,18.51
341,0.579978,0.378398,0.814559,9.82,26.62,20.59,6.51,12.03,20.08
342,0.530329,0.349732,0.748227,11.03,22.85,16.05,6.87,9.97,21.56
343,0.551188,0.366728,0.776333,8.88,23.24,16.29,7.0,12.44,18.97
344,0.534135,0.358101,0.797935,10.71,24.71,16.09,6.06,13.03,18.79
345,0.55301,0.375796,0.729869,9.68,23.51,19.02,7.17,12.15,19.61


In [200]:
model.coef_

array([-71.54454335, -86.19463086,   5.69586831,  -0.92624566,
        -1.37459459,   0.33891153,  -1.52459452,   1.10456036,
        -0.87039763])

In [201]:
sorted_ranking = sorted(rankings_2023_predicted)

order_ranking = {value: index + 1 for index, value in enumerate(sorted_ranking)}

result = [order_ranking[value] for value in rankings_2023_predicted]

In [202]:
result

[16, 9, 10, 18, 7, 1, 3, 11, 13, 4, 17, 2, 8, 12, 6, 5, 14, 15]

In [203]:
test_data

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
336,0.536012,0.337723,0.773465,10.29,23.68,17.82,6.03,12.97,19.35
337,0.545884,0.359709,0.725894,11.54,24.31,19.31,6.41,12.79,18.95
338,0.556582,0.370863,0.761905,10.49,24.49,18.97,5.51,12.46,18.79
339,0.508635,0.351127,0.791583,10.82,21.62,16.44,7.0,15.09,18.18
340,0.554862,0.370067,0.812148,10.29,22.26,17.63,7.03,10.34,18.51
341,0.579978,0.378398,0.814559,9.82,26.62,20.59,6.51,12.03,20.08
342,0.530329,0.349732,0.748227,11.03,22.85,16.05,6.87,9.97,21.56
343,0.551188,0.366728,0.776333,8.88,23.24,16.29,7.0,12.44,18.97
344,0.534135,0.358101,0.797935,10.71,24.71,16.09,6.06,13.03,18.79
345,0.55301,0.375796,0.729869,9.68,23.51,19.02,7.17,12.15,19.61


In [204]:
# implementing genetic algorithm

In [207]:
num_of_generations = 1000
population = np.random.rand(100, len(parameters))

In [212]:
len(population)

100

In [208]:
crossover = np.random.rand()

0.2946061318516391

In [210]:
def fitness_score(chromosome, stats, rankings):
    rankings_prediction = stats.dot(chromosome)
    MSE = np.square(np.subtract(rankings,rankings_prediction)).mean()
    return 1 / (1+MSE) 

In [213]:
fitness_scores = np.array([fitness_score(chrom, training_data, training_data_ranking) for chrom in population])
len(fitness_scores)

100

In [214]:
population_with_scores = [{'chromosome': population[i], 'fitness_score': fitness_scores[i]} for i in range(len(fitness_scores))]

In [215]:
population_with_scores

[{'chromosome': array([0.99921014, 0.44383131, 0.24952115, 0.82269324, 0.78400617,
         0.92790044, 0.7777993 , 0.27050535, 0.01720108]),
  'fitness_score': 0.0005261346753089941},
 {'chromosome': array([0.67117207, 0.00656251, 0.30286042, 0.46942086, 0.9509693 ,
         0.55150581, 0.35063705, 0.90541254, 0.94169795]),
  'fitness_score': 0.00027014430064938624},
 {'chromosome': array([0.64667221, 0.45127938, 0.81924001, 0.62838907, 0.87947592,
         0.07271726, 0.24685516, 0.44964327, 0.31371544]),
  'fitness_score': 0.0008560091579933216},
 {'chromosome': array([0.08835024, 0.11747325, 0.32002659, 0.72560527, 0.98902389,
         0.71803191, 0.00566154, 0.88070645, 0.61228285]),
  'fitness_score': 0.0002997712642165637},
 {'chromosome': array([0.74783189, 0.8589608 , 0.05936465, 0.86784931, 0.84268951,
         0.98044076, 0.33886244, 0.08169183, 0.46313514]),
  'fitness_score': 0.00040065976976735125},
 {'chromosome': array([0.24482144, 0.69596964, 0.2474848 , 0.75480266, 0.

In [223]:
for generation in range(num_of_generations):
    population_with_scores = sorted(population_with_scores, key = lambda x:x['fitness_score'], reverse=True)
    new_generation = [] 
    population = [el['chromosome'] for el in population_with_scores]

    s = int((10*len(population_with_scores))/100)
    new_generation.extend(population[:s])

    s = int((90*len(population_with_scores))/100)
    for _ in range(s): 
        parent1 = np.random.choice(population[:50]) 
        parent2 = np.random.choice(population[:50])
        if np.random.rand() < 0.9:
            crossover_point = np.random.randint(1, len(parameters)-1)
            offspring1 = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
            offspring2 = np.concatenate([parent2[:crossover_point], parent1[crossover_point:]])
        else:
            offspring1, offspring2 = parent1, parent2
        new_generation.extend([offspring1, offspring2])

    for offspring in new_generation:
        if np.random.rand() < 0.01:
            mutation_point = np.random.randint(len(parameters))
            offspring[mutation_point] = np.random.rand()

    population = np.array(new_generation)
    fitness_scores = np.array([fitness_score(chrom, training_data, training_data_ranking) for chrom in population])
    population_with_scores = [{'chromosome': population[i], 'fitness_score': fitness_scores[i]} for i in range(len(fitness_scores))]
    
best_chromosome = sorted(population_with_scores, key = lambda x:x['fitness_score'], reverse=True)[0]
best_chromosome

NameError: name 'random' is not defined