In [54]:
import pandas as pd
import numpy as np
from add_rankings import add_rankings
from sklearn import linear_model
from scipy.optimize import minimize
from sklearn import metrics
import math

Na osnovu statistika igrača iz sezone 2020/2021 koji se nalaze u fajlu Euroleague_20_21.xlsx, izvlače se statistike timova poput broja pogođenih šuteva za 2 i tri, broja faulova, PIR.
Nakon izvlačenja statistika timova, vrši se predviđanje rangiranja timova po formuli, dobijenoj iz rada [1].

$$ Ranking = 47.19 + 0.15 * ReversedFouls + 0.78* 2PointsMade + 1.39 * 3PointsМade + 0.54 * 1PointMade $$

[1]: How To Win the Basketball Euroleague? Game Performance Determining Sports Results During 2003-2016 Matches  https://www.researchgate.net/publication/349466836_How_to_Win_the_Basketball_Euroleague_Game_Performance_Determining_Sports_Results_During_2003-2016_Matches

In [2]:
def calculate_team_stats(path):
    df = pd.read_excel(path)

    required_columns = ['Club', '2FG', '3FG', 'FT', 'Fouls(Rv)', 'PIR']
    if not all(column in df.columns for column in required_columns):
        raise ValueError(f"The file must contain required columns")
    
    df[['2pt_made','2pt_attempted']] = df['2FG'].str.split('/', expand = True).astype(int)
    df[['3pt_made','3pt_attempted']] = df['3FG'].str.split('/', expand = True).astype(int)
    df[['ft_made','ft_attempted']] = df['FT'].str.split('/', expand = True).astype(int)

    df['Fouls(Rv)'] = df['Fouls(Rv)'].astype(int)
    df['PIR'] = df['PIR'].astype(int)

    team_stats = df.groupby('Club').agg({
        '2pt_made': 'sum',
        '3pt_made': 'sum',
        'ft_made': 'sum',
        'Fouls(Rv)':'sum',
        'PIR': 'sum'
    }).reset_index()

    return team_stats


In [3]:
path = 'data/Euroleague_20_21.xlsx'
team_stats = calculate_team_stats(path)

In [4]:
team_stats

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR
0,AX Armani Exchange Milan,797,387,589,819,3514
1,Alba Berlin,647,342,354,664,2863
2,Anadolu Efes,795,418,609,836,3908
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183
4,CSKA Moscow,759,388,599,847,3516
5,Crvena Zvezda Mts Belgrade,587,292,470,680,2469
6,FC Barcelona,855,324,598,866,3566
7,FC Bayern Munich,831,281,561,801,3201
8,Fenerbahce Beko Istanbul,781,301,415,685,3106
9,Khimki Moscow Region,620,315,449,630,2626


In [5]:
team_stats['score'] = 47.19 + 0.15 * team_stats['Fouls(Rv)'] + 0.78 * team_stats['2pt_made'] + 1.39 * team_stats['3pt_made'] + 0.54 * team_stats['ft_made']

In [6]:
team_stats

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
1,Alba Berlin,647,342,354,664,2863,1317.99
2,Anadolu Efes,795,418,609,836,3908,1702.57
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71
4,CSKA Moscow,759,388,599,847,3516,1629.04
5,Crvena Zvezda Mts Belgrade,587,292,470,680,2469,1266.73
6,FC Barcelona,855,324,598,866,3566,1617.27
7,FC Bayern Munich,831,281,561,801,3201,1509.05
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
9,Khimki Moscow Region,620,315,449,630,2626,1305.6


In [7]:
team_stats = team_stats.sort_values(by=['score']).iloc[::-1]

Prikazujemo predvidjeno rangiranje za sezonu 2020/2021

In [8]:
team_stats 

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
2,Anadolu Efes,795,418,609,836,3908,1702.57
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
4,CSKA Moscow,759,388,599,847,3516,1629.04
6,FC Barcelona,855,324,598,866,3566,1617.27
14,Real Madrid,720,400,458,734,3429,1522.21
7,FC Bayern Munich,831,281,561,801,3201,1509.05
17,Zenit St Petersburg,705,358,537,803,3266,1505.14
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
15,Valencia Basket,668,318,493,697,3075,1381.02
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71


Pravo rangiranje za tu sezonu:
![image.png](data\rankings_2021.png)

Takodje, posto parametar PIR predstavlja efikasnost timova, pokušaj je bio poređati timove po njihovoj ukupnoj efikasnosti i videti njihov renking

In [9]:
team_stats_pir = team_stats.sort_values(by=['PIR']).iloc[::-1]

In [10]:
team_stats_pir

Unnamed: 0,Club,2pt_made,3pt_made,ft_made,Fouls(Rv),PIR,score
2,Anadolu Efes,795,418,609,836,3908,1702.57
6,FC Barcelona,855,324,598,866,3566,1617.27
4,CSKA Moscow,759,388,599,847,3516,1629.04
0,AX Armani Exchange Milan,797,387,589,819,3514,1647.69
14,Real Madrid,720,400,458,734,3429,1522.21
17,Zenit St Petersburg,705,358,537,803,3266,1505.14
7,FC Bayern Munich,831,281,561,801,3201,1509.05
3,Baskonia Vitoria-Gasteiz,685,314,439,678,3183,1356.71
8,Fenerbahce Beko Istanbul,781,301,415,685,3106,1401.61
15,Valencia Basket,668,318,493,697,3075,1381.02


Zaključak: oba načina predviđaju predviđaju u kojoj polovini tabela se nalaze timovi, al njihov tačni poredak bi mogli i bolje.

U datoteci data/euroleague_teams.csv se nalaze statistike timova od sezona 2008-2023. Za trening skup koriscene su statistike od sezona 2017-2023, pri čemu je ciljna promenljiva rang timova (dodato u fajlu add_rankings.py), a za test skup podaci za sezonu 2024. Dodati su jos neki parametri, poput procenta šuteva i pogođenih slobodnih bacanja, kako bi se mogla izvršiti bolja predviđanja.

In [11]:
data = add_rankings('data/euroleague_teams.csv')

data['PIR'] = 3*data['two_points_made_per_game'] - data['two_points_attempted_per_game'] + 4*data['three_points_made_per_game'] - data['three_points_attempted_per_game'] + data['free_throws_made_per_game'] - data['free_throws_attempted_per_game'] 
+ data['assists_per_game'] + data['steals_per_game'] + data['blocks_against_per_game'] + data ['fouls_received_per_game'] - data['turnovers_per_game'] - data['blocks_favour_per_game'] - data['fouls_committed_per_game']
 
data['two_points_percentage'] = data['two_points_made_per_game'] / data['two_points_attempted_per_game']
data['three_points_percentage'] = data['three_points_made_per_game'] / data['three_points_attempted_per_game']
data['free_throws_percentage'] = data['free_throws_made_per_game'] / data['free_throws_attempted_per_game']

In [12]:
training_data = data[data['ranking'].notna()]

In [13]:
test_data = data[data['season_team_id'].str.startswith('E2023')]

In [14]:
test_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking,actual_ranking,PIR
336,E2023_ASV,E2023,ASV,34.0,1370.0,2674,744,1388,244,723,...,6.03,12.97,1.62,3.29,18.41,19.35,85.82,,17.0,28.37
337,E2023_BAR,E2023,BAR,39.0,1565.0,3163,856,1568,328,912,...,6.41,12.79,2.28,1.92,19.85,18.95,91.59,,5.0,31.38
338,E2023_BAS,E2023,BAS,39.0,1565.0,3300,752,1351,424,1143,...,5.51,12.46,2.21,3.38,18.18,18.79,93.05,,9.0,33.17
339,E2023_BER,E2023,BER,34.0,1360.0,2591,641,1260,302,860,...,7.0,15.09,2.15,3.5,18.32,18.18,77.76,,18.0,26.6
340,E2023_IST,E2023,IST,35.0,1425.0,2992,747,1346,348,940,...,7.03,10.34,3.14,2.14,18.31,18.51,96.51,,10.0,35.46
341,E2023_MAD,E2023,MAD,39.0,1590.0,3459,820,1414,407,1076,...,6.51,12.03,3.62,1.56,18.9,20.08,107.56,,1.0,37.51
342,E2023_MCO,E2023,MCO,39.0,1575.0,3189,849,1601,305,872,...,6.87,9.97,1.82,2.21,19.62,21.56,91.36,,3.0,28.21
343,E2023_MIL,E2023,MIL,34.0,1370.0,2659,615,1116,341,930,...,7.0,12.44,2.12,1.74,19.56,18.97,85.47,,12.0,30.78
344,E2023_MUN,E2023,MUN,34.0,1385.0,2674,649,1215,336,938,...,6.06,13.03,2.62,2.47,20.65,18.79,84.38,,15.0,30.72
345,E2023_OLY,E2023,OLY,41.0,1660.0,3229,772,1396,387,1030,...,7.17,12.15,2.61,2.41,18.93,19.61,91.24,,4.0,30.35


In [15]:
model = linear_model.LinearRegression()

In [16]:
training_data_ranking = training_data['ranking']

In [17]:
training_data_ranking

216    13.0
217    11.0
218     8.0
219     1.0
220     9.0
       ... 
331    11.0
332     5.0
333     7.0
334    14.0
335     9.0
Name: ranking, Length: 119, dtype: float64

Atributi korišćeni pri obučavanju

In [18]:
parameters = ['two_points_percentage','three_points_percentage','free_throws_percentage','offensive_rebounds_per_game','defensive_rebounds_per_game','assists_per_game','steals_per_game','turnovers_per_game','fouls_received_per_game']

In [19]:
training_data = training_data[parameters]

In [20]:
training_data

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
216,0.548361,0.401636,0.783100,7.43,24.13,18.23,5.47,12.50,18.93
217,0.485178,0.378299,0.736842,9.40,24.43,17.23,7.07,14.77,19.83
218,0.533023,0.353806,0.763085,11.18,25.36,18.39,6.70,13.70,20.64
219,0.551554,0.403241,0.821319,9.46,24.00,19.86,7.11,14.31,22.80
220,0.488683,0.368053,0.733231,11.68,23.21,13.97,6.15,11.09,20.76
...,...,...,...,...,...,...,...,...,...
331,0.545147,0.337556,0.775155,10.41,22.35,16.91,6.65,12.53,20.12
332,0.552821,0.347894,0.792845,11.90,23.79,16.38,6.62,11.44,19.26
333,0.554022,0.373239,0.722836,11.38,22.10,17.97,6.49,11.77,20.41
334,0.545643,0.373733,0.793122,7.97,22.82,19.44,7.12,14.53,20.06


In [21]:
model.fit(training_data,training_data_ranking)

In [22]:
test_data_params = test_data[parameters]

In [23]:
test_data_params

Unnamed: 0,two_points_percentage,three_points_percentage,free_throws_percentage,offensive_rebounds_per_game,defensive_rebounds_per_game,assists_per_game,steals_per_game,turnovers_per_game,fouls_received_per_game
336,0.536012,0.337723,0.773465,10.29,23.68,17.82,6.03,12.97,19.35
337,0.545884,0.359709,0.725894,11.54,24.31,19.31,6.41,12.79,18.95
338,0.556582,0.370863,0.761905,10.49,24.49,18.97,5.51,12.46,18.79
339,0.508635,0.351127,0.791583,10.82,21.62,16.44,7.0,15.09,18.18
340,0.554862,0.370067,0.812148,10.29,22.26,17.63,7.03,10.34,18.51
341,0.579978,0.378398,0.814559,9.82,26.62,20.59,6.51,12.03,20.08
342,0.530329,0.349732,0.748227,11.03,22.85,16.05,6.87,9.97,21.56
343,0.551188,0.366728,0.776333,8.88,23.24,16.29,7.0,12.44,18.97
344,0.534135,0.358101,0.797935,10.71,24.71,16.09,6.06,13.03,18.79
345,0.55301,0.375796,0.729869,9.68,23.51,19.02,7.17,12.15,19.61


In [24]:
rankings_2023_predicted = model.predict(test_data_params)

In [25]:
test_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking,actual_ranking,PIR
336,E2023_ASV,E2023,ASV,34.0,1370.0,2674,744,1388,244,723,...,6.03,12.97,1.62,3.29,18.41,19.35,85.82,,17.0,28.37
337,E2023_BAR,E2023,BAR,39.0,1565.0,3163,856,1568,328,912,...,6.41,12.79,2.28,1.92,19.85,18.95,91.59,,5.0,31.38
338,E2023_BAS,E2023,BAS,39.0,1565.0,3300,752,1351,424,1143,...,5.51,12.46,2.21,3.38,18.18,18.79,93.05,,9.0,33.17
339,E2023_BER,E2023,BER,34.0,1360.0,2591,641,1260,302,860,...,7.0,15.09,2.15,3.5,18.32,18.18,77.76,,18.0,26.6
340,E2023_IST,E2023,IST,35.0,1425.0,2992,747,1346,348,940,...,7.03,10.34,3.14,2.14,18.31,18.51,96.51,,10.0,35.46
341,E2023_MAD,E2023,MAD,39.0,1590.0,3459,820,1414,407,1076,...,6.51,12.03,3.62,1.56,18.9,20.08,107.56,,1.0,37.51
342,E2023_MCO,E2023,MCO,39.0,1575.0,3189,849,1601,305,872,...,6.87,9.97,1.82,2.21,19.62,21.56,91.36,,3.0,28.21
343,E2023_MIL,E2023,MIL,34.0,1370.0,2659,615,1116,341,930,...,7.0,12.44,2.12,1.74,19.56,18.97,85.47,,12.0,30.78
344,E2023_MUN,E2023,MUN,34.0,1385.0,2674,649,1215,336,938,...,6.06,13.03,2.62,2.47,20.65,18.79,84.38,,15.0,30.72
345,E2023_OLY,E2023,OLY,41.0,1660.0,3229,772,1396,387,1030,...,7.17,12.15,2.61,2.41,18.93,19.61,91.24,,4.0,30.35


In [26]:
model.coef_

array([-71.54454335, -86.19463086,   5.69586831,  -0.92624566,
        -1.37459459,   0.33891153,  -1.52459452,   1.10456036,
        -0.87039763])

In [27]:
sorted_ranking = sorted(rankings_2023_predicted)

order_ranking = {value: index + 1 for index, value in enumerate(sorted_ranking)}

result = [order_ranking[value] for value in rankings_2023_predicted]

In [28]:
test_data['ranking'] = result

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['ranking'] = result


In [29]:
test_data = test_data.sort_values(by=['ranking']).iloc[::1]
test_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking,actual_ranking,PIR
341,E2023_MAD,E2023,MAD,39.0,1590.0,3459,820,1414,407,1076,...,6.51,12.03,3.62,1.56,18.9,20.08,107.56,1,1.0,37.51
347,E2023_PAN,E2023,PAN,41.0,1653.0,3380,812,1475,377,966,...,6.76,12.46,2.68,2.41,17.68,21.63,95.32,2,2.0,31.22
342,E2023_MCO,E2023,MCO,39.0,1575.0,3189,849,1601,305,872,...,6.87,9.97,1.82,2.21,19.62,21.56,91.36,3,3.0,28.21
345,E2023_OLY,E2023,OLY,41.0,1660.0,3229,772,1396,387,1030,...,7.17,12.15,2.61,2.41,18.93,19.61,91.24,4,4.0,30.35
351,E2023_ULK,E2023,ULK,41.0,1670.0,3449,759,1365,470,1199,...,5.85,12.07,2.22,1.73,21.39,19.73,92.2,5,7.0,35.53
350,E2023_TEL,E2023,TEL,40.0,1603.0,3508,919,1710,356,973,...,7.1,11.68,3.05,2.33,20.38,19.4,98.58,6,6.0,33.2
340,E2023_IST,E2023,IST,35.0,1425.0,2992,747,1346,348,940,...,7.03,10.34,3.14,2.14,18.31,18.51,96.51,7,10.0,35.46
348,E2023_PAR,E2023,PAR,34.0,1375.0,2822,750,1282,273,755,...,6.56,12.44,2.32,1.59,21.06,20.41,90.12,8,11.0,35.11
337,E2023_BAR,E2023,BAR,39.0,1565.0,3163,856,1568,328,912,...,6.41,12.79,2.28,1.92,19.85,18.95,91.59,9,5.0,31.38
338,E2023_BAS,E2023,BAS,39.0,1565.0,3300,752,1351,424,1143,...,5.51,12.46,2.21,3.38,18.18,18.79,93.05,10,9.0,33.17


In [30]:
rmse_lin_reg = metrics.root_mean_squared_error(test_data['actual_ranking'], test_data['ranking'])
r2_lin_reg = metrics.r2_score(test_data['actual_ranking'], test_data['ranking'])

Opis genetskog algoritma: genetski algoritam čini populacija koja se sastoji od skupa hromozoma, koji obavljaju prirodnu selekciju, gde oni sa najboljim skorom se ukrštaju i prolaze u dalji proces selekcije. Populacija čini niz od 50 hromozoma, gde su hromozomi nizovi dužine broja atributa sa nasumično generisanim brojevima.

Skor se računa kao:

$$ FitnessScore = 1 / (1 + MSE + Regularization) $$

Gde MSE predstavlja srednjekvadratnu gresku, a Regularization regularizacioni parametar koji se računa kao <em>Lambda * Suma kvadrata brojeva u nizu<em> gde je Lambda 0.01

In [31]:
num_of_generations = 20
population = np.random.rand(50, len(parameters))

In [32]:
crossover = np.random.rand()

In [33]:
def fitness_for_single_chromosome(chromosome):
    predictions = np.dot(chromosome, training_data.values.T)
    mse = np.mean((predictions - training_data_ranking.values) ** 2)
    regularization = 0.01 * np.sum(chromosome ** 2)
    return 1 / (1 + mse + regularization)

def fitness_scores(population):
    fitness_score_arr=np.array([fitness_for_single_chromosome(chrom) for chrom in population])
    return fitness_score_arr

In [34]:
def apply_gradient_descent(solution):
    result = minimize(fitness_for_single_chromosome, solution, method='BFGS')
    return result.x
    

In [35]:
fitness_score_arr = fitness_scores(population)
len(fitness_score_arr)

50

In [36]:
population_with_scores = [{'chromosome': population[i], 'fitness_score': fitness_score_arr[i]} for i in range(len(fitness_score_arr))]

In [37]:
population_with_scores
initial_mutation_rate = 0.1
initial_mutation_strength = 0.1

In [38]:
for generation in range(num_of_generations):
    sorted_indices = np.argsort(fitness_score_arr)[::-1]
    population = population[sorted_indices]
    fitness_score_arr = fitness_score_arr[sorted_indices]
    
    # Elitism
    top_indices = sorted_indices[:int(0.1 * len(fitness_score_arr))]
    new_generation = population[top_indices]
    
    # Crossover with multi-point crossover
    num_offspring = int(0.9 * len(population))
    parent_indices = np.random.choice(range(11), size=(num_offspring, 2))
    crossover_points = np.sort(np.random.choice(range(1, len(parameters) - 1), size=2, replace=False))
    offspring1 = np.array([np.concatenate([population[parent1][:crossover_points[0]], 
                                           population[parent2][crossover_points[0]:crossover_points[1]], 
                                           population[parent1][crossover_points[1]:]])
                           for (parent1, parent2) in parent_indices])

    offspring2 = np.array([np.concatenate([population[parent2][:crossover_points[0]], 
                                           population[parent1][crossover_points[0]:crossover_points[1]], 
                                           population[parent2][crossover_points[1]:]])
                           for (parent1, parent2) in parent_indices])
    new_generation = np.concatenate([new_generation, offspring1, offspring2])

    # Mutation with Gaussian perturbation
    mutation_rate = initial_mutation_rate * (1 - (generation / num_of_generations))
    mutation_strength = initial_mutation_strength * (1 - (generation / num_of_generations))
    mutation_mask = np.random.rand(len(new_generation), len(parameters)) < mutation_rate
    new_generation += mutation_mask * np.random.normal(0, mutation_strength, new_generation.shape)

    # Periodic application of gradient descent
    if generation % 3 == 0:
        for i in range(min(3, len(new_generation))):  # Ensure not to index out of bounds
            new_generation[i] = apply_gradient_descent(new_generation[i])

    population = np.array(new_generation)
    fitness_score_arr = fitness_scores(population)
    print(generation)
        
    
best_chromosome = population[0]
best_chromosome
best_score = fitness_score_arr[0]
best_score

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


0.04786841316848563

In [39]:
predicted_rankings = np.dot(test_data[parameters], population[0])

In [40]:
sorted_ranking_pr = sorted(predicted_rankings)

order_ranking = {value: index + 1 for index, value in enumerate(sorted_ranking_pr)}

result_pr = [order_ranking[value] for value in predicted_rankings]

In [41]:
result_pr

[2, 11, 1, 7, 4, 6, 3, 14, 10, 5, 13, 9, 15, 8, 16, 12, 17, 18]

In [42]:
best_chromosome

array([ 0.99708934,  0.73616887,  0.13923114,  0.18232008, -0.17639804,
       -0.38412351,  0.42757344,  1.42749648, -0.18471685])

In [43]:
population_with_scores = [{'chromosome': population[i], 'fitness_score': fitness_score_arr[i]} for i in range(len(fitness_score_arr))]

In [44]:
best_chromosome = sorted(population_with_scores, key = lambda x:x['fitness_score'], reverse=True)[0]
best_chromosome

{'chromosome': array([ 0.99708934,  0.73616887,  0.13923114,  0.23364004, -0.22806774,
        -0.36996678,  0.42757344,  1.46284563, -0.18471685]),
 'fitness_score': 0.04829179949929281}

In [45]:
predicted_rankings = np.dot(test_data[parameters], best_chromosome['chromosome'])
sorted_ranking_pr = sorted(predicted_rankings)

order_ranking = {value: index + 1 for index, value in enumerate(sorted_ranking_pr)}

result_pr = [order_ranking[value] for value in predicted_rankings]

In [46]:
test_data['ranking'] = result_pr

In [47]:
test_data = test_data.sort_values(by=['ranking']).iloc[::1]
test_data

Unnamed: 0,season_team_id,season_code,team_id,games_played,minutes,points,two_points_made,two_points_attempted,three_points_made,three_points_attempted,...,steals_per_game,turnovers_per_game,blocks_favour_per_game,blocks_against_per_game,fouls_committed_per_game,fouls_received_per_game,valuation_per_game,ranking,actual_ranking,PIR
342,E2023_MCO,E2023,MCO,39.0,1575.0,3189,849,1601,305,872,...,6.87,9.97,1.82,2.21,19.62,21.56,91.36,1,3.0,28.21
341,E2023_MAD,E2023,MAD,39.0,1590.0,3459,820,1414,407,1076,...,6.51,12.03,3.62,1.56,18.9,20.08,107.56,2,1.0,37.51
340,E2023_IST,E2023,IST,35.0,1425.0,2992,747,1346,348,940,...,7.03,10.34,3.14,2.14,18.31,18.51,96.51,3,10.0,35.46
338,E2023_BAS,E2023,BAS,39.0,1565.0,3300,752,1351,424,1143,...,5.51,12.46,2.21,3.38,18.18,18.79,93.05,4,9.0,33.17
351,E2023_ULK,E2023,ULK,41.0,1670.0,3449,759,1365,470,1199,...,5.85,12.07,2.22,1.73,21.39,19.73,92.2,5,7.0,35.53
345,E2023_OLY,E2023,OLY,41.0,1660.0,3229,772,1396,387,1030,...,7.17,12.15,2.61,2.41,18.93,19.61,91.24,6,4.0,30.35
350,E2023_TEL,E2023,TEL,40.0,1603.0,3508,919,1710,356,973,...,7.1,11.68,3.05,2.33,20.38,19.4,98.58,7,6.0,33.2
352,E2023_VIR,E2023,VIR,36.0,1440.0,2872,702,1295,338,936,...,6.64,12.31,2.69,2.19,21.58,19.56,86.69,8,8.0,29.92
349,E2023_RED,E2023,RED,34.0,1360.0,2764,680,1239,314,920,...,6.59,12.12,2.47,2.74,21.56,19.88,88.71,9,16.0,29.4
347,E2023_PAN,E2023,PAN,41.0,1653.0,3380,812,1475,377,966,...,6.76,12.46,2.68,2.41,17.68,21.63,95.32,10,2.0,31.22


In [48]:
rmse_lin_reg

2.4720661623652207

In [49]:
r2_lin_reg

0.7729618163054696

In [52]:
r2_genet = metrics.r2_score(test_data['actual_ranking'], test_data['ranking'])

In [53]:
r2_genet

0.4014447884416925

In [55]:
rmse_genet = metrics.root_mean_squared_error(test_data['actual_ranking'], test_data['ranking'])

In [56]:
rmse_genet

4.013864859597431