In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

In [None]:
# Load the ranked-games dataset; the first CSV column becomes the index.
df = pd.read_csv('lol_ranked_games.csv', index_col=0)

# Keep one row per match and discard games shorter than 15 minutes.
df = df.drop_duplicates(subset=['match_id'])
df = df[df['game_duration_in_min'] >= 15]

# Remove the columns that the analysis below does not use.
df = df.drop(columns=[
    'game_mode',
    'queue_id',
    'platform_id',
    'game_duration_in_ms',
    'total_blue_assists',
    'total_red_assists',
])

# Number of missing values in each remaining column.
missing_values = df.isnull().sum()
print(missing_values)

print(df.shape)
df.head()

In [None]:
# Per-team statistics to merge into one match-level column each.
# 'total' statistics are summed over both teams, 'avg' statistics are averaged.
# The order of this list determines the order of the new columns.
per_team_stats = [
    ('total', 'kills'),
    ('total', 'gold_earned'),
    ('total', 'minions_killed'),
    ('total', 'turret_kills'),
    ('total', 'dragon_kills'),
    ('total', 'damage_dealt_to_champions'),
    ('total', 'baron_kills'),
    ('avg', 'champ_level'),
    ('total', 'inhibitor_kills'),
    ('total', 'items_purchased'),
]

team_columns = []
for prefix, stat in per_team_stats:
    blue_column = f'{prefix}_blue_{stat}'
    red_column = f'{prefix}_red_{stat}'
    combined = df[blue_column] + df[red_column]
    df[f'{prefix}_{stat}'] = combined / 2 if prefix == 'avg' else combined
    team_columns += [blue_column, red_column]

# The per-team source columns are no longer needed once merged.
df = df.drop(columns=team_columns)

# Subset of games that did not end with a surrender.
df_no_surr = df[df['surrender'] == False]
print(df.shape)
print(df_no_surr.shape)
df.head()

In [None]:
# Histogram of game durations followed by a small table of summary statistics
def plot_game_duration_distribution(df):
    """Plot the distribution of game durations and print summary statistics.

    Draws a 100-bin histogram of the ``game_duration_in_min`` column and then
    prints a two-column table (``Statistic``, ``Value``) with its minimum,
    maximum, mean and median.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``game_duration_in_min`` column.
    """
    durations = df['game_duration_in_min']

    plt.figure(figsize=(8, 4))
    plt.hist(durations, bins=100, edgecolor='blue', color='lightblue')
    plt.xlabel('Czas trwania gry (min)')
    plt.ylabel('Częstotliwość')
    plt.show()

    stats_df = pd.DataFrame({
        'Statistic': ['Minimum', 'Maximum', 'Mean', 'Median'],
        'Value': [
            durations.min(),
            durations.max(),
            durations.mean(),
            durations.median(),
        ],
    })
    print(stats_df)

In [None]:
# Duration distribution for all games first, then for games without a surrender.
for games in (df, df_no_surr):
    plot_game_duration_distribution(games)

In [None]:
# Polish display names for the predictors.
# The key order also determines the column order of X.
variable_mapping = {
    'total_kills': 'Łączne zabójstwa',
    'total_gold_earned': 'Łącznie zarobione złoto',
    'total_minions_killed': 'Łączne zabójstwa minionów',
    'total_turret_kills': 'Łączne zniszczenia wież',
    'total_dragon_kills': 'Łączne zabójstwa smoków',
    'total_damage_dealt_to_champions': 'Łączne obrażenia zadane bohaterom',
    'total_baron_kills': 'Łączne zabójstwa Barona',
    'avg_champ_level': 'Średni poziom bohatera',
    'total_inhibitor_kills': 'Łączne zniszczenia inhibitorów',
    'total_items_purchased': 'Łącznie zakupione przedmioty'
}

X = df[list(variable_mapping)]
Y = df['game_duration_in_min']

# Correlation of every predictor with the game duration (pandas default method).
correlations = X.corrwith(Y)
correlations_df = correlations.rename_axis('variable').reset_index(name='Czas trwania gry')
correlations_df['variable'] = correlations_df['variable'].map(variable_mapping)

# NOTE: the colour scale is anchored to [0, 1], so any negative correlation
# would be drawn with the same colour as 0.
plt.figure(figsize=(8, 4))
sns.heatmap(
    correlations_df.set_index('variable'),
    annot=True,
    cmap='Blues',
    vmin=0,
    vmax=1,
)
plt.title('Korelacje między zmiennymi a czasem trwania gry')
plt.xlabel('')
plt.ylabel('')
plt.show()

In [None]:
# Game duration predicted from a single variable: linear regression vs. "GLM"
# (a linear regression fitted on polynomial features)
def draw_plot_duration_on_one_var(X, Y, xlabel, degree=2, random_state=42):
    """Fit and plot two single-variable regressions of game duration.

    The data are split 80/20 into train and test sets. A plain linear
    regression and a linear regression on polynomial features (labelled
    "GLM" in the output) are fitted on the training part. Their parameters
    and test-set mean squared errors are printed, and both fitted curves are
    drawn over a scatter plot of the train and test points.

    Parameters
    ----------
    X : array-like
        Values of the single predictor, one per game.
    Y : array-like
        Game durations in minutes, aligned with ``X``.
    xlabel : str
        Label of the x axis.
    degree : int, default 2
        Degree of the polynomial features used by the "GLM" model.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` so that the split, and hence
        the printed errors and the plot, are reproducible between runs.
        Pass ``None`` for a different random split on every call.
    """
    # Accept pandas Series as well as NumPy arrays; `.reshape` below needs arrays.
    X = np.asarray(X)
    Y = np.asarray(Y)

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state
    )
    # scikit-learn estimators expect a 2-D feature matrix.
    X_train_col = X_train.reshape(-1, 1)
    X_test_col = X_test.reshape(-1, 1)

    model_lin = LinearRegression()
    model_lin.fit(X_train_col, Y_train)

    print(f'Linear model parameters: {np.round(model_lin.coef_,5)}, {np.round(model_lin.intercept_,5)}')
    mse_lin = mean_squared_error(Y_test, model_lin.predict(X_test_col))
    print(f'Mean squared error (lin): {mse_lin:0.3}\n')

    model_glm = LinearRegression()
    gen_features = PolynomialFeatures(degree, include_bias=True, interaction_only=False)
    # Fit the feature expansion on the training data only, then reuse it
    # (`transform`) for the test data and the plotting grid.
    model_glm.fit(gen_features.fit_transform(X_train_col), Y_train)

    print(f'GLM parameters: {np.round(model_glm.coef_,5)}, {np.round(model_glm.intercept_,5)}')
    mse_glm = mean_squared_error(Y_test, model_glm.predict(gen_features.transform(X_test_col)))
    print(f'Mean squared error (GLM): {mse_glm:0.3}\n')

    # Evenly spaced grid over the observed range, used to draw both fitted curves.
    x_grid = np.linspace(start=X.min(), stop=X.max(), num=300)
    x_grid_col = x_grid.reshape(-1, 1)
    y_lin_pred = model_lin.predict(x_grid_col)
    y_glm_pred = model_glm.predict(gen_features.transform(x_grid_col))

    plt.figure(figsize=(12,6))
    plt.scatter(X_train, Y_train, label='dane treningowe', alpha=0.7)
    plt.scatter(X_test, Y_test, edgecolor='black', facecolor='none', label='dane testujące')
    plt.plot(x_grid, y_lin_pred, label='model liniowy', color='tab:orange')
    plt.plot(x_grid, y_glm_pred, label='model GLM', color='tab:red')
    plt.xlabel(xlabel)
    plt.ylabel('Czas trwania gry (min)')
    plt.legend(fontsize=12, shadow=True, loc='lower right')
    plt.ylim([Y.min()-0.1, Y.max()+0.5])
    plt.grid(True)
    plt.show()


In [None]:
Y = df['game_duration_in_min'].values

# (column, x-axis label) pairs; one figure is drawn per pair, in this order.
single_variable_plots = [
    ('total_kills', 'Liczba zabójstw (suma)'),
    ('total_gold_earned', 'Ilość zdobytego złota (suma)'),
    ('total_minions_killed', 'Liczba zabitych minionów (suma)'),
    ('total_turret_kills', 'Liczba zniszczonych wież (suma)'),
    ('total_dragon_kills', 'Liczba zabitych smoków (suma)'),
    ('total_damage_dealt_to_champions', 'Ilość obrażeń zadanych bohaterom (suma)'),
    ('total_baron_kills', 'Liczba zabitych baronów (suma)'),
    ('avg_champ_level', 'Średni level bohaterów'),
    ('total_inhibitor_kills', 'Liczba zniszczonych inhibitorów (suma)'),
    ('total_items_purchased', 'Liczba zakupionych przedmiotów (suma)'),
]

for column, xlabel in single_variable_plots:
    draw_plot_duration_on_one_var(df[column].values, Y, xlabel)

In [None]:
# Game duration predicted from several variables with a linear regression model
def draw_plot_duration_on_vars_lin(X, Y, random_state=42):
    """Fit a multi-variable linear regression and plot predicted vs. actual duration.

    The data are split 80/20 into train and test sets and a
    ``LinearRegression`` is fitted on the training part. Its parameters and
    test-set mean squared error are printed, and the test predictions are
    scattered against the true durations together with the ``y = x``
    reference line.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix.
    Y : array-like of shape (n_samples,)
        Game durations in minutes.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` so that the split, and hence
        the reported error, is reproducible between runs. Pass ``None`` for
        a different random split on every call.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state
    )

    model_lin = LinearRegression()
    model_lin.fit(X_train, Y_train)
    Y_pred_lin = model_lin.predict(X_test)
    mse_lin = mean_squared_error(Y_test, Y_pred_lin)

    print(f'Linear model parameters: {np.round(model_lin.coef_,5)}, {np.round(model_lin.intercept_,5)}')
    print(f'Mean squared error (lin): {mse_lin:0.3}\n')

    plt.figure(figsize=(12, 6))

    plt.scatter(Y_test, Y_pred_lin, color='blue', label='Predykcje - model liniowy', alpha=0.7)

    # Reference diagonal: a perfect model would put every point on this line.
    # np.min/np.max avoid a Python-level loop over the test targets.
    lowest, highest = np.min(Y_test), np.max(Y_test)
    plt.plot([lowest, highest], [lowest, highest], color='red', lw=2, label='Idealne dopasowanie')

    plt.xlabel('Rzeczywisty czas trwania gry (min)')
    plt.ylabel('Przewidywany czas trwania gry (min)')
    plt.legend()
    plt.grid(True)
    plt.show()

# Game duration predicted from several variables with the "GLM" model
# (a linear regression fitted on polynomial features)
def draw_plot_duration_on_vars_glm(X, Y, degree=2, random_state=42):
    """Fit a polynomial-feature regression and plot predicted vs. actual duration.

    The data are split 80/20 into train and test sets. The predictors are
    expanded with ``PolynomialFeatures`` and a ``LinearRegression`` is fitted
    on the expanded training data. Its parameters and test-set mean squared
    error are printed, and the test predictions are scattered against the
    true durations together with the ``y = x`` reference line.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix.
    Y : array-like of shape (n_samples,)
        Game durations in minutes.
    degree : int, default 2
        Degree of the polynomial feature expansion.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` so that the split, and hence
        the reported error, is reproducible between runs. Pass ``None`` for
        a different random split on every call.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state
    )

    model_glm = LinearRegression()
    gen_features = PolynomialFeatures(degree, include_bias=True, interaction_only=False)
    # Fit the feature expansion on the training data only and reuse it
    # (`transform`) for the test data.
    model_glm.fit(gen_features.fit_transform(X_train), Y_train)
    Y_pred_glm = model_glm.predict(gen_features.transform(X_test))
    mse_glm = mean_squared_error(Y_test, Y_pred_glm)

    print(f'GLM parameters: {np.round(model_glm.coef_,5)}, {np.round(model_glm.intercept_,5)}')
    print(f'Mean squared error (GLM): {mse_glm:0.3}\n')

    plt.figure(figsize=(12, 6))

    plt.scatter(Y_test, Y_pred_glm, color='green', label='Predykcje - model GLM', alpha=0.7)

    # Reference diagonal: a perfect model would put every point on this line.
    # np.min/np.max avoid a Python-level loop over the test targets.
    lowest, highest = np.min(Y_test), np.max(Y_test)
    plt.plot([lowest, highest], [lowest, highest], color='red', lw=2, label='Idealne dopasowanie')

    plt.xlabel('Rzeczywisty czas trwania gry (min)')
    plt.ylabel('Przewidywany czas trwania gry (min)')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Predictors: all ten aggregated match statistics.
X = df.loc[:, [
    'total_kills',
    'total_gold_earned',
    'total_minions_killed',
    'total_turret_kills',
    'total_dragon_kills',
    'total_damage_dealt_to_champions',
    'total_baron_kills',
    'avg_champ_level',
    'total_inhibitor_kills',
    'total_items_purchased',
]]
Y = df.loc[:, 'game_duration_in_min']

# Linear model first, then the polynomial ("GLM") model.
for draw_plot in (draw_plot_duration_on_vars_lin, draw_plot_duration_on_vars_glm):
    draw_plot(X, Y)

In [None]:
# Predictors: gold, minions, damage, champion level and items only.
X = df.loc[:, [
    'total_gold_earned',
    'total_minions_killed',
    'total_damage_dealt_to_champions',
    'avg_champ_level',
    'total_items_purchased',
]]
Y = df.loc[:, 'game_duration_in_min']

# Linear model first, then the polynomial ("GLM") model.
for draw_plot in (draw_plot_duration_on_vars_lin, draw_plot_duration_on_vars_glm):
    draw_plot(X, Y)

In [None]:
# Predictors: kill and objective counts only.
X = df.loc[:, [
    'total_kills',
    'total_turret_kills',
    'total_dragon_kills',
    'total_baron_kills',
    'total_inhibitor_kills',
]]
Y = df.loc[:, 'game_duration_in_min']

# Linear model first, then the polynomial ("GLM") model.
for draw_plot in (draw_plot_duration_on_vars_lin, draw_plot_duration_on_vars_glm):
    draw_plot(X, Y)

In [None]:
# Assign a game to one of three duration buckets
def categorize_game_duration(duration, short_max=25, medium_max=35):
    """Return the duration bucket of a game: 'short', 'medium' or 'long'.

    Parameters
    ----------
    duration : float
        Game duration, in the same unit as the thresholds (minutes in this
        notebook).
    short_max : float, default 25
        Inclusive upper bound of the 'short' bucket.
    medium_max : float, default 35
        Inclusive upper bound of the 'medium' bucket.

    Returns
    -------
    str
        'short' if ``duration <= short_max``, 'medium' if
        ``short_max < duration <= medium_max``, otherwise 'long'.
        A NaN duration fails both comparisons and is therefore reported
        as 'long'.
    """
    if duration <= short_max:
        return 'short'
    if duration <= medium_max:
        return 'medium'
    return 'long'

# Classification report and confusion matrix for an SVC classifier
def draw_plot_duration_clf(X, Y, random_state=42):
    """Train an RBF-kernel SVC on the data and visualise its test performance.

    The data are split 80/20 into train and test sets. The features are
    standardized and an ``SVC(kernel='rbf')`` is fitted on the training
    part. The classification report for the test set is printed and the
    confusion matrix is drawn as a heatmap.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix.
    Y : array-like of shape (n_samples,)
        Class label of every sample.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` so that the split, and hence
        the reported metrics, are reproducible between runs. Pass ``None``
        for a different random split on every call.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state
    )

    # SVMs are not scale invariant: without standardization the features with
    # the largest magnitudes dominate the RBF kernel distances. Putting the
    # scaler in the pipeline means it is fitted on the training data only.
    clf = make_pipeline(StandardScaler(), SVC(kernel='rbf'))
    clf.fit(X_train, Y_train)

    Y_pred = clf.predict(X_test)

    print("Classification Report:")
    print(classification_report(Y_test, Y_pred))

    # Pin the label order so the matrix rows/columns always match the tick
    # labels, even if some class is absent from the test split.
    class_labels = clf.classes_
    conf_matrix = confusion_matrix(Y_test, Y_pred, labels=class_labels)

    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Przewidywana klasa')
    plt.ylabel('Rzeczywista klasa')
    plt.title('Macierz pomyłek')
    plt.show()

In [None]:
# Label every game with its duration bucket; this is the classification target.
df['game_duration_category'] = df['game_duration_in_min'].map(categorize_game_duration)

# Predictors: all ten aggregated match statistics.
X = df.loc[:, [
    'total_kills',
    'total_gold_earned',
    'total_minions_killed',
    'total_turret_kills',
    'total_dragon_kills',
    'total_damage_dealt_to_champions',
    'total_baron_kills',
    'avg_champ_level',
    'total_inhibitor_kills',
    'total_items_purchased',
]]
Y = df.loc[:, 'game_duration_category']

draw_plot_duration_clf(X, Y)