In [223]:
import polars as pl
from polars import col
import torch

In [206]:
# Location of the raw match export. Forward slashes are used because they are
# accepted as a separator on Windows as well as on POSIX systems, whereas a
# backslash is a separator on Windows only.
PATH = 'dataset/2023_LoL_esports_match_data_from_OraclesElixir.csv'

# Explicit dtype for every column that is read from the CSV.
COLUMNS_TYPES = {
    'gameid': pl.Utf8,
    'league': pl.Utf8,
    'side': pl.Utf8,
    'position': pl.Utf8,
    'champion': pl.Utf8,
    'result': pl.Int8
}

# The columns to read are exactly the ones that have a declared dtype; deriving
# the list from the mapping keeps the two from drifting apart.
COLUMNS_NEEDED = list(COLUMNS_TYPES)

In [225]:
# Read only the configured columns with their explicit dtypes, then discard
# every row that contains a null in any of them.
read_options = dict(
    source=PATH,
    has_header=True,
    columns=COLUMNS_NEEDED,
    dtypes=COLUMNS_TYPES,
)
df = pl.read_csv(**read_options).drop_nulls()

In [226]:
# Sanity check: the row count must be exactly ten times the number of distinct
# game ids (presumably five players on each of two sides — confirm against the
# data source).
assert 10 * df['gameid'].n_unique() == df.height

In [227]:
# Sorted so that the champion -> index mapping is identical on every run.
# `unique()` on its own gives no ordering guarantee, which would make tensors
# saved by different runs disagree about what each index means.
LIST_OF_CHAMPIONS = sorted(df['champion'].unique().to_list())

# Champion name -> position of that name in LIST_OF_CHAMPIONS.
CHAMP_TO_IDX = {champ: idx for idx, champ in enumerate(LIST_OF_CHAMPIONS)}

# Replace each champion name in the 'champion' column with its integer index.
# The column keeps its name; no separate index column is created.
df = df.with_columns(
    pl.col('champion').apply(lambda x: CHAMP_TO_IDX.get(x, x))
)

In [228]:
# Collapse the per-player rows into one row per
# (gameid, league, side, result) combination, gathering that group's
# 'champion' values into a list column named 'champions'.
group_keys = ['gameid', 'league', 'side', 'result']
champions_of_group = pl.col('champion').apply(list).alias('champions')
df = df.groupby(group_keys).agg(champions_of_group)

In [229]:
def _winner_from_blue_row(flag):
    """Name the winning side given the 'result' flag of a Blue row."""
    if flag == 1:
        return "Blue"
    return "Red"


def _winner_from_red_row(flag):
    """Name the winning side given the 'result' flag of a Red row."""
    if flag == 1:
        return "Red"
    return "Blue"


def _blue_won(winner):
    """Encode the winning side as 1 (Blue) or 0 (anything else)."""
    if winner == "Blue":
        return 1
    return 0


# Partition the grouped rows by side. 'side' is dropped afterwards and the
# list column is renamed to say which team it belongs to.
is_blue = pl.col('side') == 'Blue'
is_red = pl.col('side') == 'Red'
df_blue = df.filter(is_blue).drop('side').rename({'champions': 'Blue_champions'})
df_red = df.filter(is_red).drop('side').rename({'champions': 'Red_champions'})

# Rewrite 'result' in both frames as the name of the winning side, so that
# the two rows of one game carry the same value.
df_blue = df_blue.with_columns(
    pl.col('result').apply(_winner_from_blue_row).alias('result')
)
df_red = df_red.with_columns(
    pl.col('result').apply(_winner_from_red_row).alias('result')
)

# Both frames, ordered by game, must name the same winner row for row.
blue_winners = df_blue.sort(by="gameid")["result"]
red_winners = df_red.sort(by="gameid")["result"]
assert blue_winners.eq(red_winners).all()

# Bring the two sides of each game together into a single row.
df = df_blue.join(df_red, on=['gameid', 'league', 'result'])

# Numeric label: 1 if Blue won, 0 if Red won.
df = df.with_columns(
    pl.col('result').apply(_blue_won).alias('result_binary')
)
    

In [260]:
SEED = 42          # fixed so the shuffle, and therefore the split, is reproducible
TEST_PERCENT = 20  # share of the games held out for testing

# Sort before shuffling: a seeded shuffle only reproduces the same permutation
# when it starts from the same row order, and the order produced by the
# earlier grouping/joining steps is not guaranteed to be stable between runs.
df = df.sort(by="gameid").sample(fraction=1, shuffle=True, seed=SEED)

test_size = TEST_PERCENT * df.shape[0] // 100

# The first `test_size` rows form the test set and all remaining rows the
# training set. The remainder is taken with an explicit non-negative count
# because `tail(-test_size)` degenerates to `tail(0)` (an empty frame) when
# `test_size` is 0.
test = df.head(test_size)
train = df.tail(df.shape[0] - test_size)

In [261]:
# Pull the three training columns out of the frame as plain Python lists.
blue_champions_list_train, red_champions_list_train, result_list_train = (
    train[column].to_list()
    for column in ('Blue_champions', 'Red_champions', 'result_binary')
)

In [262]:
# Pull the three test columns out of the frame as plain Python lists.
blue_champions_list_test, red_champions_list_test, result_list_test = (
    test[column].to_list()
    for column in ('Blue_champions', 'Red_champions', 'result_binary')
)

In [281]:
# Each list is converted to a tensor and written to its own .pt file, in the
# order listed here (training files first, then test files).
# NOTE(review): the backslash in these file names is a path separator on
# Windows only — confirm the target platform before running elsewhere.
lists_by_file = {
    "dataset\\blue_champions_train.pt": blue_champions_list_train,
    "dataset\\red_champions_train.pt": red_champions_list_train,
    "dataset\\result_train.pt": result_list_train,
    "dataset\\blue_champions_test.pt": blue_champions_list_test,
    "dataset\\red_champions_test.pt": red_champions_list_test,
    "dataset\\result_test.pt": result_list_test,
}
for target_file, values in lists_by_file.items():
    torch.save(torch.tensor(values), f=target_file)

In [296]:
import sys, importlib

# Import first so the module is guaranteed to be loaded: on a fresh kernel
# `sys.modules['dataset_cleaning']` does not exist yet, and looking it up in
# order to reload it raised KeyError. The reload afterwards picks up edits made
# to the module since the kernel first imported it.
import dataset_cleaning
importlib.reload(dataset_cleaning)

In [303]:
# Call generate_dataset() from the dataset_cleaning module and keep whatever it
# returns in `d`.
# NOTE(review): what the function returns or writes is not visible from this
# notebook — check the dataset_cleaning module.
d = dataset_cleaning.generate_dataset()

In [298]:
from torch.utils.data import Dataset

class LeagueOfLegendsDataset(Dataset):
    """Dataset over the saved champion and result files of one split.

    Every item is a ``(blue_champions, red_champions, result)`` triple taken
    from the same index of the three objects loaded at construction time.
    """

    def __init__(self, type="train"):
        """Load the three files belonging to the requested split.

        Args:
            type: Which split to load, either ``"train"`` or ``"test"``.

        Raises:
            AssertionError: If ``type`` is neither ``"train"`` nor ``"test"``.
        """
        assert type in ("train", "test")

        # NOTE(review): the loaded objects are presumably the tensors written
        # by the save step of this notebook — confirm. The backslash in the
        # file names is a path separator on Windows only.
        self.blue_champions = torch.load(f=f"dataset\\blue_champions_{type}.pt")
        self.red_champions = torch.load(f=f"dataset\\red_champions_{type}.pt")
        self.result = torch.load(f=f"dataset\\result_{type}.pt")
        print("Dataset loaded")

    def __len__(self):
        """Return the number of samples, checking the three parts agree."""
        sample_count = len(self.blue_champions)
        assert sample_count == len(self.red_champions) == len(self.result)
        return sample_count

    def __getitem__(self, idx):
        """Return the ``(blue, red, result)`` entries stored at ``idx``."""
        parts = (self.blue_champions, self.red_champions, self.result)
        return tuple(part[idx] for part in parts)


In [299]:
from torch.utils.data import DataLoader

# Both loaders use the same settings: one sample per batch, reshuffled on
# every pass over the data.
loader_options = dict(batch_size=1, shuffle=True)

dataset_train = LeagueOfLegendsDataset(type="train")
data_loader_train = DataLoader(dataset_train, **loader_options)

dataset_test = LeagueOfLegendsDataset(type="test")
data_loader_test = DataLoader(dataset_test, **loader_options)

Dataset loaded
Dataset loaded


In [301]:
# Show how many batches the training and the test loader each provide.
len(data_loader_train), len(data_loader_test)

(6226, 1556)

In [307]:
# Peek at a single batch from the training loader: print the first one the
# loader yields and stop iterating immediately.
for first_batch in data_loader_train:
    print(first_batch)
    break

[tensor([[ 20,  50, 112,  96, 104]]), tensor([[ 58,  19,  41, 141,   9]]), tensor([1])]
