In [1]:
from DataLoader import DataLoader
import GraphManager
from GNN import HeteroGNN
import Learning
import numpy as np
import pandas as pd
import torch
import torch.optim
import torch_geometric
import torch_geometric.data
import networkx as nx
import matplotlib.pyplot as plt
from Config import GLOBALS, HYPERPARAETERS
from tqdm import tqdm
import pickle
import json
import torchinfo

In [2]:
# Load the Kaggle CSV and create the graph manager on the configured device.
# NOTE(review): the meaning of the second DataLoader argument (11) is not visible here — confirm.
dataset_csv = 'data/KaggleDataset_withBO.csv'
dl = DataLoader(dataset_csv, 11)
target_device = GLOBALS.DEVICE.value
gm = GraphManager.GraphManager(dl, target_device)

In [3]:
# Build and persist one graph set per league, unless a previous run already saved them.
if not GLOBALS.already_saved.value:
    for league, league_df in dl.dataset.groupby('league'):
        gm_path = f'{GLOBALS.SavePath.value}{league}.gm'
        print(f'Making {league} Graphs...')
        gm.make(
            league_df,
            mode='CW',
            validation_portion=HYPERPARAETERS.ValidationPortion.value,
            test_portion=HYPERPARAETERS.TestPortion.value,
            window_size=50,
            saveto=gm_path
        )
        # Report the size of the graph list and of each split, then where it was written.
        report = [
            f'Graph List Length: {len(gm.graph_list)}',
            f'Train Mask: {len(gm.train_mask)}',
            f'Validation Mask: {len(gm.validation_mask)}',
            f'Test Mask: {len(gm.test_mask)}',
            f'Saved in: {gm_path}',
            '=' * 80,
        ]
        print('\n'.join(report))

Making Belgium Jupiler League Graphs...


  0%|          | 0/189 [00:00<?, ?it/s]

Graph List Length: 189
Train Mask: 151
Validation Mask: 19
Test Mask: 19
Saved in: SavePath/Belgium Jupiler League.gm
Making England Premier League Graphs...


  0%|          | 0/304 [00:00<?, ?it/s]

Graph List Length: 304
Train Mask: 243
Validation Mask: 30
Test Mask: 31
Saved in: SavePath/England Premier League.gm
Making France Ligue 1 Graphs...


  0%|          | 0/304 [00:00<?, ?it/s]

Graph List Length: 304
Train Mask: 243
Validation Mask: 30
Test Mask: 31
Saved in: SavePath/France Ligue 1.gm
Making Germany 1. Bundesliga Graphs...


  0%|          | 0/272 [00:00<?, ?it/s]

Graph List Length: 272
Train Mask: 217
Validation Mask: 27
Test Mask: 28
Saved in: SavePath/Germany 1. Bundesliga.gm
Making Italy Serie A Graphs...


  0%|          | 0/304 [00:00<?, ?it/s]

Graph List Length: 304
Train Mask: 243
Validation Mask: 30
Test Mask: 31
Saved in: SavePath/Italy Serie A.gm
Making Netherlands Eredivisie Graphs...


  0%|          | 0/252 [00:00<?, ?it/s]

Graph List Length: 252
Train Mask: 201
Validation Mask: 25
Test Mask: 26
Saved in: SavePath/Netherlands Eredivisie.gm
Making Portugal Liga ZON Sagres Graphs...


  0%|          | 0/231 [00:00<?, ?it/s]

Graph List Length: 231
Train Mask: 184
Validation Mask: 23
Test Mask: 24
Saved in: SavePath/Portugal Liga ZON Sagres.gm
Making Scotland Premier League Graphs...


  0%|          | 0/304 [00:00<?, ?it/s]

Graph List Length: 304
Train Mask: 243
Validation Mask: 30
Test Mask: 31
Saved in: SavePath/Scotland Premier League.gm
Making Spain LIGA BBVA Graphs...


  0%|          | 0/304 [00:00<?, ?it/s]

Graph List Length: 304
Train Mask: 243
Validation Mask: 30
Test Mask: 31
Saved in: SavePath/Spain LIGA BBVA.gm


In [4]:
# Either resume a previous run (model + metric histories) or build a fresh model.
if GLOBALS.continue_training.value:
    # map_location puts the resumed model on the configured device, matching the
    # `.to(GLOBALS.DEVICE.value)` applied to a freshly built model below.
    model = torch.load(f'{GLOBALS.LoadPath.value}model.pth', map_location=GLOBALS.DEVICE.value)
    # NOTE(security): torch.load / pickle.load execute arbitrary code from the file;
    # only resume from run directories you trust.
    with open(f'{GLOBALS.LoadPath.value}lists.pl', 'rb') as pf:
        loss_list, train_acc_list, eval_acc_list = pickle.load(pf)
else:
    model = HeteroGNN(
        embedding_dims=[dl.entities.shape[0], HYPERPARAETERS.EmbeddingDim.value],
        conv_dims=HYPERPARAETERS.ConvDims.value,
        fc_dims=HYPERPARAETERS.FCDims.value,
        dropout=HYPERPARAETERS.DropOuts.value,
        classify=True
    ).to(GLOBALS.DEVICE.value)
    model.reset_parameters()

    # Record the hyper-parameters of this run next to its checkpoints.
    with open(f'{GLOBALS.SavePath.value}HyperParameters.json', 'w') as fp:
        json.dump({name: member.value for name, member in HYPERPARAETERS.__members__.items()}, fp, indent=4)
    # Metric histories start empty for a fresh run; later cells append to them.
    loss_list = []
    train_acc_list = []
    eval_acc_list = []


# Loss and optimizer shared by the training cells below.
criterion = torch.nn.NLLLoss()
# criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=HYPERPARAETERS.LearningRate.value)

In [5]:
# Display the layer-by-layer parameter counts of the model.
torchinfo.summary(model)

Layer (type:depth-idx)                   Param #
HeteroGNN                                --
├─Embedding: 1-1                         78,128
├─ModuleList: 1-2                        --
│    └─HeteroConv: 2-1                   --
│    │    └─ModuleDict: 3-1              1,360
│    └─HeteroConv: 2-2                   --
│    │    └─ModuleDict: 3-2              2,512
│    └─HeteroConv: 2-3                   --
│    │    └─ModuleDict: 3-3              2,512
├─ModuleList: 1-3                        --
│    └─Linear: 2-4                       528
│    └─Linear: 2-5                       136
│    └─Linear: 2-6                       27
├─LogSoftmax: 1-4                        --
Total params: 84,579
Trainable params: 84,579
Non-trainable params: 0

In [6]:
print('*********************************** Phase 1 ***********************************')
# Phase 1: for every round, train the single shared model on each league in turn,
# checkpointing after each league and after each full round.

# The graph files live in LoadPath when an earlier run built them, otherwise in SavePath.
graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value
save_dir = GLOBALS.SavePath.value

# `round_idx` (not `round`) so the builtin round() is not shadowed for the rest of the kernel session.
for round_idx in range(HYPERPARAETERS.Phase1Rounds.value):
    round_tag = f'R{round_idx + 1}'
    print(f'############################## Round {round_idx + 1} ##############################')
    for league, league_df in dl.dataset.groupby('league'):
        print(f'Training On: {league}')
        gm = GraphManager.load(f'{graph_dir}{league}.gm')
        try:
            max_acc = 0
            for epoch in range(HYPERPARAETERS.Phase1Epochs.value):
                loss, train_acc = Learning.train(
                    model=model,
                    graph_list=gm.graph_list,
                    train_indcs=gm.train_mask,
                    mode='RP',
                    criterion=criterion,
                    optimizer=optimizer,
                )

                val_acc = Learning.evaluation(
                    model=model,
                    graph_list=gm.graph_list,
                    eval_indcs=gm.validation_mask,
                    mode='RP'
                )

                print(f'=================================== EPOCH {epoch + 1} ===================================')
                print(f'Average Loss: {loss} - Train Accuracy: {train_acc: .3f} - Validation Accuracy: {val_acc: .3f}')

                loss_list.append(loss)
                train_acc_list.append(train_acc)
                eval_acc_list.append(val_acc)

                # Keep the checkpoint with the best validation accuracy seen on this
                # league in this round (`>=` means ties favour the later epoch).
                if val_acc >= max_acc:
                    max_acc = val_acc
                    torch.save(model, f'{save_dir}model_{league}_{round_tag}_MaxVal_Ph1.pth')

                # Periodic rolling checkpoint of the model and the metric histories.
                if (epoch + 1) % GLOBALS.SaveEvery.value == 0:
                    torch.save(model, f'{save_dir}model.pth')
                    with open(f'{save_dir}lists.pl', 'wb') as pf:
                        pickle.dump((loss_list, train_acc_list, eval_acc_list), pf)
        except KeyboardInterrupt:
            # Deliberate: Ctrl-C ends training on the current league early and moves on.
            pass
        # State of the model right after finishing this league in this round.
        torch.save(model, f'{save_dir}model_{league}_{round_tag}_Ph1.pth')
    # State of the model after a full pass over all leagues.
    torch.save(model, f'{save_dir}model_All_{round_tag}_Ph1.pth')

*********************************** Phase 1 ***********************************
############################## Round 1 ##############################
Training On: Belgium Jupiler League
Average Loss: 1.413246864415952 - Train Accuracy:  0.367
Validation Accuracy:  0.496
Average Loss: 1.140881172079124 - Train Accuracy:  0.404
Validation Accuracy:  0.496
Average Loss: 1.0910013156221403 - Train Accuracy:  0.444
Validation Accuracy:  0.496
Average Loss: 1.0786834513114778 - Train Accuracy:  0.464
Validation Accuracy:  0.496
Average Loss: 1.0693678192744982 - Train Accuracy:  0.458
Validation Accuracy:  0.496
Average Loss: 1.0591370688368942 - Train Accuracy:  0.462
Validation Accuracy:  0.496
Average Loss: 1.0608237425223093 - Train Accuracy:  0.457
Validation Accuracy:  0.496
Average Loss: 1.0555938048078524 - Train Accuracy:  0.458
Validation Accuracy:  0.496
Average Loss: 1.0629790192408277 - Train Accuracy:  0.462
Validation Accuracy:  0.496
Average Loss: 1.055216445433383 - Train Ac

In [7]:
# Validation accuracy of every Phase-1 checkpoint, one row per league.
# Columns are (round, kind) with kind in:
#   'League' - model saved right after training on that league,
#   'MaxVal' - best-validation checkpoint for that league,
#   'All'    - model saved at the end of the whole round.
round_labels = [f'R{r + 1}' for r in range(HYPERPARAETERS.Phase1Rounds.value)]
acc_dict = {(label, kind): [] for kind in ('League', 'MaxVal', 'All') for label in round_labels}

graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value

# Row labels are collected in the same (groupby) order in which the rows are filled.
# `dl.dataset['league'].unique()` is in order of appearance and can disagree with
# groupby's sorted order, which would attach accuracies to the wrong league.
league_names = []
for league, league_df in dl.dataset.groupby('league'):
    league_names.append(league)
    gm = GraphManager.load(f'{graph_dir}{league}.gm')
    for label in round_labels:
        checkpoint_paths = {
            'League': f'{GLOBALS.SavePath.value}model_{league}_{label}_Ph1.pth',
            'MaxVal': f'{GLOBALS.SavePath.value}model_{league}_{label}_MaxVal_Ph1.pth',
            'All': f'{GLOBALS.SavePath.value}model_All_{label}_Ph1.pth',
        }
        for kind, checkpoint_path in checkpoint_paths.items():
            tm = torch.load(checkpoint_path)
            acc_dict[(label, kind)].append(
                Learning.evaluation(tm, gm.graph_list, gm.validation_mask, 'RP')
            )
league_count = len(league_names)

eval_acc_df = pd.DataFrame(acc_dict, index=league_names).sort_index(axis='columns', level=0)
eval_acc_df

Unnamed: 0_level_0,R1,R1,R2,R2
Unnamed: 0_level_1,All,League,All,League
Belgium Jupiler League,0.47482,0.496403,0.517986,0.52518
England Premier League,0.496599,0.544218,0.452381,0.5
France Ligue 1,0.496479,0.521127,0.517606,0.517606
Germany 1. Bundesliga,0.504167,0.508333,0.495833,0.495833
Italy Serie A,0.472119,0.490706,0.472119,0.479554
Netherlands Eredivisie,0.526316,0.5311,0.497608,0.516746
Portugal Liga ZON Sagres,0.496732,0.51634,0.509804,0.51634
Scotland Premier League,0.490446,0.522293,0.452229,0.503185
Spain LIGA BBVA,0.532609,0.554348,0.51087,0.521739


In [8]:
# Test accuracy of every Phase-1 checkpoint, one row per league.
# Columns are (round, kind) with kind in:
#   'League' - model saved right after training on that league,
#   'MaxVal' - best-validation checkpoint for that league,
#   'All'    - model saved at the end of the whole round.
round_labels = [f'R{r + 1}' for r in range(HYPERPARAETERS.Phase1Rounds.value)]
acc_dict = {(label, kind): [] for kind in ('League', 'MaxVal', 'All') for label in round_labels}

graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value

# Row labels are collected in the same (groupby) order in which the rows are filled.
# `dl.dataset['league'].unique()` is in order of appearance and can disagree with
# groupby's sorted order, which would attach accuracies to the wrong league.
league_names = []
for league, league_df in dl.dataset.groupby('league'):
    league_names.append(league)
    gm = GraphManager.load(f'{graph_dir}{league}.gm')
    for label in round_labels:
        checkpoint_paths = {
            'League': f'{GLOBALS.SavePath.value}model_{league}_{label}_Ph1.pth',
            'MaxVal': f'{GLOBALS.SavePath.value}model_{league}_{label}_MaxVal_Ph1.pth',
            'All': f'{GLOBALS.SavePath.value}model_All_{label}_Ph1.pth',
        }
        for kind, checkpoint_path in checkpoint_paths.items():
            tm = torch.load(checkpoint_path)
            acc_dict[(label, kind)].append(
                Learning.evaluation(tm, gm.graph_list, gm.test_mask, 'RP')
            )
league_count = len(league_names)

test_acc_df = pd.DataFrame(acc_dict, index=league_names).sort_index(axis='columns', level=0)
test_acc_df

Unnamed: 0_level_0,R1,R1,R2,R2
Unnamed: 0_level_1,All,League,All,League
Belgium Jupiler League,0.431655,0.460432,0.460432,0.446043
England Premier League,0.421569,0.441176,0.401961,0.392157
France Ligue 1,0.462633,0.47331,0.47331,0.483986
Germany 1. Bundesliga,0.468254,0.464286,0.456349,0.460317
Italy Serie A,0.507299,0.50365,0.525547,0.507299
Netherlands Eredivisie,0.483254,0.497608,0.488038,0.488038
Portugal Liga ZON Sagres,0.529412,0.547059,0.535294,0.535294
Scotland Premier League,0.474684,0.481013,0.481013,0.5
Spain LIGA BBVA,0.572917,0.572917,0.569444,0.569444


In [14]:
# For each league (row), the (round, kind) column with the highest validation accuracy.
eval_acc_df.idxmax(axis=1)

Belgium Jupiler League      (R2, League)
England Premier League      (R1, League)
France Ligue 1              (R1, League)
Germany 1. Bundesliga       (R1, League)
Italy Serie A               (R1, League)
Netherlands Eredivisie      (R1, League)
Portugal Liga ZON Sagres    (R1, League)
Scotland Premier League     (R1, League)
Spain LIGA BBVA             (R1, League)
dtype: object

In [15]:
# For each league (row), the (round, kind) column with the highest test accuracy.
test_acc_df.idxmax(axis=1)

Belgium Jupiler League      (R1, League)
England Premier League      (R1, League)
France Ligue 1              (R2, League)
Germany 1. Bundesliga          (R1, All)
Italy Serie A                  (R2, All)
Netherlands Eredivisie      (R1, League)
Portugal Liga ZON Sagres    (R1, League)
Scotland Premier League     (R2, League)
Spain LIGA BBVA                (R1, All)
dtype: object

In [11]:
# Per league: best achievable test accuracy minus the test accuracy of the
# checkpoint that validation accuracy would have selected.
selected_by_validation = eval_acc_df.apply(lambda z: z.idxmax(), axis=1)
selected_test_acc = [
    row[selected_by_validation[idx]]
    for idx, row in test_acc_df.iterrows()
]
test_acc_df.max(axis=1) - selected_test_acc

Belgium Jupiler League      0.014388
England Premier League      0.000000
France Ligue 1              0.010676
Germany 1. Bundesliga       0.003968
Italy Serie A               0.021898
Netherlands Eredivisie      0.000000
Portugal Liga ZON Sagres    0.000000
Scotland Premier League     0.018987
Spain LIGA BBVA             0.000000
dtype: float64

In [None]:
# Incremental validation: for each league, start from its validation-selected Phase-1
# checkpoint, walk through the validation weeks one at a time (evaluate the week, then add
# it to the training set and fine-tune for `epochs` epochs), and record which number of
# fine-tuning epochs gives the best accumulated validation accuracy.
epoch_options = [0, 1, 2, 5, 10]
n_leagues = eval_acc_df.shape[0]
val_dic = {
    'epochs': [0] * n_leagues,
    # Floats from the start, so accuracies are not written into integer columns.
    'accuracy': [0.0] * n_leagues,
}
val_dic.update({e: [0.0] * n_leagues for e in epoch_options})
chosen_epochs = pd.DataFrame(
    val_dic,
    index=eval_acc_df.index
)

graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for lig, (best_R, best_type) in eval_acc_df.apply(lambda z: z.idxmax(), axis=1).items():
    # Everything below is keyed on `lig`, the league of this iteration. The previous
    # code used `league`, a stale variable left over from an earlier cell's loop.
    print(f'Incremental Validation on league: {lig}')
    gm = GraphManager.load(f'{graph_dir}{lig}.gm')

    # File name of the Phase-1 checkpoint chosen on validation accuracy for this league.
    source_name = lig if best_type in ["League", "MaxVal"] else "All"
    maxval_suffix = "_MaxVal" if best_type == "MaxVal" else ""
    base_checkpoint = f'{GLOBALS.SavePath.value}model_{source_name}_{best_R}{maxval_suffix}_Ph1.pth'

    for epochs in epoch_options:
        # Each setting starts again from the same checkpoint with a fresh optimizer.
        model = torch.load(base_checkpoint)
        optimizer = torch.optim.Adam(model.parameters(), lr=HYPERPARAETERS.LearningRate.value)

        # Initialised before the try, so an early interrupt cannot leave these
        # undefined or carrying values from the previous setting.
        train_list = gm.train_mask.copy()
        t_correct = 0
        t_total = 0
        try:
            for idx in gm.validation_mask:
                print(f'--- Testing Week: {idx}')
                correct, total = Learning.evaluation(
                    model,
                    gm.graph_list,
                    [idx],
                    'RP',
                    return_counts=True
                )
                print(f'(Correct, Total): {(correct, total)}')
                t_correct += correct
                t_total += total

                # The week just evaluated becomes training data for the following weeks.
                train_list.append(idx)

                for epoch in range(epochs):
                    loss, train_acc = Learning.train(
                        model=model,
                        graph_list=gm.graph_list,
                        train_indcs=train_list,
                        mode='RP',
                        criterion=criterion,
                        optimizer=optimizer,
                    )

                    print(f'=================================== EPOCH {epoch + 1} ===================================')
                    print(f'Average Loss: {loss} - Train Accuracy: {train_acc: .3f}')

                    loss_list.append(loss)
                    train_acc_list.append(train_acc)

        except KeyboardInterrupt:
            # Deliberate: Ctrl-C cuts the current setting short; partial counts are still reported.
            pass

        # NaN (never selected as best) when nothing was evaluated, instead of ZeroDivisionError.
        current_val_acc = t_correct / t_total if t_total else float('nan')
        print(f'Total (Correct, Total): {(t_correct, t_total)} - Validation Accuracy: {current_val_acc: .3f}')
        chosen_epochs.loc[lig, epochs] = current_val_acc
        torch.save(model, f'{GLOBALS.SavePath.value}model_{lig}_IncVal_{epochs}.pth')
        # Strict `>`: on ties the smaller number of epochs is kept.
        if current_val_acc > chosen_epochs.loc[lig, 'accuracy']:
            chosen_epochs.loc[lig, 'accuracy'] = current_val_acc
            chosen_epochs.loc[lig, 'epochs'] = epochs
            torch.save(model, f'{GLOBALS.SavePath.value}model_{lig}_IncVal_best.pth')

In [None]:
# Number of fine-tuning epochs chosen for each league by incremental validation.
# The winner is already stored in the 'epochs' column. A row-wise idxmax over the whole
# frame would also compare the 'epochs' and 'accuracy' bookkeeping columns and return a
# column label (a string), which cannot be passed to range() in Phase 3.
chosen_posit = chosen_epochs['epochs'].astype(int)
chosen_posit

In [9]:
print('*********************************** Phase 2 ***********************************')
# Phase 2: for each league, reload the Phase-1 checkpoint selected on validation accuracy
# and continue training on train + validation weeks, replaying the Phase-1 league order
# until the point (round / league) at which that checkpoint was originally taken.
PHASE2_EPOCHS = 20  # epochs per league visit in this phase
graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for lig, (best_R, best_type) in eval_acc_df.apply(lambda z: z.idxmax(), axis=1).items():
    source_name = lig if best_type in ["League", "MaxVal"] else "All"
    maxval_suffix = "_MaxVal" if best_type == "MaxVal" else ""
    model = torch.load(f'{GLOBALS.SavePath.value}model_{source_name}_{best_R}{maxval_suffix}_Ph1.pth')
    optimizer = torch.optim.Adam(model.parameters(), lr=HYPERPARAETERS.LearningRate.value)
    print(f'--------- Appending validation to League: {lig} - Chosen Model: {(best_R, best_type)}')

    per_league_checkpoint = best_type in ['League', 'MaxVal']

    # `round_idx` (not `round`) so the builtin round() is not shadowed.
    for round_idx in range(HYPERPARAETERS.Phase2Rounds.value):
        reached_best_round = f'R{round_idx + 1}' == best_R
        print(f'############################## Round {round_idx + 1} ##############################')
        for league, league_df in dl.dataset.groupby('league'):
            print(f'Training On: {league}')
            gm = GraphManager.load(f'{graph_dir}{league}.gm')
            try:
                # Validation weeks are now part of the training data.
                train_indcs = gm.train_mask + gm.validation_mask
                for epoch in range(PHASE2_EPOCHS):
                    loss, train_acc = Learning.train(
                        model=model,
                        graph_list=gm.graph_list,
                        train_indcs=train_indcs,
                        mode='RP',
                        criterion=criterion,
                        optimizer=optimizer,
                    )

                    print(f'=================================== EPOCH {epoch + 1} ===================================')
                    print(f'Average Loss: {loss} - Train Accuracy: {train_acc: .3f}')

                    loss_list.append(loss)
                    train_acc_list.append(train_acc)

                    # Periodic rolling checkpoint of the model and the metric histories.
                    if (epoch + 1) % GLOBALS.SaveEvery.value == 0:
                        torch.save(model, f'{GLOBALS.SavePath.value}model.pth')
                        with open(f'{GLOBALS.SavePath.value}lists.pl', 'wb') as pf:
                            pickle.dump((loss_list, train_acc_list, eval_acc_list), pf)
            except KeyboardInterrupt:
                # Deliberate: Ctrl-C ends training on the current league early and moves on.
                pass
            # A per-league checkpoint was taken right after `lig` in round `best_R`: stop there.
            if reached_best_round and per_league_checkpoint and league == lig:
                torch.save(model, f'{GLOBALS.SavePath.value}model_{lig}_Ph2.pth')
                break
        if reached_best_round and per_league_checkpoint:
            break
        # An 'All' checkpoint was taken at the end of round `best_R`: stop after the full round.
        if reached_best_round and best_type == 'All':
            torch.save(model, f'{GLOBALS.SavePath.value}model_{lig}_Ph2.pth')
            break

*********************************** Phase 2 ***********************************
--------- Appending validation to League: Belgium Jupiler League - Chosen Model: ('R2', 'League')
############################## Round 1 ##############################
Training On: Belgium Jupiler League
Average Loss: 0.9707359366757529 - Train Accuracy:  0.533
Average Loss: 0.960866042460714 - Train Accuracy:  0.530
Average Loss: 0.9714794305392674 - Train Accuracy:  0.547
Average Loss: 0.9571980581113271 - Train Accuracy:  0.546
Average Loss: 0.9587869358062744 - Train Accuracy:  0.537
Average Loss: 0.9686476593358176 - Train Accuracy:  0.523
Average Loss: 0.9608397936820984 - Train Accuracy:  0.524
Average Loss: 0.9713824769535235 - Train Accuracy:  0.529
Average Loss: 0.9488220091376985 - Train Accuracy:  0.530
Average Loss: 0.9637408307620458 - Train Accuracy:  0.531
Average Loss: 0.9471052043778556 - Train Accuracy:  0.549
Average Loss: 0.9546338125637599 - Train Accuracy:  0.536
Average Loss: 0.94611

In [8]:
# Evaluate each league's Phase-2 model on that league's test weeks and pool the counts.
t_acc_list = []
graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value
for league, league_df in dl.dataset.groupby('league'):
    print(f'Testing On: {league}')
    model = torch.load(f'{GLOBALS.SavePath.value}model_{league}_Ph2.pth')
    optimizer = torch.optim.Adam(model.parameters(), lr=HYPERPARAETERS.LearningRate.value)
    gm = GraphManager.load(f'{graph_dir}{league}.gm')
    try:
        counts = Learning.evaluation(
            model,
            gm.graph_list,
            gm.test_mask,
            'RP',
            return_counts=True
        )
        correct, total = counts
        t_acc_list.append((correct, total))
        print(f'Test Accuracy: {correct / total: .3f}')
    except KeyboardInterrupt:
        pass
# Column sums give the pooled (correct, total) over all leagues.
pooled_counts = np.sum(t_acc_list, axis=0)
print(f'Overal Accuracy: {pooled_counts[0] / pooled_counts[1]: .3f}')

Testing On: Belgium Jupiler League
Test Accuracy:  0.426
Testing On: England Premier League
Test Accuracy:  0.446
Testing On: France Ligue 1
Test Accuracy:  0.475
Testing On: Germany 1. Bundesliga
Test Accuracy:  0.508
Testing On: Italy Serie A
Test Accuracy:  0.518
Testing On: Netherlands Eredivisie
Test Accuracy:  0.514
Testing On: Poland Ekstraklasa
Test Accuracy:  0.531
Testing On: Portugal Liga ZON Sagres
Test Accuracy:  0.509
Testing On: Scotland Premier League
Test Accuracy:  0.500
Testing On: Spain LIGA BBVA
Test Accuracy:  0.542
Testing On: Switzerland Super League
Test Accuracy:  0.496


In [4]:
print('*********************************** Phase 3 ***********************************')
# Phase 3: incremental testing. For each league, start from its Phase-2 model and walk
# through the test weeks one at a time: evaluate the week, then add it to the training
# set and fine-tune for the number of epochs chosen for that league (`chosen_posit`).
test_correct_list = []
graph_dir = GLOBALS.LoadPath.value if GLOBALS.already_saved.value else GLOBALS.SavePath.value
for league, league_df in dl.dataset.groupby('league'):
    print(f'Testing On: {league}')
    model = torch.load(f'{GLOBALS.SavePath.value}model_{league}_Ph2.pth')
    optimizer = torch.optim.Adam(model.parameters(), lr=HYPERPARAETERS.LearningRate.value)
    gm = GraphManager.load(f'{graph_dir}{league}.gm')

    # Initialised before the try, so an early interrupt cannot leave these undefined
    # or carrying the previous league's values.
    train_list = gm.train_mask + gm.validation_mask
    t_correct = 0
    t_total = 0
    try:
        for idx in gm.test_mask:
            print(f'--- Testing Week: {idx}')
            correct, total = Learning.evaluation(
                model,
                gm.graph_list,
                [idx],
                'RP',
                return_counts=True
            )
            print(f'(Correct, Total): {(correct, total)}')
            t_correct += correct
            t_total += total

            # The week just evaluated becomes training data for the following weeks.
            train_list.append(idx)

            for epoch in range(chosen_posit[league]):
                loss, train_acc = Learning.train(
                    model=model,
                    graph_list=gm.graph_list,
                    train_indcs=train_list,
                    mode='RP',
                    criterion=criterion,
                    optimizer=optimizer,
                )

                print(f'=================================== EPOCH {epoch + 1} ===================================')
                print(f'Average Loss: {loss} - Train Accuracy: {train_acc: .3f}')

                loss_list.append(loss)
                train_acc_list.append(train_acc)

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C cuts the current league short; partial counts are still reported.
        pass
    # NaN when nothing was evaluated, instead of ZeroDivisionError.
    league_test_acc = t_correct / t_total if t_total else float('nan')
    print(f'Total (Correct, Total): {(t_correct, t_total)} - Test Accuracy: {league_test_acc: .3f}')
    test_correct_list.append((t_correct, t_total))
    torch.save(model, f'{GLOBALS.SavePath.value}model_{league}_Ph3.pth')

# Persist the per-league (correct, total) counts for later analysis.
with open(f'{GLOBALS.SavePath.value}test_list.pl', 'wb') as pf:
    pickle.dump(test_correct_list, pf)

*********************************** Phase 3 ***********************************
Testing On: Belgium Jupiler League
--- Testing Week: 175
(Correct, Total): (5, 8)
Average Loss: 1.0326036898927256 - Train Accuracy:  0.473
Average Loss: 1.0251176001673394 - Train Accuracy:  0.494
--- Testing Week: 176
(Correct, Total): (3, 8)
Average Loss: 1.0135825419493314 - Train Accuracy:  0.499
Average Loss: 1.0203391114870708 - Train Accuracy:  0.492
--- Testing Week: 177
(Correct, Total): (3, 8)
Average Loss: 1.0072978475455487 - Train Accuracy:  0.496
Average Loss: 1.0158864452262943 - Train Accuracy:  0.512
--- Testing Week: 178
(Correct, Total): (4, 8)
Average Loss: 1.0035935610366267 - Train Accuracy:  0.515
Average Loss: 1.0076780742107156 - Train Accuracy:  0.483
--- Testing Week: 179
(Correct, Total): (3, 8)
Average Loss: 0.9982454240322113 - Train Accuracy:  0.491
Average Loss: 1.001880232989788 - Train Accuracy:  0.498
--- Testing Week: 180
(Correct, Total): (3, 7)
Average Loss: 1.00463508

In [None]:
# Per-league Phase-3 test accuracy (correct / total).
test_acc = np.array(test_correct_list)
# `test_correct_list` was filled while iterating `dl.dataset.groupby('league')`, so the row
# labels must follow that same order. `unique()` is in order of appearance and can
# disagree with it, which would attach accuracies to the wrong league.
phase3_leagues = [name for name, _ in dl.dataset.groupby('league')]
pd.DataFrame(test_acc[:, 0] / test_acc[:, 1], index=phase3_leagues)

In [None]:
# Overall Phase-3 accuracy: pooled correct predictions over pooled totals.
pooled_counts = test_acc.sum(axis=0)
pooled_counts[0] / pooled_counts[1]

In [None]:
#Run Everything Above

## Test Cells - DO NOT RUN

In [None]:
#Results Prediction
print('*********************************** Phase 1 ***********************************')
Learning.Phase1(dl, model, criterion, optimizer, Learning.train_step, Learning.evaluation, already_saved, 2, 100, loss_list, train_acc_list, eval_acc_list)
print('*********************************** Phase 2 ***********************************')
Learning.Phase2(dl, model, criterion, optimizer, Learning.train_step, Learning.evaluation, already_saved, 1, 20, loss_list, train_acc_list, eval_acc_list)
print('*********************************** Phase 3 ***********************************')
Learning.Phase3(dl, model, criterion, optimizer, Learning.train_step, Learning.evaluation, already_saved, 1, loss_list, train_acc_list, eval_acc_list)

In [7]:
# Goal-difference variant: only Phase 1 is executed, with
# Learning.goal_diff_train_step / Learning.goal_diff_evaluation as callbacks.
# Phases 2 and 3 are commented out below, but their banners are still printed,
# so a banner in the output does not mean that phase actually ran.
# NOTE(review): `already_saved`, `model`, `criterion`, `optimizer` and the three
# lists must already exist in the kernel; this cell does not create them.
print('*********************************** Phase 1 ***********************************')
Learning.Phase1(dl, model, criterion, optimizer, Learning.goal_diff_train_step, Learning.goal_diff_evaluation, already_saved, 2, 100, loss_list, train_acc_list, eval_acc_list)
print('*********************************** Phase 2 ***********************************')
# Learning.Phase2(dl, model, criterion, optimizer, Learning.goal_diff_train_step, Learning.goal_diff_evaluation, already_saved, 1, 20, loss_list, train_acc_list, eval_acc_list)
print('*********************************** Phase 3 ***********************************')
# Learning.Phase3(dl, model, criterion, optimizer, Learning.goal_diff_train_step, Learning.goal_diff_evaluation, already_saved, 1, loss_list, train_acc_list, eval_acc_list)

*********************************** Phase 1 ***********************************
############################## Round 1 ##############################
Training On: Belgium Jupiler League
Average Loss: 3.3410798667106203 - Train Accuracy:  0.291
Validation Accuracy:  0.254
Average Loss: 3.2804844480676527 - Train Accuracy:  0.303
Validation Accuracy:  0.254
Average Loss: 3.160364084280072 - Train Accuracy:  0.297
Validation Accuracy:  0.254
Average Loss: 3.0729300072655463 - Train Accuracy:  0.329
Validation Accuracy:  0.254
Average Loss: 3.1245527452048965 - Train Accuracy:  0.285
Validation Accuracy:  0.254
Average Loss: 3.1382795953167935 - Train Accuracy:  0.297
Validation Accuracy:  0.254
Average Loss: 3.159610539292678 - Train Accuracy:  0.263
Validation Accuracy:  0.254
Average Loss: 3.0588563959329176 - Train Accuracy:  0.312
Validation Accuracy:  0.254
Average Loss: 3.07476176507771 - Train Accuracy:  0.325
Validation Accuracy:  0.254
Average Loss: 3.0206543299823236 - Train Acc

In [6]:
# Restore the model stored under Runs/26Jan_2.
# SECURITY: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
# map_location puts the restored tensors on the device this notebook is
# configured for (the same value handed to GraphManager), so the checkpoint
# also loads on a machine that lacks the device it was saved from.
model = torch.load('Runs/26Jan_2/model.pth', map_location=GLOBALS.DEVICE.value)

In [6]:
# Validation accuracy of `model` per league, using the goal-difference evaluation.
# GLOBALS is the name imported from Config in the first cell; the previous
# `Utils.GLOBALS` spelling refers to a module this notebook never imports.
# NOTE(review): `already_saved` must already exist in the kernel (the first
# cells read GLOBALS.already_saved.value) — confirm they are the same flag.
for league, league_df in dl.dataset.groupby('league'):
    print(f'Evaluating On: {league}')
    # Only the directory differs between the two cases; the file name is shared.
    graph_dir = GLOBALS.LoadPath.value if already_saved else GLOBALS.SavePath.value
    gm = GraphManager.load(f'{graph_dir}{league}.gm')

    t_correct = 0
    t_total = 0

    for idx in gm.validation_mask:
        g = gm.graph_list[idx]
        correct, total = Learning.goal_diff_evaluation(model, g)
        t_correct += correct
        t_total += total

    # An empty validation mask yields NaN instead of a ZeroDivisionError.
    accuracy = t_correct / t_total if t_total else float('nan')
    # These graphs come from the validation mask, so label the number accordingly.
    print(f'Validation Accuracy: {accuracy: .3f}')

Evaluating On: Belgium Jupiler League
Test Accuracy:  0.415
Evaluating On: England Premier League
Test Accuracy:  0.407
Evaluating On: France Ligue 1
Test Accuracy:  0.409
Evaluating On: Germany 1. Bundesliga
Test Accuracy:  0.346
Evaluating On: Italy Serie A
Test Accuracy:  0.419
Evaluating On: Netherlands Eredivisie
Test Accuracy:  0.415
Evaluating On: Poland Ekstraklasa
Test Accuracy:  0.324
Evaluating On: Portugal Liga ZON Sagres
Test Accuracy:  0.391
Evaluating On: Scotland Premier League
Test Accuracy:  0.308
Evaluating On: Spain LIGA BBVA
Test Accuracy:  0.408
Evaluating On: Switzerland Super League
Test Accuracy:  0.386


In [7]:
# Test accuracy of `model` per league, using the result-prediction evaluation.
# GLOBALS is the name imported from Config in the first cell; the previous
# `Utils.GLOBALS` spelling refers to a module this notebook never imports.
# NOTE(review): `already_saved` must already exist in the kernel (the first
# cells read GLOBALS.already_saved.value) — confirm they are the same flag.
for league, league_df in dl.dataset.groupby('league'):
    print(f'Testing On: {league}')
    # Only the directory differs between the two cases; the file name is shared.
    graph_dir = GLOBALS.LoadPath.value if already_saved else GLOBALS.SavePath.value
    gm = GraphManager.load(f'{graph_dir}{league}.gm')

    t_correct = 0
    t_total = 0

    for idx in gm.test_mask:
        g = gm.graph_list[idx]
        # Qualified through the imported Learning module; a bare `evaluation`
        # is not defined by this notebook's imports.
        correct, total = Learning.evaluation(model, g)
        t_correct += correct
        t_total += total

    # An empty test mask yields NaN instead of a ZeroDivisionError.
    accuracy = t_correct / t_total if t_total else float('nan')
    print(f'Test Accuracy: {accuracy: .3f}')

Testing On: Belgium Jupiler League
Test Accuracy:  0.452
Testing On: England Premier League
Test Accuracy:  0.433
Testing On: France Ligue 1
Test Accuracy:  0.486
Testing On: Germany 1. Bundesliga
Test Accuracy:  0.484
Testing On: Italy Serie A
Test Accuracy:  0.500
Testing On: Netherlands Eredivisie
Test Accuracy:  0.495
Testing On: Poland Ekstraklasa
Test Accuracy:  0.448
Testing On: Portugal Liga ZON Sagres
Test Accuracy:  0.539
Testing On: Scotland Premier League
Test Accuracy:  0.513
Testing On: Spain LIGA BBVA
Test Accuracy:  0.524
Testing On: Switzerland Super League
Test Accuracy:  0.551


In [13]:
# Display the current value of `loss`; it is not assigned anywhere in this
# part of the notebook, so it relies on earlier kernel state.
loss

428

In [8]:
# Run the model on the graph stored at index 3 of gm.graph_list and display
# the raw output (sanity check of a single forward pass).
model(gm.graph_list[3])

tensor([[-2.7374e+00, -8.8250e+00, -6.7085e-02],
        [-1.1629e+01, -2.9417e+01, -8.9407e-06]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward0>)

In [8]:
# Show every dataset row whose league is 'Belgium Jupiler League'.
belgium_rows = dl.dataset['league'] == 'Belgium Jupiler League'
dl.dataset.loc[belgium_rows, :]

Unnamed: 0,league,season,week,home_team,away_team,result,home_lineup,away_lineup
0,Belgium Jupiler League,2008/2009,24,KV Mechelen,KRC Genk,win,"[Wouter Biebauw, Kenny van Hoevelen, Nana Asar...","[Davino Verhulst, Joao Carlos, Dimitri Daesela..."
1,Belgium Jupiler League,2008/2009,25,KSV Cercle Brugge,Club Brugge KV,loss,"[Bram Verbist, Denis Viane, Anthony Portier, F...","[Stijn Stijnen, Michael Klukowski, Antolin Alc..."
2,Belgium Jupiler League,2008/2009,25,RSC Anderlecht,SV Zulte-Waregem,win,"[Davy Schollen, Olivier Deschacht, Arnold Krui...","[Sammy Bossuyt, Karel D'Haene, Stijn Minne, Ba..."
3,Belgium Jupiler League,2008/2009,26,KV Mechelen,RSC Anderlecht,win,"[Wouter Biebauw, Kenny van Hoevelen, Nana Asar...","[Davy Schollen, Olivier Deschacht, Roland Juha..."
4,Belgium Jupiler League,2008/2009,26,SV Zulte-Waregem,KSV Roeselare,tie,"[Sammy Bossuyt, Karel D'Haene, Steve Colpaert,...","[Jurgen Sierens, Damir Mirvic, Mladen Lazarevi..."
...,...,...,...,...,...,...,...,...
1209,Belgium Jupiler League,2015/2016,30,SV Zulte-Waregem,Royal Excel Mouscron,win,"[Kenny Steppe, Henrik Dalsgaard, Christophe Le...","[Vagner, Noe Dussenne, Mickael Tirpan, Jean-Ch..."
1210,Belgium Jupiler League,2015/2016,30,Sporting Charleroi,KAA Gent,tie,"[Nicolas Penneteau, Damien Marcq, Gjoko Zajkov...","[Matz Sels, Lasse Nielsen, Rami Gershon, Nana ..."
1211,Belgium Jupiler League,2015/2016,30,Oud-Heverlee Leuven,Club Brugge KV,loss,"[Rudy Riou, Jordan Remacle, Kanu, Romain Reyna...","[Ludovic Butelle, Thomas Meunier, Bjorn Engels..."
1212,Belgium Jupiler League,2015/2016,30,KVC Westerlo,Waasland-Beveren,win,"[Kristof van Hout, Mitch Apau, Kenneth Schuerm...","[Laurent Henkinet, Hugo Sousa, Gary Coulibaly,..."


In [9]:
# Build the heterogeneous graph for the Belgian league, passing the last 20
# index labels of that subset as `supervision_indcs`.
belgium_df = dl.dataset.loc[dl.dataset['league'] == 'Belgium Jupiler League', :]
ht = gm._gen_heterodata(
    belgium_df,
    supervision_indcs=belgium_df.index[-20:],
)

In [10]:
# Display the node and edge stores of the heterogeneous graph built above.
ht

HeteroData(
  [1mteam[0m={ x=[2428] },
  [1mplayer[0m={ x=[26708] },
  [1m(team, used, player)[0m={ edge_index=[2, 26708] },
  [1m(player, playedin, team)[0m={ edge_index=[2, 26708] },
  [1m(team, win, team)[0m={ edge_index=[2, 899] },
  [1m(team, loss, team)[0m={ edge_index=[2, 899] },
  [1m(team, tie, team)[0m={ edge_index=[2, 590] },
  [1m(team, before, team)[0m={ edge_index=[2, 2406] },
  [1m(team, after, team)[0m={ edge_index=[2, 2406] },
  [1m(player, before, player)[0m={ edge_index=[2, 25641] },
  [1m(player, after, player)[0m={ edge_index=[2, 25641] }
)

In [8]:
# Display the homogeneous (single node/edge type tensor) view of `ht`.
ht.to_homogeneous()

Data(node_type=[96], x=[96], edge_index=[2, 184], edge_type=[184])

In [9]:
# Draw the homogeneous view of `ht` with networkx and save it to 'a.png'.
# Requires `ht` from the graph-construction cell to exist in the kernel.
hm = ht.to_homogeneous()
g = torch_geometric.utils.to_networkx(hm)
pos = nx.spring_layout(g)  # one layout position per node

# Node captions: type id 0 is captioned 'team', any other id 'player'.
node_labels = {
    i: ('team' if n == 0 else 'player')
    for i, n in enumerate(hm.node_type.cpu().numpy())
}

# Edge captions keyed by (source, target); only type ids 0-3 receive one,
# and a later edge between the same pair overwrites an earlier caption.
edge_list = hm.edge_index.T.cpu().tolist()
edge_labels = {}
for edge, e in zip(edge_list, hm.edge_type.cpu().numpy()):
    if e in (0, 1):
        edge_labels[tuple(edge)] = 'used - played_in'
    elif e in (2, 3):
        edge_labels[tuple(edge)] = 'won - loss'

fig, ax = plt.subplots()
nx.draw_networkx_nodes(g, pos, ax=ax)
nx.draw_networkx_labels(g, pos, labels=node_labels, ax=ax)
nx.draw_networkx_edges(g, pos, ax=ax, connectionstyle='arc3,rad=0.1')
nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels, ax=ax)
fig.set_size_inches((100, 100))
fig.savefig('a.png')

NameError: name 'ht' is not defined

In [7]:
# Print the position and the (source, target) pair of the first edge only.
for i, e in enumerate(hm.edge_index.cpu().numpy().T[:1]):
    print(i)
    print(e)

0
[0 2]


In [8]:
# Shape of elements 2 and 3 of the numpy conversion stacked along a new axis 1.
# The whole-dataset conversion is run once and reused instead of once per
# element; stacking directly on axis=1 gives the same layout as stacking on
# axis 0 and then moving that axis to position 1.
dataset_arrays = dl.DatasetDataframetoNumpy(dl.dataset)
np.stack((dataset_arrays[2], dataset_arrays[3]), axis=1).shape


(21309, 2, 11)

In [9]:
# Shape of element 2 of the numpy conversion of the full dataset.
dl.DatasetDataframetoNumpy(dl.dataset)[2].shape

(21309, 11)

In [10]:
# Encode elements 2 and 3 of the numpy conversion with dl.labeler.
# The conversion is run once and reused instead of once per element. Stacking
# on axis=1 and flattening visits the values in the same order as stacking on
# axis 0, moving that axis to position 1 and flattening.
dataset_arrays = dl.DatasetDataframetoNumpy(dl.dataset)
team_node_features = dl.labeler.transform(
    np.stack((dataset_arrays[2], dataset_arrays[3]), axis=1).flatten()
)

In [11]:
# Print the 'week' value of the first dataset row only.
for i, r in dl.dataset.iloc[:1].iterrows():
    print(r['week'])


24


In [12]:
# Interleave elements 0 and 1 of the node-text conversion row by row
# (first[0], second[0], first[1], second[1], ...).
# The conversion is run once and reused instead of once per element.
node_text = dl.DatasetDataframetoNodeText(dl.dataset)
np.stack((node_text[0], node_text[1])).T.flatten()

array(['KV Mechelen*0', 'KRC Genk*0', 'KSV Cercle Brugge*1', ...,
       'BSC Young Boys*21307', 'FC Zürich*21308', 'FC Vaduz*21308'],
      dtype='<U34')

In [13]:
# `a` holds the node-text conversion of the full dataset.
a = dl.DatasetDataframetoNodeText(dl.dataset)
# `t` maps each interleaved entry of a[0] / a[1] to a consecutive integer id.
# `a` is reused for the index rather than converting the dataset two more
# times, and the stray no-op `np.arange(...)` expression has been dropped.
t = pd.Series(
    np.arange(a[0].shape[0] * 2),
    index=np.stack((a[0], a[1])).T.flatten(),
)

In [14]:
# `p` maps each entry of a[2] / a[3] (stacked per row, then flattened) to a
# consecutive integer id. `a` is the node-text conversion this cell already
# used for the sizes; it is now also used for the index instead of converting
# the whole dataset two more times.
p = pd.Series(
    np.arange(a[2].shape[0] * a[2].shape[1] * 2),
    index=np.moveaxis(np.stack((a[2], a[3])), 0, 1).flatten(),
)

In [15]:
# Display the integer ids stored in `p` as a numpy array.
p.to_numpy()

array([     0,      1,      2, ..., 468795, 468796, 468797])

In [16]:
# Repeat every id in `t` dl.minimum_players_per_team times, consecutively.
aaa = np.repeat(a=t.to_numpy(), repeats=dl.minimum_players_per_team)

In [17]:
# Pair the repeated ids in `aaa` with the ids in `p`: one (aaa, p) row each.
repeated_ids = torch.tensor(aaa)
p_ids = torch.tensor(p)
ttt = torch.stack((repeated_ids, p_ids)).T

In [18]:
# Check the length of the repeated id array.
aaa.shape

(468798,)

In [19]:
# Consecutive ids laid out as (rows of a[2], inferred, columns of a[2]).
n_rows, n_cols = a[2].shape[0], a[2].shape[1]
np.arange(n_rows * n_cols * 2).reshape(n_rows, -1, n_cols)

array([[[     0,      1,      2, ...,      8,      9,     10],
        [    11,     12,     13, ...,     19,     20,     21]],

       [[    22,     23,     24, ...,     30,     31,     32],
        [    33,     34,     35, ...,     41,     42,     43]],

       [[    44,     45,     46, ...,     52,     53,     54],
        [    55,     56,     57, ...,     63,     64,     65]],

       ...,

       [[468732, 468733, 468734, ..., 468740, 468741, 468742],
        [468743, 468744, 468745, ..., 468751, 468752, 468753]],

       [[468754, 468755, 468756, ..., 468762, 468763, 468764],
        [468765, 468766, 468767, ..., 468773, 468774, 468775]],

       [[468776, 468777, 468778, ..., 468784, 468785, 468786],
        [468787, 468788, 468789, ..., 468795, 468796, 468797]]])

In [20]:
# Index labels of every row whose result is 'win'.
win_rows = dl.dataset['result'] == 'win'
dl.dataset.loc[win_rows, :].index.values

array([    0,     2,     3, ..., 21297, 21305, 21308])

In [22]:
# Node-text conversion restricted to the rows whose result is 'win'.
win_matches = dl.dataset.loc[dl.dataset['result'] == 'win', :]
dl.DatasetDataframetoNodeText(win_matches)

(array(['KV Mechelen*0', 'RSC Anderlecht*2', 'KV Mechelen*3', ...,
        'FC St. Gallen*21297', 'Lugano*21305', 'FC Zürich*21308'],
       dtype='<U34'),
 array(['KRC Genk*0', 'SV Zulte-Waregem*2', 'RSC Anderlecht*3', ...,
        'FC Zürich*21297', 'FC St. Gallen*21305', 'FC Vaduz*21308'],
       dtype='<U34'),
 array([['Wouter Biebauw@0', 'Kenny van Hoevelen@0', 'Nana Asare@0', ...,
         'Romeo van Dessel@0', 'Wouter Vrancken@0', 'Giuseppe Rossini@0'],
        ['Davy Schollen@2', 'Olivier Deschacht@2', 'Arnold Kruiswijk@2',
         ..., 'Mbark Boussoufa@2', 'Oleksandr Iakovenko@2',
         'Tom De Sutter@2'],
        ['Wouter Biebauw@3', 'Kenny van Hoevelen@3', 'Nana Asare@3', ...,
         'Romeo van Dessel@3', 'Bjoern Vleminckx@3', 'Giuseppe Rossini@3'],
        ...,
        ['Daniel Lopar@21297', 'Alain Wiss@21297', 'Silvan Hefti@21297',
         ..., 'Danijel Aleksic@21297', 'Marco Aratore@21297',
         'Edgar Salli@21297'],
        ['Mirko Salvi@21305', 'Frederic Vese