In [1]:
from src.data.make_dataset import *

In [2]:
# Load the inputs through the project helper star-imported from
# src.data.make_dataset.
# NOTE(review): `data` and `edges` are rebuilt from the CSV files in a later
# cell of this notebook, so the values produced here get overwritten.
data, edges = make_data()

In [5]:
from src.features.build_features import *

In [None]:
# Derive the model inputs through the project helper star-imported from
# src.features.build_features.
# NOTE(review): `features`, `adj` and `labels` are all reassigned by the manual
# pipeline further down, so this call's results do not reach the trainer.
features, adj, labels = build_features(data, edges)

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Team abbreviations. Order matters: the loader cell zips this list with a
# running id range, so a team's position here fixes its per-season node id.
abbr = [
    'PHO', 'DAL', 'POR', 'OKC', 'DEN',
    'MEM', 'WAS', 'MIA', 'BRK', 'CLE',
    'TOR', 'NOP', 'HOU', 'IND', 'LAC',
    'PHI', 'SAC', 'UTA', 'LAL', 'BOS',
    'ORL', 'MIL', 'SAS', 'ATL', 'GSW',
    'CHI', 'NYK', 'DET', 'MIN', 'CHO',
]

In [None]:
# Seasons to load; each entry is the two-digit suffix appended to '20' when
# building the per-season file names below.
years = [11, 12, 13, 14, 15, 16, 17, 18, 19]

# Node ids assigned to the teams of the season currently being loaded. The
# range is shifted by len(abbr) after every season so that the same team in
# two different seasons receives two distinct ids.
id_r = np.arange(len(abbr))

# Per-season frames are collected in lists and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previously loaded rows on every iteration.
feature_frames = []
schedule_frames = []

for yr in years:
    fp1 = 'data/features/feat20' + str(yr) + '.csv'
    # NOTE(review): unlike the feature path, this one has no '.csv' suffix;
    # confirm that matches the files on disk.
    fp2 = 'data/schedule/sch20' + str(yr)

    # Abbreviation -> node id for this season (built once, used three times).
    team_to_id = dict(zip(abbr, id_r))

    curr = pd.read_csv(fp1)
    # Abbreviations missing from `abbr` map to NaN.
    curr['id'] = curr['Tm'].map(team_to_id)
    feature_frames.append(curr)

    curr_edge = pd.read_csv(fp2)
    curr_edge['Home'] = curr_edge['Home'].map(team_to_id)
    curr_edge['Away'] = curr_edge['Away'].map(team_to_id)
    schedule_frames.append(curr_edge)

    id_r = id_r + len(abbr)

# Row indices are deliberately not reset, matching the previous append-based
# behaviour.
data = pd.concat(feature_frames)
edges = pd.concat(schedule_frames)

In [None]:
# One graph node per row of `data`; the 'id' column was filled by the loader
# cell from the season-shifted id range.
nodes = list(data['id'])

In [None]:
# Turn every row of the schedule frame into a Python list to be used as one
# graph edge.
# NOTE(review): all columns of `edges` are included, not just 'Home'/'Away' —
# this presumably relies on the schedule files having exactly those two
# columns; confirm.
edge_list = edges.values.tolist()

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

# Undirected graph: nodes are the ids collected in `nodes`, edges are the
# rows of `edge_list`. nx.Graph stores at most one edge per node pair, so
# repeated pairs collapse into a single edge.
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edge_list)

# Quick visual sanity check of the resulting graph.
fig, ax = plt.subplots(figsize=(10, 10))
# `width` is the edge line-width keyword understood by networkx; the former
# `edge_size` is not a drawing keyword and is rejected by current releases.
nx.draw(G, ax=ax, node_size=10, width=1)
plt.show()

In [None]:
# Adjacency matrix of G; it is densified with .toarray() in a later cell.
# NOTE(review): the matrix is passed to the model as-is — no self-loops or
# normalization are added in the cells shown; confirm that is intended.
A = nx.adjacency_matrix(G)

In [None]:
# Prediction targets: the 'Rk' column of every row, as a NumPy array.
labels = np.array(list(data['Rk']))

In [None]:
# Feature matrix: every column except the team abbreviation, the target and
# the node id.
non_feature_columns = ['Tm', 'Rk', 'id']
X = data.drop(columns=non_feature_columns).to_numpy()

In [None]:
import torch

In [None]:
# Densify the adjacency matrix and wrap it in a torch tensor for the model.
adj = torch.Tensor(A.toarray())

In [None]:
# Node feature matrix as a torch tensor (one row per row of `data`).
features = torch.Tensor(X)

In [None]:
# Targets as a torch tensor. GCN_Trainer casts them to LongTensor itself
# before handing them to the loss.
labels = torch.Tensor(labels)

In [None]:
import torch.optim as optim
import time

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import argparse

class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907

    Computes ``adj @ (input @ weight)`` and adds ``bias`` when the layer was
    built with one.
    """

    def __init__(self, in_features, out_features, bias=True):
        """Create the layer's parameters and initialise them.

        Args:
            in_features: number of columns of the input feature matrix.
            out_features: number of columns of the layer output.
            bias: when false, no bias parameter is created.
        """
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Registered as None so `self.bias` exists either way.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Draw weight (and bias, if present) uniformly from [-b, b),
        with b = 1 / sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight first, then bias: keeps the order of random draws stable.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Project the features with the weight, then aggregate with `adj`."""
        projected = torch.mm(input, self.weight)
        aggregated = torch.spmm(adj, projected)
        return aggregated if self.bias is None else aggregated + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__,
            str(self.in_features),
            str(self.out_features),
        )


class GCN(nn.Module):
    """Two stacked GraphConvolution layers with ReLU and dropout in between;
    the output is log-softmax over the class dimension."""

    def __init__(self, nfeat, nhid, nclass, dropout):
        """
        Args:
            nfeat: number of input features per node.
            nhid: width of the hidden layer.
            nclass: number of output columns (classes).
            dropout: dropout probability applied to the hidden activations.
        """
        super().__init__()

        self.dropout = dropout
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)

    def forward(self, x, adj):
        """Return per-node log-probabilities for features `x` and graph `adj`."""
        hidden = F.relu(self.gc1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        scores = self.gc2(hidden, adj)
        return F.log_softmax(scores, dim=1)

In [None]:
class GCN_Trainer(object):
    """Full-batch trainer for the GCN model defined in this notebook.

    Keeps the model, an Adam optimizer and the (features, adj, labels)
    tensors together. There is no train/validation/test split: every loss
    and accuracy reported is computed on all nodes.
    """

    def __init__(self, features, adj, labels):
        """Build the model and optimizer for the given graph data.

        Args:
            features: 2-D tensor of node features; its second dimension sets
                the model's input width.
            adj: adjacency tensor passed to the model on every forward pass.
            labels: 1-D tensor of class labels; cast to long before the loss.
        """
        self.model = GCN(nfeat=features.shape[1],
                         nhid=16,
                         # NOTE(review): the +1 presumably leaves room for
                         # labels that start at 1 instead of 0 - confirm.
                         nclass=len(labels.unique()) + 1,
                         dropout=0.5)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=.01, weight_decay=5e-4)
        self.features = features
        self.adj = adj
        self.labels = labels

    def train(self, epoch):
        """Run one optimisation step on the full graph and print its metrics.

        Args:
            epoch: zero-based epoch index, used only for the printed line.
        """
        t = time.time()
        self.model.train()
        self.optimizer.zero_grad()
        output = self.model(self.features, self.adj)

        criterion = nn.CrossEntropyLoss()
        loss_train = criterion(output, self.labels.long())
        acc_train = self.accuracy(output, self.labels)
        loss_train.backward()
        self.optimizer.step()

        # There is no held-out split, so the "val" figures are the same
        # forward pass scored against the same labels as the training ones.
        loss_val = loss_train.detach()
        acc_val = acc_train

        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))

    def test(self):
        """Evaluate the model (dropout disabled) on the stored data and print
        loss and accuracy."""
        self.model.eval()
        # Use the tensors stored on the trainer rather than notebook globals,
        # and skip gradient tracking since nothing is optimised here.
        with torch.no_grad():
            output = self.model(self.features, self.adj)
            loss_test = nn.CrossEntropyLoss()(output, self.labels.long())
        acc_test = self.accuracy(output, self.labels)

        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))

    def complete_train(self, epochs=100):
        """Train for `epochs` epochs (default 100), then run `test()`."""
        t_total = time.time()
        for epoch in range(epochs):
            self.train(epoch)
        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

        # Testing
        self.test()

    def accuracy(self, output, labels):
        """Fraction of rows whose arg-max column equals the label.

        Args:
            output: 2-D tensor of per-class scores, one row per sample.
            labels: 1-D tensor of expected class indices.

        Returns:
            A zero-dimensional double tensor in [0, 1].
        """
        preds = output.max(1)[1].type_as(labels)
        correct = preds.eq(labels).double()
        correct = correct.sum()
        return correct / len(labels)

In [None]:
# Build the trainer from the tensors prepared in the cells above.
trainer = GCN_Trainer(features, adj, labels)

# Run the training loop followed by the final evaluation.
trainer.complete_train()