# [ Football Dataset Classification with MLP ]
- embedded with LINE (first-order proximity / negative sampling )

# 1. Importing libraries & dataset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
from imblearn.over_sampling import SMOTE

In [3]:
ev = pd.read_csv('[Football]Embedded_with_FirstOrder.csv')

In [4]:
ev = ev.drop(ev.columns[0],axis=1)

In [5]:
ev.shape

(115, 11)

In [6]:
ev.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Label
0,0.865126,0.732464,0.654681,-0.280288,-0.416516,0.77929,1.989182,0.944528,0.75891,0.924716,7
1,-0.315168,-1.665299,-0.98481,1.077798,0.511267,0.939566,1.635527,-0.366913,-0.451699,1.780345,0
2,-0.569846,-0.199044,1.78497,0.186517,2.154936,-0.550533,-0.93743,0.107572,1.074133,0.32642,2
3,0.832763,0.221549,-0.575225,-0.686977,-1.096524,0.453152,-0.012188,0.983878,0.942373,0.57072,3
4,0.117742,0.502898,0.749028,0.396632,-0.188808,0.286299,1.366271,-0.073079,-0.786144,-0.255506,7


In [8]:
sm = SMOTE(random_state=42,k_neighbors=2)
k = sm.fit_sample(ev.iloc[:,0:10],ev.iloc[:,10])        

In [16]:
ev2 = pd.DataFrame(k[0])
ev2['Label'] = k[1]
ev2 = ev2.sample(frac=1).reset_index(drop=True)

In [12]:
ev.shape, ev2.shape

((115, 11), (156, 11))

### SMOTE (X)

In [13]:
test_index1 = ev.groupby('Label').apply(lambda x: x.sample(frac=0.3)).index.levels[1]
train_index1 = set(np.arange(0,ev.shape[0])) - set(test_index1)

In [18]:
train1 = ev.loc[train_index1]
test1 = ev.loc[test_index1]

In [22]:
train_X1 = np.array(train1.iloc[:,0:10])
train_y1 = np.array(train1.iloc[:,10]).flatten()
test_X1 = np.array(test1.iloc[:,0:10])
test_y1 = np.array(test1.iloc[:,10]).flatten()

In [28]:
train_y1_dum = pd.get_dummies(train_y1).values
test_y1_dum = pd.get_dummies(test_y1).values

### SMOTE (O)

In [19]:
test_index2 = ev2.groupby('Label').apply(lambda x: x.sample(frac=0.3)).index.levels[1]
train_index2 = set(np.arange(0,ev2.shape[0])) - set(test_index2)

In [23]:
train2 = ev2.loc[train_index2]
test2 = ev2.loc[test_index2]

In [24]:
train_X2 = np.array(train2.iloc[:,0:10])
train_y2 = np.array(train2.iloc[:,10]).flatten()
test_X2 = np.array(test2.iloc[:,0:10])
test_y2 = np.array(test2.iloc[:,10]).flatten()

In [26]:
train_y2_dum = pd.get_dummies(train_y2).values
test_y2_dum = pd.get_dummies(test_y2).values

# 2. Define Functions

### Basic functions
- 1) train_test_split
- 2) standard scaler
- 3) transpose & matrix multiplication

In [30]:
def standard_scaler(x):
    mean = np.mean(x)
    std = np.std(x)
    return (x-mean)/std

In [31]:
def _t(X):
    return np.transpose(X)

def _m(A,B):
    return np.matmul(A,B)

### Activation functions
- 1) Sigmoid
- 2) Softmax

In [32]:
class Sigmoid:
    def __init__(self):
        self.last_o = 1
    
    def __call__(self,X):
        self.last_o = 1/(1+np.exp(-X))
        return self.last_o
    
    def grad(self):
        return self.last_o*(1-self.last_o)

In [33]:
class Softmax:
    def __init__(self):
        self.last_o = 1
        
    def __call__(self,X):
        e_x = np.exp(X-np.max(X))
        self.last_o = e_x / e_x.sum()
        return self.last_o
    
    def grad(self):
        return self.last_o*(1-self.last_o)

### Loss Function

In [34]:
class LogLoss:
    def __init__(self):
        self.dh = 1
        self.last_diff = 1
    
    def __call__(self,y,yhat):
        self.last_diff = yhat-y
        total_loss = np.mean(y*np.log(yhat+(1e-5)) + (1-y)*np.log(1-yhat+(1e-5)))
        return -total_loss
    
    def grad(self):
        return self.last_diff

# 3. Network Architecture

### 1) Neuron

In [35]:
class Neuron :
    def __init__(self,W,b,activation):
        self.W = W
        self.b = b
        self.act= activation()
        
        self.dW = np.zeros_like(self.W)  
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W)) 
        
        self.last_x = np.zeros((self.W.shape[0])) 
        self.last_h = np.zeros((self.W.shape[1]))
        
    def __call__(self,x):
        self.last_x = x
        self.last_h = _m(_t(self.W),x) + self.b
        output = self.act(self.last_h)
        return output
    
    def grad(self): 
        grad = self.act.grad()*self.W
        return grad
    
    def grad_W(self,dh): 
        grad = np.ones_like(self.W) 
        grad_a = self.act.grad()   # dh/du     
        for j in range(grad.shape[1]):
            grad[:,j] = dh[j] * grad_a[j] * self.last_x     # previous gradient * dh/du * du/dW
        return grad
        
    def grad_b(self,dh) : # dh/db = dh/du * du/db
        grad = dh * self.act.grad() * 1  # previous gradient * dh/du * du/db
        return grad

### 2) Neural Network

In [36]:
class NN:
    def __init__(self,input_num,output_num,hidden_depth,num_neuron, 
                 activation=Sigmoid, activation2=Softmax): 
        def init_var(in_,out_):
            weight = np.random.uniform(-1,1,(in_,out_))
            bias = np.zeros((out_,))
            return weight,bias
           
    ## 1-1. Hidden Layer
        self.sequence = list() # lists to put neurons
        W,b = init_var(input_num,num_neuron)
        self.sequence.append(Neuron(W,b,activation))
    
        for _ in range(hidden_depth-1):
            W,b = init_var(num_neuron,num_neuron)
            self.sequence.append(Neuron(W,b,activation)) # default : Sigmoid
    
    ## 1-2. Output Layer
        W,b = init_var(num_neuron,output_num)
        self.sequence.append(Neuron(W,b,activation2)) # default : Softmax
    
    def __call__(self,x):
        for layer in self.sequence:
            x = layer(x)
        return x
    
    def calc_grad(self,loss_fun):
        loss_fun.dh = loss_fun.grad()
        self.sequence.append(loss_fun)
        
        for i in range(len(self.sequence)-1, 0, -1):
            L1 = self.sequence[i]
            L0 = self.sequence[i-1]
            
            L0.dh = _m(L0.grad(), L1.dh)
            L0.dW = L0.grad_W(L1.dh)
            L0.db = L0.grad_b(L1.dh)
            
        self.sequence.remove(loss_fun)   

### 3) Gradient Descent

In [37]:
def GD(nn,x,y,loss_fun,lr=0.01):
    loss = loss_fun(nn(x),y) # 1) FEED FORWARD
    nn.calc_grad(loss_fun) # 2) BACK PROPAGATION
    
    for layer in nn.sequence: # Update Equation
        layer.W += -lr*layer.dW
        layer.b += -lr*layer.db    
    return loss

In [39]:
train_y2_dum.shape

(108, 12)

# 4. Train Model

In [44]:
train_y1

array([ 7,  3,  7,  3,  8,  8,  7,  3, 10,  2,  6,  2,  7,  9,  6,  1,  9,
        8,  8,  7, 10,  0,  6, 11,  1,  2,  0,  6,  1,  0,  6,  2,  3,  5,
        6,  4,  2, 11, 10, 11,  6,  1,  9,  4, 11,  2,  2,  9,  8, 10,  9,
       11,  4,  9,  8,  8,  1,  5,  3,  6,  4, 11,  0,  5,  4,  4,  9, 10,
        3,  6,  2,  1,  3,  7,  0,  3,  0,  4,  9, 11], dtype=int64)

In [41]:
NeuralNet = NN(10,12,3,4,activation=Sigmoid, activation2=Softmax) # input_num, output_num, hidden_depth, num_layers
loss_fun = LogLoss()
EPOCH = 100

loss_per_epoch = []

for epoch in range(EPOCH):
    for i in range(train_X1.shape[0]):
        loss = GD(NeuralNet,train_X1[i],train_y1_dum[i],loss_fun,0.001)
    loss_per_epoch.append(loss)
    
    if epoch%10 ==0:
        print('Epoch {} : Loss {}'.format(epoch+1, loss))

Epoch 1 : Loss 1.8104908214972841
Epoch 11 : Loss 1.8113916016336296
Epoch 21 : Loss 1.8123271189251995
Epoch 31 : Loss 1.813300748226298
Epoch 41 : Loss 1.8143163713077843
Epoch 51 : Loss 1.8153784754495053
Epoch 61 : Loss 1.8164922755066275
Epoch 71 : Loss 1.8176638660287345
Epoch 81 : Loss 1.8189004120990535


KeyboardInterrupt: 

### case 2) train 30%

In [50]:
NeuralNet2 = NN(10,12,3,4,activation=Sigmoid, activation2=Softmax) # input_num, output_num, hidden_depth, num_layers
loss_fun = LogLoss()
EPOCH = 100

loss_per_epoch = []

for epoch in range(EPOCH):
    for i in range(train_X2.shape[0]):
        loss = GD(NeuralNet,train_X2[i],train_y2_dum[i],loss_fun,0.001)
    loss_per_epoch.append(loss)
    
    if epoch%10 ==0:
        print('Epoch {} : Loss {}'.format(epoch+1, loss))

Epoch 1 : Loss 1.8875529700242721
Epoch 11 : Loss 1.8915348173126514
Epoch 21 : Loss 1.896128997473119
Epoch 31 : Loss 1.9010245686334544
Epoch 41 : Loss 1.905596576751912
Epoch 51 : Loss 1.9092643848186015
Epoch 61 : Loss 1.9118815167054295
Epoch 71 : Loss 1.9136491164042617
Epoch 81 : Loss 1.9148370692308836
Epoch 91 : Loss 1.9156525639590634


# 5. Prediction

In [46]:
def predict(model,test_X):
    preds = []
    for i in range(test_X.shape[0]):
        pred_result = np.argmax(model(test_X[i]))
        preds.append(pred_result)
    return np.array(preds)

### 1) prediction result

In [49]:
predict(NeuralNet2,test_X2)

NameError: name 'NeuralNet2' is not defined

In [None]:
test_y_70

In [None]:
pred70

### 2) metrics

In [None]:
def Metrics(pred,actual):
    TP,TN,FP,FN = 0,0,0,0
    for i in range(len(pred)):
        if pred[i]*actual[i]==1:
            TP +=1
        elif pred[i]>actual[i]:
            FP +=1
        elif pred[i]<actual[i]:
            FN +=1
        else:
            TN +=1
    
    accuracy = (TP+TN) / (TP+TN+FP+FN)
    precision = TP / (TP+FP)
    recall = TP / (TP+FN)
    F1_score = 2*(precision*recall)/(precision+recall)
    return accuracy,precision,recall,F1_score

In [None]:
print('Training Dataset 70%')
actual_class_70 = (1-test_y_70)[:,0]
Metrics(pred70,actual_class_70)