# [ Football Dataset Classification with Logistic Regression ]
- embedded with LINE (first-order proximity / negative sampling )

# 1. Import Libraries & Dataset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
from imblearn.over_sampling import SMOTE

In [3]:
ev = pd.read_csv('[Football]Embedded_with_FirstOrder.csv')

In [4]:
ev = ev.drop(ev.columns[0],axis=1)

In [5]:
ev.shape

(115, 11)

In [6]:
ev.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,Label
0,0.865126,0.732464,0.654681,-0.280288,-0.416516,0.77929,1.989182,0.944528,0.75891,0.924716,7
1,-0.315168,-1.665299,-0.98481,1.077798,0.511267,0.939566,1.635527,-0.366913,-0.451699,1.780345,0
2,-0.569846,-0.199044,1.78497,0.186517,2.154936,-0.550533,-0.93743,0.107572,1.074133,0.32642,2
3,0.832763,0.221549,-0.575225,-0.686977,-1.096524,0.453152,-0.012188,0.983878,0.942373,0.57072,3
4,0.117742,0.502898,0.749028,0.396632,-0.188808,0.286299,1.366271,-0.073079,-0.786144,-0.255506,7


In [7]:
ev['Label'].value_counts()

6     13
9     12
3     12
2     11
11    10
8     10
4     10
0      9
7      8
1      8
10     7
5      5
Name: Label, dtype: int64

In [8]:
sm = SMOTE(random_state=42,k_neighbors=2)
k = sm.fit_sample(ev.iloc[:,0:10],ev.iloc[:,10])        

In [9]:
ev2 = pd.DataFrame(k[0])

In [10]:
ev2['Label'] = k[1]

In [11]:
ev2 = ev2.sample(frac=1).reset_index(drop=True)

In [12]:
ev2['Label'].value_counts()

11    13
10    13
9     13
8     13
7     13
6     13
5     13
4     13
3     13
2     13
1     13
0     13
Name: Label, dtype: int64

### train & test split
- proportional to each 'class' (1~12)

SMOTE (X)

In [13]:
test_index1 = ev.groupby('Label').apply(lambda x: x.sample(frac=0.3)).index.levels[1]
train_index1 = set(np.arange(0,ev.shape[0])) - set(test_index1)

In [14]:
train1 = ev.loc[train_index1]
test1 = ev.loc[test_index1]

In [15]:
train_X1 = np.array(train1.iloc[:,0:10])
train_y1 = np.array(train1.iloc[:,10]).flatten()
test_X1 = np.array(test1.iloc[:,0:10])
test_y1 = np.array(test1.iloc[:,10]).flatten()

In [16]:
train_X1.shape, test_X1.shape, train_y1.shape, test_y1.shape

((80, 10), (35, 10), (80,), (35,))

SMOTE (O)

In [17]:
test_index2 = ev2.groupby('Label').apply(lambda x: x.sample(frac=0.3)).index.levels[1]
train_index2 = set(np.arange(0,ev2.shape[0])) - set(test_index2)

In [18]:
train2 = ev2.loc[train_index2]
test2 = ev2.loc[test_index2]

In [19]:
train_X2 = np.array(train2.iloc[:,0:10])
train_y2 = np.array(train2.iloc[:,10]).flatten()
test_X2 = np.array(test2.iloc[:,0:10])
test_y2 = np.array(test2.iloc[:,10]).flatten()

In [20]:
train_X2.shape, test_X2.shape, train_y2.shape, test_y2.shape

((108, 10), (48, 10), (108,), (48,))

# 2. Define Functions

- 1) matrix multiplication
- 2) sigmoid
- 3) standard scaler
- 4) loss function

In [21]:
def mul(W,b,x):
    return np.dot(x,W)+b

def sigmoid(x):    
    k = 1 / (1 + np.exp(-x+0.0001))
    return k[:,0]

In [22]:
def standard_scaler(x):
    mean = np.mean(x)
    std = np.std(x)
    return (x-mean)/std

In [23]:
def loss_func(y_hat,y):
    total_loss = np.mean(y*np.log(y_hat+0.0001) + (1-y)*np.log(1-y_hat+0.0001))
    return -total_loss

# 3. Train Model

### Logistic Regression

In [24]:
def predict(test_X,W,b):
    result = sigmoid(np.dot(test_X, W) + b)
    return result

In [43]:
def logreg(x,y,epoch,lr):
    W = np.random.rand(x.shape[1],1)
    b = np.random.rand(1)
    
    for ep in range(epoch+1):
        Z = mul(W,b,x)
        y_hat = sigmoid(Z)
        loss = loss_func(y_hat,y)
        dw = np.matmul(x.T,y_hat-y)/x.shape[0]
        db = np.sum(y_hat-y)
        
        W = W-lr*dw.reshape(-1,1)
        b = b-lr*db
        
        if ep>0 and ep % 10000 == 0:
            print('epoch :',ep,' loss :',loss)
    print('------------------------------------------ final loss :',loss,'---')   
    return W,b

### OVR (One-Versus-Rest)

In [44]:
def OVR(train_x,train_y,test_x,test_y,epoch,lr):
    pred_result = []
    real_result = []
    for index in ev['Label'].unique():
        train_y2 = (train_y == index).astype(int)        
        test_y2 = (test_y == index).astype(int)
        
        
        ''' oversampling with SMOTE in OVR
        
        sm = SMOTE(random_state=42,k_neighbors=3)
        smote_x,smote_y = sm.fit_sample(train_x,train_y2)
        
        ind = np.arange(smote_x.shape[0])
        np.random.shuffle(ind)
        
        smote_x,smote_y = smote_x[ind],smote_y[ind]
        
        W,b = logreg(smote_x,smote_y,epoch,lr)
        print('------------------------------------------ Classifier ',index,'done---')
        
        '''
        W,b = logreg(train_x,train_y2,epoch,lr)
        y_pred = predict(test_x,W,b)
        pred_result.append(y_pred)
        real_result.append(test_y2)
    pred_OH = (pred_result == np.amax(pred_result,axis=0)).astype('int')
    act_OH = np.concatenate(real_result).ravel().reshape(ev.iloc[:,-1].nunique(),-1)    
    return pred_OH,act_OH

In [45]:
def confusion_matrix(actual,prediction):
    n = actual.shape[0]
    conf_mat = np.zeros((n,n))
    for i in range(n):
        for j in range(n):
            conf_mat[i][j] += len(np.intersect1d(np.nonzero(actual[i]),np.nonzero(prediction[j])))        
    return conf_mat

# 4. Result

###  1. SMOTE (X)

In [53]:
prediction1,actual1 = OVR(train_X1,train_y1,test_X1,test_y1,20000,0.0025)

epoch : 10000  loss : 0.24007866286417195
epoch : 20000  loss : 0.23680118954105106
------------------------------------------ final loss : 0.23680118954105106 ---
epoch : 10000  loss : 0.1752521261784632
epoch : 20000  loss : 0.16416244141006825
------------------------------------------ final loss : 0.16416244141006825 ---
epoch : 10000  loss : 0.281384114717651
epoch : 20000  loss : 0.27806784014081265
------------------------------------------ final loss : 0.27806784014081265 ---
epoch : 10000  loss : 0.22297134652014933
epoch : 20000  loss : 0.21767287570497387
------------------------------------------ final loss : 0.21767287570497387 ---
epoch : 10000  loss : 0.22175818987175216
epoch : 20000  loss : 0.211198716837698
------------------------------------------ final loss : 0.211198716837698 ---
epoch : 10000  loss : 0.12114989191594872
epoch : 20000  loss : 0.11273809885920652
------------------------------------------ final loss : 0.11273809885920652 ---
epoch : 10000  loss : 0

In [54]:
confusion_without_smote = confusion_matrix(actual1, prediction1)
confusion_without_smote

array([[0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0.],
       [0., 0., 1., 0., 0., 0., 2., 0., 0., 0., 0., 0.],
       [0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 2., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1.],
       [0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 2., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.]])

### 2. SMOTE (O)

In [46]:
prediction2,actual2 = OVR(train_X2,train_y2,test_X2,test_y2,100000,0.0005)

epoch : 10000  loss : 0.2983699795044497
epoch : 20000  loss : 0.2611569455527149
epoch : 30000  loss : 0.2473366437738021
epoch : 40000  loss : 0.24164112733430088
epoch : 50000  loss : 0.23881470383005338
epoch : 60000  loss : 0.23719708232203154
epoch : 70000  loss : 0.23618992626970173
epoch : 80000  loss : 0.23553188933471786
epoch : 90000  loss : 0.23508881092170372
epoch : 100000  loss : 0.23478409834385966
------------------------------------------ final loss : 0.23478409834385966 ---
epoch : 10000  loss : 0.2400181946008694
epoch : 20000  loss : 0.18745882470144323
epoch : 30000  loss : 0.15880505308162365
epoch : 40000  loss : 0.14176660822950332
epoch : 50000  loss : 0.13120364338910293
epoch : 60000  loss : 0.12428912713895102
epoch : 70000  loss : 0.1194388832204144
epoch : 80000  loss : 0.1158095823084796
epoch : 90000  loss : 0.11295044593793865
epoch : 100000  loss : 0.11060850003748399
------------------------------------------ final loss : 0.11060850003748399 ---
epoc

In [48]:
confusion_with_smote = confusion_matrix(actual2, prediction2)
confusion_with_smote

array([[0., 0., 2., 0., 0., 1., 0., 1., 0., 0., 0., 0.],
       [0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.],
       [2., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0.],
       [1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 2., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 2.],
       [0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0.],
       [0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 3., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 3.]])

# 5. Evaluation

In [32]:
def f1_scores(con,score): 
    # score = 0 : micro / score =1 : macro / score = 2 : weighted macro
    
    # (1) Micro F1
    if score==0: 
        return np.diag(con).sum()/con.sum()
    rec,pre,f1 = [],[],[]
    
    for i in range(con.shape[0]):
        recall = con[i][i] / con[i].sum()
        precision = con[i][i] / con[:,i].sum()
        f1_score = 2*recall*precision / (recall+precision)
        rec.append(recall)
        pre.append(precision)
        f1.append(f1_score)
    
    # (2) Macro F1
    if score==1:
        return np.average(f1)
    
    # (3) Weighted Macro F1
    elif score==2:
        w = [con[x].sum() for x in range(con.shape[0])]
        return np.average(f1,weights=w)

In [55]:
print('Micro F1 :',f1_scores(con,0).round(3))
print('Macro F1 (Average) :',f1_scores(con,1).round(3))
print('Macro F1 (Weighted Average) :',f1_scores(con,2).round(3))