## ML HW2 best model

### Pytorch.nn



In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler    
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

We only use the one-hot-encoded features here.

In [None]:
def load_data():
    """Read the training features, training labels and test features from disk.

    The files ``X_train``, ``X_test`` and ``Y_train`` are opened relative to
    the current working directory.  The two feature files are parsed with
    their first row as the header; ``Y_train`` is parsed without a header row.

    Returns:
        tuple: ``(x_train, y_train, x_test)`` as NumPy arrays, with
        ``y_train`` flattened to one dimension.
    """
    features = {name: pd.read_csv(name).values for name in ('X_train', 'X_test')}
    labels = pd.read_csv('Y_train', header=None).values.reshape(-1)
    return features['X_train'], labels, features['X_test']

Sigmoid: np.clip keeps the output strictly inside (0, 1), so the log-loss never evaluates log(0).

In [None]:
def sigmoid(z):
    """Logistic function with the output clipped to ``[1e-6, 1 - 1e-6]``.

    Args:
        z: Scalar or array of logits.

    Returns:
        ``1 / (1 + exp(-z))`` element-wise, clipped to ``[1e-6, 1 - 1e-6]``
        so that a later ``log`` of the value (or of one minus the value)
        stays finite.
    """
    # For |z| >= 30 the logistic value already lies outside the output clip
    # window, so bounding the logit first changes no returned value; it only
    # stops np.exp from overflowing (RuntimeWarning) on large negative z.
    z = np.clip(z, -30.0, 30.0)
    res = 1 / (1.0 + np.exp(-z))
    return np.clip(res, 1e-6, 1-1e-6)

Feature normalization (z-score), applied only to the continuous variables

In [None]:
def normalize(x_train, x_test, index=(0, 1, 3, 4, 5)):
    """Z-score selected columns using statistics of train and test combined.

    The mean and standard deviation of each selected column are computed
    over the concatenation of ``x_train`` and ``x_test``.  Columns that are
    not selected pass through with mean 0 and std 1, i.e. their values are
    unchanged.

    Args:
        x_train: 2-D array of training features.
        x_test: 2-D array of test features with the same number of columns.
        index: Column indices to normalize.  Defaults to ``(0, 1, 3, 4, 5)``.

    Returns:
        tuple: ``(x_train_nor, x_test_nor)``, the normalized arrays split
        back at the original train/test boundary.
    """
    cols = np.asarray(index, dtype=int)
    n_train = x_train.shape[0]

    x_all = np.concatenate((x_train, x_test), axis = 0)
    mean = np.mean(x_all, axis = 0)
    std = np.std(x_all, axis = 0)

    mean_vec = np.zeros(x_all.shape[1])
    std_vec = np.ones(x_all.shape[1])
    mean_vec[cols] = mean[cols]
    std_vec[cols] = std[cols]
    # A constant column has zero std; dividing by it would turn the whole
    # column into NaN.  Use a unit divisor so such a column becomes all zeros.
    std_vec[std_vec == 0] = 1.0

    x_all_nor = (x_all - mean_vec) / std_vec

    return x_all_nor[:n_train], x_all_nor[n_train:]

In [None]:
def standardize(x_train, x_test, index=(0, 1, 3, 4, 5)):
    """Min-max scale selected columns to ``[0, 1]``.

    Despite its name this function performs min-max scaling, not z-scoring.
    The minimum and maximum of each selected column are taken over the
    concatenation of ``x_train`` and ``x_test``.  Columns that are not
    selected keep their values.

    Args:
        x_train: 2-D array of training features.
        x_test: 2-D array of test features with the same number of columns.
        index: Column indices to scale.  Defaults to ``(0, 1, 3, 4, 5)``.

    Returns:
        tuple: ``(x_train_stand, x_test_stand)``, the scaled arrays split
        back at the original train/test boundary.
    """
    cols = np.asarray(index, dtype=int)
    n_train = x_train.shape[0]

    x_all = np.concatenate((x_train, x_test), axis = 0)
    min_vec = np.zeros(x_all.shape[1])
    max_vec = np.ones(x_all.shape[1])
    min_vec[cols] = np.min(x_all, axis = 0)[cols]
    max_vec[cols] = np.max(x_all, axis = 0)[cols]

    span = max_vec - min_vec
    # A constant column has max == min; dividing by that zero range would
    # produce NaN.  Use a unit divisor so such a column becomes all zeros.
    span[span == 0] = 1.0

    x_all_stand = (x_all - min_vec) / span
    return x_all_stand[:n_train], x_all_stand[n_train:]

Gradient descent using Adagrad

In [None]:
def train(x_train, y_train, epoch=500, lr=0.01, l2_reg=0.001):
    """Fit logistic regression by full-batch gradient descent with Adagrad.

    Each epoch computes the mean cross-entropy gradient over the whole
    training set (using the notebook's ``sigmoid``), adds the L2 penalty
    gradient ``2 * l2_reg * w`` to the weight gradient (the bias is not
    regularised), and applies an Adagrad step: the learning rate is divided
    by the square root of the running sum of squared gradients.

    Args:
        x_train: 2-D array of training features, one row per sample.
        y_train: Training labels, one per row of ``x_train``.
        epoch: Number of full passes over the data.  Defaults to 500.
        lr: Base learning rate.  Defaults to 0.01.
        l2_reg: L2 regularisation strength.  Defaults to 0.001.

    Returns:
        tuple: ``(w, b)``, the weight vector and the scalar bias.

    Raises:
        ValueError: If ``x_train`` has no rows.
    """
    n_samples, n_features = x_train.shape
    if n_samples == 0:
        raise ValueError("x_train must contain at least one sample")
    y_train = np.asarray(y_train).reshape(-1)

    b = 0.0
    w = np.zeros(n_features)
    # Running sums of squared gradients used by Adagrad.
    b_sum = 0.0
    w_sum = np.zeros(n_features)

    for e in range(epoch):
        # Whole-dataset forward pass and gradient in one shot; w and b are
        # only updated once per epoch, so this matches a per-sample loop.
        err = sigmoid(x_train.dot(w) + b) - y_train
        b_epoch = np.sum(err) / n_samples
        w_epoch = x_train.T.dot(err) / n_samples + 2 * l2_reg * w

        b_sum += (b_epoch ** 2)
        w_sum += (w_epoch ** 2)

        # A gradient component that has been exactly zero so far (e.g. an
        # all-zero feature column) has a zero accumulator; dividing by its
        # square root would give inf * 0 = NaN.  Skip the update there.
        if b_sum > 0:
            b -= lr / b_sum ** 0.5 * b_epoch
        w_step = np.divide(lr, np.sqrt(w_sum), out=np.zeros_like(w_sum), where=w_sum > 0)
        w -= w_step * w_epoch
        print("epoch:{}".format(e))

    return w, b

In [None]:
def train2(x_train, y_train, epoch=1, batch_size=8, lr=0.01):
    """Fit logistic regression with plain mini-batch gradient descent.

    The data is visited in order, in consecutive batches of ``batch_size``
    rows; trailing rows that do not fill a whole batch are skipped.  Uses
    the notebook's ``sigmoid``.

    NOTE(review): the weight gradient is the *sum* over the batch while the
    bias gradient is the *mean*, so the weights effectively use a learning
    rate ``batch_size`` times larger than the bias.  Left as is to keep the
    trained model unchanged; confirm whether this is intended.

    Args:
        x_train: 2-D array of training features, one row per sample.
        y_train: Training labels, one per row of ``x_train``.
        epoch: Number of passes over the data.  Defaults to 1, a single
            pass, which is what this function has always performed.
        batch_size: Rows per mini-batch.  Defaults to 8.
        lr: Learning rate.  Defaults to 0.01.

    Returns:
        tuple: ``(w, b)``, the weight vector and the bias as a length-1 array.
    """
    w = np.zeros((x_train.shape[1]))
    b = np.zeros(1)
    n_batches = len(x_train) // batch_size

    for _ in range(epoch):
        for i in range(n_batches):
            rows = slice(batch_size * i, batch_size * (i + 1))
            X = x_train[rows]
            Y = np.reshape(y_train[rows], -1)

            # Prediction error (p - y); its products with X give the
            # cross-entropy gradient.
            err = sigmoid(np.dot(X, w) + b) - Y
            w_grad = np.dot(np.transpose(X), err.reshape((-1, 1)))
            b_grad = np.mean(err)

            w -= lr * w_grad.reshape(-1)
            b -= lr * b_grad
    return w, b



In [None]:
def validate(x,y,w,b,threshold):
    """Classify ``x`` with a logistic model and score it against ``y``.

    Args:
        x: 2-D array of features, one row per sample.
        y: Ground-truth labels (0 or 1), one per row of ``x``.
        w: Weight vector.
        b: Bias, either a scalar or a length-1 array.
        threshold: Decision threshold in percent; a sample is labelled 1
            when its predicted probability is at least ``threshold / 100``.

    Returns:
        tuple: ``(result, acc, loss)`` where ``result`` is a float array of
        0/1 predictions, ``acc`` is the fraction of predictions equal to
        ``y``, and ``loss`` is the summed binary cross-entropy as a float.

    Raises:
        ZeroDivisionError: If ``x`` has no rows.
    """
    y = np.asarray(y).reshape(-1)
    # One matrix-vector product for all rows instead of a Python loop.
    prob = np.reshape(sigmoid(np.dot(x, w) + b), -1)

    result = (prob >= float(threshold) / 100).astype(float)
    acc = int(np.count_nonzero(result == y)) / len(result)
    # Binary cross-entropy: both log terms carry the minus sign.
    loss = float(-np.sum(y * np.log(prob) + (1 - y) * np.log(1 - prob)))
    return result, acc, loss

In [None]:
def predict(x,w,b,max_threshold):
    """Return the 0/1 predictions for ``x`` at the given percent threshold.

    Delegates to ``validate`` with all-zero placeholder labels and discards
    the accuracy and loss it reports, which are meaningless here.
    """
    placeholder_labels = np.zeros(x.shape[0])
    return validate(x, placeholder_labels, w, b, max_threshold)[0]

In [None]:
# Load the raw arrays and z-score the selected feature columns.
# (standardize() is the min-max alternative to normalize().)
x_train, y_train, x_test = load_data()
x_train, x_test = normalize(x_train, x_test)

# Hold out the last 30% of the rows for validation; rows are not shuffled.
split = int(x_train.shape[0] * 0.7)
x_train_set, y_train_set = x_train[:split], y_train[:split]
x_val_set, y_val_set = x_train[split:], y_train[split:]

# Fit the mini-batch logistic regression on the training part.
w, b = train2(x_train_set, y_train_set)





In [None]:
# Grid search over the decision threshold (in percent) on the validation set.
max_acc = 0
max_threshold = 0
for threshold in np.linspace(35,65,1000):

  y_val_head, acc, loss = validate(x_val_set, y_val_set, w, b, threshold)
  # Strict '>' keeps the smallest threshold among ties.
  if acc > max_acc:
    max_acc = acc
    max_threshold = threshold
  print("threshold: {}, accuracy: {}".format(threshold, acc))
# Report the best accuracy found rather than the accuracy of the last
# threshold tried.  The loss is computed from the predicted probabilities
# only, so it is the same for every threshold.
print("accuracy: {}, loss: {}".format(max_acc,loss))
print(max_threshold)
# Label the test set with the threshold that scored best on validation.
y_test = predict(x_test, w, b, max_threshold)



threshold: 35.0, accuracy: 0.834402007248397
threshold: 35.030030030030034, accuracy: 0.834402007248397
threshold: 35.06006006006006, accuracy: 0.8348201839977697
threshold: 35.090090090090094, accuracy: 0.8348201839977697
threshold: 35.12012012012012, accuracy: 0.8348201839977697
threshold: 35.150150150150154, accuracy: 0.8349595762475607
threshold: 35.18018018018018, accuracy: 0.8348201839977697
threshold: 35.210210210210214, accuracy: 0.8348201839977697
threshold: 35.24024024024024, accuracy: 0.8348201839977697
threshold: 35.270270270270274, accuracy: 0.8348201839977697
threshold: 35.3003003003003, accuracy: 0.8350989684973515
threshold: 35.330330330330334, accuracy: 0.8349595762475607
threshold: 35.36036036036036, accuracy: 0.8352383607471424
threshold: 35.390390390390394, accuracy: 0.8350989684973515
threshold: 35.42042042042042, accuracy: 0.8352383607471424
threshold: 35.450450450450454, accuracy: 0.8352383607471424
threshold: 35.48048048048048, accuracy: 0.8350989684973515
thres

In [None]:
import csv

# Write the submission file: a header row followed by one
# (1-based id, integer label) row per test prediction.
with open('predict_best.csv', 'w', newline='') as csvf:
    # Create the CSV writer
    writer = csv.writer(csvf)
    writer.writerow(['id','label'])
    writer.writerows(
        [row_id, int(label)] for row_id, label in enumerate(y_test, start=1)
    )

### Tips for the math problems
[p1](https://people.eecs.berkeley.edu/~jrs/189/exam/mids14.pdf)  
[p2&3](https://people.eecs.berkeley.edu/~jordan/courses/260-spring10/other-readings/chapter13.pdf)  
[p3](https://stats.stackexchange.com/questions/351549/maximum-likelihood-estimators-multivariate-gaussian)