In [1]:
import pandas as pd
import numpy as np

from movie import load_movie
from model import HME

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
x_train, x_test, y_train, y_test = load_movie('train', data_limit=5000, train_ratio=0.8)

In [3]:
x_train.shape

(4000, 5879)

In [4]:
n_feature = x_train.shape[1]
n_level = 2

n_expert1 = 4
n_expert2 = 4

hme = HME(n_feature, n_expert1, n_expert2, n_level, 
          batch_size=64,
          lr=1., l1_coef=0.001, l21_coef=0.001,
          algo='fista')

In [5]:
print((hme.predict(x=x_train)==y_train).mean())
confusion_matrix(hme.predict(x=x_train), y_train)

0.4615


array([[ 535,  610],
       [1544, 1311]])

In [6]:
max_iter = 10000
stop_thre = 3

hme.fit(x_train, y_train, max_iter=max_iter, stop_thre=stop_thre, log_interval=max_iter // 50)

[accu: 0.8155]
[accu: 0.8165]
[accu: 0.8245]
[accu: 0.838]
[accu: 0.82775]
[accu: 0.82325]
[accu: 0.81475]
[accu: 0.8205]
stop increasing accuracy at iter: 1600


In [7]:
print((hme.predict(x=x_train)==y_train).mean())
confusion_matrix(y_train, hme.predict(x=x_train))

0.82


array([[1701,  378],
       [ 342, 1579]])

In [8]:
print((hme.predict(x=x_test)==y_test).mean())
confusion_matrix(y_test, hme.predict(x=x_test))

0.771


array([[389, 114],
       [115, 382]])

# Decision Tree

In [9]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(max_depth=4)
dt.fit(x_train, y_train.ravel())

DecisionTreeClassifier(max_depth=4)

In [10]:
y_pred = dt.predict(x_train)
y_real = y_train.ravel()

print((y_pred==y_real).mean())
confusion_matrix(y_real, y_pred)

0.67875


array([[ 963, 1116],
       [ 169, 1752]])

In [11]:
y_pred = dt.predict(x_test)
y_real = y_test.ravel()

print((y_pred==y_real).mean())
confusion_matrix(y_real, y_pred)

0.675


array([[233, 270],
       [ 55, 442]])

# Random Forest

In [12]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(max_depth=4)
rf.fit(x_train, y_train.ravel())

RandomForestClassifier(max_depth=4)

In [13]:
y_pred = rf.predict(x_train)
y_real = y_train.ravel()

print((y_pred==y_real).mean())
confusion_matrix(y_real, y_pred)

0.816


array([[1972,  107],
       [ 629, 1292]])

In [14]:
y_pred = rf.predict(x_test)
y_real = y_test.ravel()

print((y_pred==y_real).mean())
confusion_matrix(y_real, y_pred)

0.752


array([[454,  49],
       [199, 298]])