In [1]:
import pandas as pd
import numpy as np

from movie import load_movie
from model import HME

from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
# Load the movie dataset through the project loader and split it into train/test parts.
# With data_limit=5000 and train_ratio=0.8 the training matrix has 4000 rows.
x_train, x_test, y_train, y_test = load_movie(
    'train',
    data_limit=5000,
    train_ratio=0.8,
)

In [3]:
# Inspect the training matrix dimensions; shape[1] is reused as the HME feature count.
x_train.shape

(4000, 5879)

In [4]:
# Model configuration for HME (presumably a hierarchical mixture of experts;
# see model.py for the exact meaning of each argument).
n_feature = x_train.shape[1]  # input dimensionality comes straight from the data
n_expert1, n_expert2 = 4, 4
n_level = 2

hme = HME(
    n_feature, n_expert1, n_expert2, n_level,
    batch_size=64,
    lr=1.0,
    l1_coef=1e-4,
    l21_coef=1e-4,
    algo='fista',
)

In [5]:
# Baseline: score the model on the training set before fit() has been called.
hme_init_pred = hme.predict(x=x_train)  # predict once and reuse for both metrics

print((hme_init_pred == y_train).mean())
# confusion_matrix expects (y_true, y_pred): rows are true labels, columns are predictions.
confusion_matrix(y_train, hme_init_pred)

0.4715


array([[ 529, 1550],
       [ 564, 1357]])

In [6]:
# Train the HME. log_interval is max_iter // 50, i.e. 200 iterations with these settings.
# stop_thre is forwarded to fit() unchanged; presumably an early-stopping patience -- confirm in model.py.
max_iter = 10_000
stop_thre = 3

hme.fit(
    x_train,
    y_train,
    max_iter=max_iter,
    stop_thre=stop_thre,
    log_interval=max_iter // 50,
)

[accu: 0.92875]
[accu: 0.946]
[accu: 0.95]
[accu: 0.955]
[accu: 0.95775]
[accu: 0.96075]
[accu: 0.9605]
[accu: 0.95875]
[accu: 0.9635]
[accu: 0.96525]
[accu: 0.96575]
[accu: 0.9655]
[accu: 0.9665]
[accu: 0.96725]
[accu: 0.968]
[accu: 0.96725]
[accu: 0.9645]
[accu: 0.96675]
[accu: 0.9665]
stop increasing accuracy at iter: 3800


In [7]:
# Training-set performance of the fitted HME.
hme_train_pred = hme.predict(x=x_train)  # predict once and reuse for both metrics

print((hme_train_pred == y_train).mean())
# Rows are true labels, columns are predictions.
confusion_matrix(y_train, hme_train_pred)

0.9665


array([[2009,   70],
       [  64, 1857]])

In [8]:
# Held-out performance of the fitted HME.
hme_test_pred = hme.predict(x=x_test)  # predict once and reuse for both metrics

print((hme_test_pred == y_test).mean())
# Rows are true labels, columns are predictions.
confusion_matrix(y_test, hme_test_pred)

0.823


array([[404,  99],
       [ 78, 419]])

# Decision Tree

In [15]:
from sklearn.tree import DecisionTreeClassifier

# Baseline: a single decision tree with scikit-learn's default hyper-parameters.
# fit() returns the estimator itself, so the trailing `dt` displays the same repr.
dt = DecisionTreeClassifier().fit(x_train, y_train.ravel())
dt

DecisionTreeClassifier()

In [16]:
# Decision tree: accuracy and confusion matrix on the training split.
y_real = y_train.ravel()  # flat 1-D label vector, comparable element-wise with predict()
y_pred = dt.predict(x_train)

print(np.mean(y_pred == y_real))
# Rows are true labels, columns are predictions.
confusion_matrix(y_real, y_pred)

1.0


array([[2079,    0],
       [   0, 1921]])

In [17]:
# Decision tree: accuracy and confusion matrix on the held-out split.
y_real = y_test.ravel()  # flat 1-D label vector, comparable element-wise with predict()
y_pred = dt.predict(x_test)

print(np.mean(y_pred == y_real))
# Rows are true labels, columns are predictions.
confusion_matrix(y_real, y_pred)

0.671


array([[355, 148],
       [181, 316]])

# Random Forest

In [21]:
from sklearn.ensemble import RandomForestClassifier

# Baseline: a random forest with scikit-learn's default hyper-parameters.
# fit() returns the estimator itself, so the trailing `rf` displays the same repr.
rf = RandomForestClassifier().fit(x_train, y_train.ravel())
rf

RandomForestClassifier()

In [22]:
# Random forest: accuracy and confusion matrix on the training split.
y_real = y_train.ravel()  # flat 1-D label vector, comparable element-wise with predict()
y_pred = rf.predict(x_train)

print(np.mean(y_pred == y_real))
# Rows are true labels, columns are predictions.
confusion_matrix(y_real, y_pred)

1.0


array([[2079,    0],
       [   0, 1921]])

In [23]:
# Random forest: accuracy and confusion matrix on the held-out split.
y_real = y_test.ravel()  # flat 1-D label vector, comparable element-wise with predict()
y_pred = rf.predict(x_test)

print(np.mean(y_pred == y_real))
# Rows are true labels, columns are predictions.
confusion_matrix(y_real, y_pred)

0.817


array([[423,  80],
       [103, 394]])