# SBM Classification

In [3]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import gc
import math
import pandas as pd
import numpy as np
import time, multiprocessing
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from tqdm import tqdm
import seaborn as sns

import rerf
from rerf.rerfClassifier import rerfClassifier

from scipy.stats import bernoulli
from scipy import stats

from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

from simulate.load_Xy import load_sbms_Xy

# Make sure automatic garbage collection is switched on (this is already the
# interpreter's default state, so the call is only a safeguard).
gc.enable()

# Silence every warning for the rest of the session. Note that this also hides
# warnings emitted by the libraries imported above, not just this notebook's.
import warnings
warnings.simplefilter('ignore')



In [4]:
# Training-set sizes swept by the experiment; the largest one fixes how many
# training samples are generated in total.
ns = np.array([10,50,100,200,400,1000,2000])

n_train = max(ns)
n_test = 10000

# Parameters of the two classes handed to load_sbms_Xy.
# presumably communities* are per-community node counts and p* the matching
# block-probability matrices (0.3 on the diagonal, 0.1 off it) — TODO confirm
# against simulate.load_Xy.
communities1 = [30,30]
communities2 = [20,20,20]
p1 = [[0.3,0.1],[0.1,0.3]]
p2 = [[0.3,0.1,0.1],[0.1,0.3,0.1],[0.1,0.1,0.3]]

# Training data: half of the samples are requested for each class.
X_train, y_train, _ = load_sbms_Xy(
    int(n_train/2), int(n_train/2),
    communities1, communities2, p1, p2, ns, seed=1,
)

# Held-out test data. This draw used to be assigned to X_train/y_train, which
# overwrote the training set and left X_test undefined for the experiment loop.
# NOTE(review): both draws use seed=1; if load_sbms_Xy derives its samples
# deterministically from the seed, the test set may overlap the training set —
# confirm, and use a different seed here if so.
X_test, y_test, size_dict = load_sbms_Xy(
    int(n_test/2), int(n_test/2),
    communities1, communities2, p1, p2, seed=1,
)

# The experiment loop at the bottom of the notebook refers to the labels as
# Y_train / Y_test, so expose them under those names as well.
Y_train, Y_test = y_train, y_test

In [2]:
## Setup for run
# Display name -> plot colour. The experiment loop pairs these keys with the
# entries of `classifiers` positionally (via zip), so the key order must match
# the list order below. "CNN" has no estimator in the list and is therefore
# dropped by that zip.
names = {"Log. Reg": "#a6cee3",
         "Lin. SVM":"#1f78b4",
         "SVM":"#b2df8a",
         "kNN": "#33a02c",
         "RF":"#fb9a99",
         "MLP":"#fdbf6f",
         "RerF":"#ff7f00",
         "MORF":"#e31a1c",
         "CNN":"#cab2d6"}

ncores=40    # worker count passed as n_jobs to the estimators that accept it
num_runs=1   # repetitions per (training size, classifier) pair
n_est=100    # number of trees in each forest-based estimator

classifiers = [
    # n_jobs is omitted: scikit-learn ignores it for the liblinear solver.
    LogisticRegression(random_state=0, solver='liblinear'),
    # Seeded so repeated runs give the same fit.
    LinearSVC(random_state=0),
    SVC(C=1.0, kernel='rbf', gamma='auto', random_state=0),
    KNeighborsClassifier(3, n_jobs=ncores),
    # 'sqrt' is what the deprecated 'auto' meant for forest classifiers;
    # seeded for reproducibility.
    RandomForestClassifier(n_estimators=n_est, max_features='sqrt',
                           n_jobs=ncores, random_state=0),
    MLPClassifier(hidden_layer_sizes=(100, ), random_state=0, max_iter=1000),
    # Seeded like the Graph-projection forest below.
    rerfClassifier(n_estimators=n_est, projection_matrix="RerF",
                   max_features='auto', n_jobs=ncores, random_state=0),
    # Graph-projection forest ("MORF" in `names`). Requires `size_dict` from
    # the data-loading cell, so that cell has to run first.
    rerfClassifier(projection_matrix="Graph",
                   max_features='auto',
                   n_jobs=ncores,
                   n_estimators=n_est,
                   oob_score=False,
                   random_state=0,
                   image_height=size_dict['height'],
                   image_width=size_dict['width'],
                   patch_height_max=1,
                   patch_height_min=1,
                   patch_width_max=5,
                   patch_width_min=1
                   )
    ]

NameError: name 'LogisticRegression' is not defined

In [1]:
# Restrict this run to a single estimator: keep only the final entry of the
# classifier list (the Graph-projection rerfClassifier) and the one matching
# display name / colour.
classifiers = [classifiers[len(classifiers) - 1]]
names = dict(MORF="#e31a1c")

NameError: name 'classifiers' is not defined

In [None]:
# Train each classifier on each dataset size, then test.
#
# For every (training size, classifier, repetition) triple the classifier is
# fitted on the first n training samples and scored on the full test set. One
# CSV row is written per triple and flushed immediately, so partial results
# survive an interrupted run.

# Build the work list before touching the output file, so that a broken setup
# (e.g. a missing variable) does not truncate results from an earlier run.
runList = [(n, clf, run) for n in ns
                         for clf in zip(classifiers, names)
                         for run in range(num_runs)]

## Prep output file:
# NOTE(review): the file name mentions an "impulse" experiment although this
# notebook is about SBM classification — confirm the intended output path.
# The context manager guarantees the file is closed even if a fit/predict raises.
with open('s-rerf_impulse_experiment_rerf_1run.csv', 'w+') as f:
    f.write("classifier,n,Lhat,trainTime,testTime,iterate\n")
    f.flush()

    for n, (model, label), iteration in tqdm(runList):
        X = X_train[:n]
        y = Y_train[:n]

        # perf_counter is monotonic, so the measured durations are not
        # affected by system clock adjustments.
        trainStartTime = time.perf_counter()
        model.fit(X, y)
        trainEndTime = time.perf_counter()
        trainTime = trainEndTime - trainStartTime

        testStartTime = time.perf_counter()
        out = model.predict(X_test)
        testEndTime = time.perf_counter()
        testTime = testEndTime - testStartTime

        # Misclassification rate: fraction of test predictions that differ
        # from the true label.
        lhat = np.mean(np.not_equal(out, Y_test))

        #### columns: classifier,n,Lhat,trainTime,testTime,iterate
        f.write(f"{label}, {n}, {lhat:2.9f}, {trainTime:2.9f}, {testTime:2.9f}, {iteration}\n")
        f.flush()