In [2]:
from sklearn import svm
from sklearn.metrics import accuracy_score
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, ParameterGrid
import pandas as pd
from sklearn import ensemble
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
def convert_sample(image):
    """Turn a single RGB image into one flat grayscale feature row.

    The image is converted to grayscale, resized to 96x96, pulled out of
    TensorFlow as a NumPy array and flattened.

    Parameters
    ----------
    image : array-like
        One RGB image, passed straight to ``tf.image.rgb_to_grayscale``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_values)`` so that rows from many images can
        be stacked vertically into a feature matrix.
    """
    gray = tf.image.rgb_to_grayscale(image)
    resized = tf.image.resize(gray, [96, 96])
    # A single leading row; -1 lets NumPy infer the flattened length.
    return resized.numpy().reshape(1, -1)

In [4]:
# Load the training images and flatten each one into a grayscale feature row.
X = np.load('Xtrain.npy')
print(X.shape)
X = np.vstack(list(map(convert_sample,X)))

# One scaler, fitted on the training features only, is reused for the test
# set below so both sets are scaled with the same statistics.
# with_mean / with_std are boolean switches, not target values: the original
# `with_mean=0, with_std=1` meant "do not centre, do scale". That behaviour is
# kept here with proper booleans (ints are rejected by recent scikit-learn).
# NOTE(review): switch with_mean to True if zero-mean features were intended.
feature_scaler = StandardScaler(with_mean=False, with_std=True)
X = feature_scaler.fit_transform(X)
print(f'Shape of training data features (observations,features): {X.shape}')

# Labels are flattened to a 1-D vector.
y = np.load('ytrain.npy')
y = y.reshape(-1,)    
print(f'Shape of training data labels (observations,): {y.shape}')

# Test images go through the same conversion and are scaled with the
# statistics learned from the training set (transform only, no re-fit).
Xtest = np.load('Xtest.npy')
Xtest = np.vstack(list(map(convert_sample,Xtest)))
Xtest = feature_scaler.transform(Xtest)
print(f'Shape of test data features (observations,features): {Xtest.shape}')

(26214, 96, 96, 3)




Shape of training data features (observations,features): (26214, 9216)
Shape of training data labels (observations,): (26214,)
Shape of training data features (observations,features): (1638, 9216)




In [5]:
# Shuffle every row index of X with a fixed seed so the five subsets (and
# everything trained on them) are the same on each Restart & Run All.
# The row count is read from X instead of being hard-coded, so no trailing
# samples are silently left out and a smaller dataset does not raise an
# IndexError.
SPLIT_SEED = 42
N_SUBSETS = 5
indices = np.random.default_rng(SPLIT_SEED).permutation(X.shape[0])

# np.array_split tolerates a row count that is not a multiple of N_SUBSETS:
# the leading subsets simply receive one extra index each.
(
    subset1_indices,
    subset2_indices,
    subset3_indices,
    subset4_indices,
    subset5_indices,
) = np.array_split(indices, N_SUBSETS)

# Disjoint feature/label subsets, one per ensemble member.
X_subset1, y_subset1 = X[subset1_indices], y[subset1_indices]
X_subset2, y_subset2 = X[subset2_indices], y[subset2_indices]
X_subset3, y_subset3 = X[subset3_indices], y[subset3_indices]
X_subset4, y_subset4 = X[subset4_indices], y[subset4_indices]
X_subset5, y_subset5 = X[subset5_indices], y[subset5_indices]

# 80/20 train/validation split inside each subset (seeded, so repeatable).
X_train1, X_val1, y_train1, y_val1 = train_test_split(X_subset1, y_subset1, test_size=0.2, random_state=42)
X_train2, X_val2, y_train2, y_val2 = train_test_split(X_subset2, y_subset2, test_size=0.2, random_state=42)
X_train3, X_val3, y_train3, y_val3 = train_test_split(X_subset3, y_subset3, test_size=0.2, random_state=42)
X_train4, X_val4, y_train4, y_val4 = train_test_split(X_subset4, y_subset4, test_size=0.2, random_state=42)
X_train5, X_val5, y_train5, y_val5 = train_test_split(X_subset5, y_subset5, test_size=0.2, random_state=42)

In [6]:
# SVM 1: RBF-kernel SVC fitted on the train and validation parts of subset 1
# stacked back together, then used to label the test set.
svm_best1 = svm.SVC(C=1.0, kernel='rbf')
svm_best1.fit(
    np.concatenate((X_train1, X_val1), axis=0),
    np.concatenate((y_train1, y_val1), axis=0),
)
ytest_hat_svm1 = svm_best1.predict(Xtest)

In [7]:
# SVM 2: RBF-kernel SVC fitted on the train and validation parts of subset 2
# stacked back together, then used to label the test set.
svm_best2 = svm.SVC(C=1.0, kernel='rbf')
svm_best2.fit(
    np.concatenate((X_train2, X_val2), axis=0),
    np.concatenate((y_train2, y_val2), axis=0),
)
ytest_hat_svm2 = svm_best2.predict(Xtest)

In [8]:
# SVM 3: RBF-kernel SVC fitted on the train and validation parts of subset 3
# stacked back together, then used to label the test set.
svm_best3 = svm.SVC(C=1.0, kernel='rbf')
svm_best3.fit(
    np.concatenate((X_train3, X_val3), axis=0),
    np.concatenate((y_train3, y_val3), axis=0),
)
ytest_hat_svm3 = svm_best3.predict(Xtest)

In [9]:
# SVM 4: RBF-kernel SVC fitted on the train and validation parts of subset 4
# stacked back together, then used to label the test set.
svm_best4 = svm.SVC(C=1.0, kernel='rbf')
svm_best4.fit(
    np.concatenate((X_train4, X_val4), axis=0),
    np.concatenate((y_train4, y_val4), axis=0),
)
ytest_hat_svm4 = svm_best4.predict(Xtest)

In [10]:
# SVM 5: RBF-kernel SVC fitted on the train and validation parts of subset 5
# stacked back together, then used to label the test set.
svm_best5 = svm.SVC(C=1.0, kernel='rbf')
svm_best5.fit(
    np.concatenate((X_train5, X_val5), axis=0),
    np.concatenate((y_train5, y_val5), axis=0),
)
ytest_hat_svm5 = svm_best5.predict(Xtest)

In [11]:
# Random forest 1: fitted on the train and validation parts of subset 1
# stacked back together, then used to label the test set.
rf_best1 = ensemble.RandomForestClassifier(
    max_depth = None, 
    n_estimators = 1000, 
    min_samples_split = 33,
    min_samples_leaf = 5,
    max_features="sqrt",
    # Fixed seed: without it the bootstrap samples and per-split feature
    # draws differ on every run, so the predictions are not reproducible.
    random_state=42,
)

rf_best1.fit(np.concatenate([X_train1, X_val1]), np.concatenate([y_train1, y_val1]))

ytest_hat_rf1 = rf_best1.predict(Xtest)

In [12]:
# Random forest 2: fitted on the train and validation parts of subset 2
# stacked back together, then used to label the test set.
rf_best2 = ensemble.RandomForestClassifier(
    max_depth = None, 
    n_estimators = 1000, 
    min_samples_split = 33,
    min_samples_leaf = 5,
    max_features="sqrt",
    # Fixed seed: without it the bootstrap samples and per-split feature
    # draws differ on every run, so the predictions are not reproducible.
    random_state=42,
)

rf_best2.fit(np.concatenate([X_train2, X_val2]), np.concatenate([y_train2, y_val2]))

ytest_hat_rf2 = rf_best2.predict(Xtest)

In [13]:
# Random forest 3: fitted on the train and validation parts of subset 3
# stacked back together, then used to label the test set.
rf_best3 = ensemble.RandomForestClassifier(
    max_depth = None, 
    n_estimators = 1000, 
    min_samples_split = 33,
    min_samples_leaf = 5,
    max_features="sqrt",
    # Fixed seed: without it the bootstrap samples and per-split feature
    # draws differ on every run, so the predictions are not reproducible.
    random_state=42,
)

rf_best3.fit(np.concatenate([X_train3, X_val3]), np.concatenate([y_train3, y_val3]))

ytest_hat_rf3 = rf_best3.predict(Xtest)

In [14]:
# Random forest 4: fitted on the train and validation parts of subset 4
# stacked back together, then used to label the test set.
rf_best4 = ensemble.RandomForestClassifier(
    max_depth = None, 
    n_estimators = 1000, 
    min_samples_split = 33,
    min_samples_leaf = 5,
    max_features="sqrt",
    # Fixed seed: without it the bootstrap samples and per-split feature
    # draws differ on every run, so the predictions are not reproducible.
    random_state=42,
)

rf_best4.fit(np.concatenate([X_train4, X_val4]), np.concatenate([y_train4, y_val4]))

ytest_hat_rf4 = rf_best4.predict(Xtest)

In [15]:
# Random forest 5: fitted on the train and validation parts of subset 5
# stacked back together, then used to label the test set.
rf_best5 = ensemble.RandomForestClassifier(
    max_depth = None, 
    n_estimators = 1000, 
    min_samples_split = 33,
    min_samples_leaf = 5,
    max_features="sqrt",
    # Fixed seed: without it the bootstrap samples and per-split feature
    # draws differ on every run, so the predictions are not reproducible.
    random_state=42,
)

rf_best5.fit(np.concatenate([X_train5, X_val5]), np.concatenate([y_train5, y_val5]))

ytest_hat_rf5 = rf_best5.predict(Xtest)

In [16]:
# Gradient boosting 1: fitted on the train and validation parts of subset 1
# stacked back together, then used to label the test set.
gb_best1 = GradientBoostingClassifier(
    n_estimators=800,
    min_samples_split=10,
    min_samples_leaf=2,
    max_depth=2,
    learning_rate=0.15,
    # Fixed seed so the per-tree randomness (feature permutation at each
    # split) is the same on every run.
    random_state=42,
)
gb_best1.fit(np.concatenate([X_train1, X_val1]), np.concatenate([y_train1, y_val1]))

ytest_hat_gb1 = gb_best1.predict(Xtest)

In [17]:
# Gradient boosting 2: fitted on the train and validation parts of subset 2
# stacked back together, then used to label the test set.
gb_best2 = GradientBoostingClassifier(
    n_estimators=800,
    min_samples_split=10,
    min_samples_leaf=2,
    max_depth=2,
    learning_rate=0.15,
    # Fixed seed so the per-tree randomness (feature permutation at each
    # split) is the same on every run.
    random_state=42,
)
gb_best2.fit(np.concatenate([X_train2, X_val2]), np.concatenate([y_train2, y_val2]))

ytest_hat_gb2 = gb_best2.predict(Xtest)

In [18]:
# Gradient boosting 3: fitted on the train and validation parts of subset 3
# stacked back together, then used to label the test set.
gb_best3 = GradientBoostingClassifier(
    n_estimators=800,
    min_samples_split=10,
    min_samples_leaf=2,
    max_depth=2,
    learning_rate=0.15,
    # Fixed seed so the per-tree randomness (feature permutation at each
    # split) is the same on every run.
    random_state=42,
)
gb_best3.fit(np.concatenate([X_train3, X_val3]), np.concatenate([y_train3, y_val3]))

ytest_hat_gb3 = gb_best3.predict(Xtest)

In [19]:
# Gradient boosting 4: fitted on the train and validation parts of subset 4
# stacked back together, then used to label the test set.
gb_best4 = GradientBoostingClassifier(
    n_estimators=800,
    min_samples_split=10,
    min_samples_leaf=2,
    max_depth=2,
    learning_rate=0.15,
    # Fixed seed so the per-tree randomness (feature permutation at each
    # split) is the same on every run.
    random_state=42,
)
gb_best4.fit(np.concatenate([X_train4, X_val4]), np.concatenate([y_train4, y_val4]))

ytest_hat_gb4 = gb_best4.predict(Xtest)

In [20]:
# Gradient boosting 5: fitted on the train and validation parts of subset 5
# stacked back together, then used to label the test set.
gb_best5 = GradientBoostingClassifier(
    n_estimators=800,
    min_samples_split=10,
    min_samples_leaf=2,
    max_depth=2,
    learning_rate=0.15,
    # Fixed seed so the per-tree randomness (feature permutation at each
    # split) is the same on every run.
    random_state=42,
)
gb_best5.fit(np.concatenate([X_train5, X_val5]), np.concatenate([y_train5, y_val5]))

ytest_hat_gb5 = gb_best5.predict(Xtest)

In [21]:
# Subset 1 ensemble: put the three models' test predictions side by side (one
# column per model), average each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined1 = np.round(
    np.column_stack((ytest_hat_gb1, ytest_hat_rf1, ytest_hat_svm1)).mean(axis=1)
).astype(int)

In [22]:
# Subset 2 ensemble: put the three models' test predictions side by side (one
# column per model), average each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined2 = np.round(
    np.column_stack((ytest_hat_gb2, ytest_hat_rf2, ytest_hat_svm2)).mean(axis=1)
).astype(int)

In [23]:
# Subset 3 ensemble: put the three models' test predictions side by side (one
# column per model), average each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined3 = np.round(
    np.column_stack((ytest_hat_gb3, ytest_hat_rf3, ytest_hat_svm3)).mean(axis=1)
).astype(int)

In [24]:
# Subset 4 ensemble: put the three models' test predictions side by side (one
# column per model), average each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined4 = np.round(
    np.column_stack((ytest_hat_gb4, ytest_hat_rf4, ytest_hat_svm4)).mean(axis=1)
).astype(int)

In [25]:
# Subset 5 ensemble: put the three models' test predictions side by side (one
# column per model), average each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined5 = np.round(
    np.column_stack((ytest_hat_gb5, ytest_hat_rf5, ytest_hat_svm5)).mean(axis=1)
).astype(int)

In [26]:
# Final ensemble: line up the five per-subset predictions as columns, average
# each row and round to the nearest integer.
# NOTE(review): this acts as a majority vote only if labels are 0/1 - confirm.
y_test_hat_combined = np.round(
    np.column_stack((
        y_test_hat_combined1,
        y_test_hat_combined2,
        y_test_hat_combined3,
        y_test_hat_combined4,
        y_test_hat_combined5,
    )).mean(axis=1)
).astype(int)

In [27]:
# Build the submission table under its own name instead of rebinding
# y_test_hat_combined. The original overwrote the prediction array with a
# DataFrame, so running this cell a second time failed on `.reshape`
# (DataFrames have no such method). The array is now left untouched and the
# cell can be re-run safely.
submission_df = pd.DataFrame({
    'Id': range(len(y_test_hat_combined)),           # 0-based row id per test sample
    'Predicted': y_test_hat_combined.reshape(-1,),   # flat vector of ensemble labels
})
submission_df.to_csv("ytest_hat.csv", index=False)