In [8]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

In [9]:
# Read the training CSV and the separate final-test CSV from the working
# directory, in that order.
train_data, final_test_data = (
    pd.read_csv(csv_name)
    for csv_name in ("fashion-mnist_train.csv", "fashion-mnist_test.csv")
)

In [10]:
# Split each frame into features and target: every column after the first is a
# feature (the first column is presumably "label" itself -- confirm against the
# CSV header), and the "label" column, cast to strings, is the target.
feature_columns = slice(1, None)

X_train, y_train = (
    train_data.iloc[:, feature_columns],
    train_data["label"].astype("str"),
)
X_final_test, y_final_test = (
    final_test_data.iloc[:, feature_columns],
    final_test_data["label"].astype("str"),
)

In [11]:
# Hold out 30% of the training rows as a validation set; the fixed seed makes
# the split repeatable.
split_parts = train_test_split(
    X_train,
    y_train,
    test_size=0.3,
    random_state=2,
)
x_train, x_test, y_train_v, y_test_v = split_parts

In [None]:
# Baseline: a random forest with default hyper-parameters.
# random_state is pinned (same seed as the hold-out split) so the reported
# metrics are reproducible across Restart & Run All.
rf = RandomForestClassifier(random_state=2)
rf.fit(x_train, y_train_v)

# Predictions on the rows the model was fitted on, on the validation split,
# and on the separate final-test file.
y_pred_train = rf.predict(x_train)
y_pred_test = rf.predict(x_test)
y_pred_test_final = rf.predict(X_final_test)

# One classification report per evaluation set, in the original order.
baseline_reports = (
    ("Training metrics:", y_train_v, y_pred_train),
    ("Test data metrics:", y_test_v, y_pred_test),
    ("Final Test data metrics:", y_final_test, y_pred_test_final),
)
for heading, actual, predicted in baseline_reports:
    print(heading)
    print(classification_report(y_true=actual, y_pred=predicted))

In [None]:
# Hyper-parameter search over forest size and minimum leaf size.
param_grid = {'n_estimators': [100, 200], 'min_samples_leaf': [2, 3]}

# Seed the forest so every candidate in the grid (and the final refit) gives
# the same result on a re-run; without it the "best" parameters can change
# from run to run purely because of sampling noise.
clf = RandomForestClassifier(random_state=2)
grid_search = GridSearchCV(clf, param_grid=param_grid)

# fit() returns the search object itself, so model_grid_search and grid_search
# refer to the same fitted object.
model_grid_search = grid_search.fit(x_train, y_train_v)

# Predictions on the fitting rows, the validation split and the final-test file.
y_pred_train_gs = model_grid_search.predict(x_train)
y_pred_test_gs = model_grid_search.predict(x_test)
y_pred_test_final_gs = model_grid_search.predict(X_final_test)

grid_search_reports = (
    ("Training metrics (Grid Search):", y_train_v, y_pred_train_gs),
    ("Test data metrics (Grid Search):", y_test_v, y_pred_test_gs),
    ("Final Test data metrics (Grid Search):", y_final_test, y_pred_test_final_gs),
)
for heading, actual, predicted in grid_search_reports:
    print(heading)
    print(classification_report(y_true=actual, y_pred=predicted))

In [None]:
# Add a K-means cluster id as an extra feature ("k_means_label").
#
# The clustering is fitted on the training features only and the SAME fitted
# centroids are then used to label the final-test rows. Cluster ids are
# arbitrary per fit, so re-fitting K-means on the final-test set would produce
# ids that do not correspond to the ones the classifier was trained on.
kmeans = KMeans(n_clusters=10, init='k-means++', random_state=2)

# Work from the feature columns without any previously added cluster column, so
# re-running this cell does not feed "k_means_label" back into K-means.
train_features = X_train.drop(columns="k_means_label", errors="ignore")
final_test_features = X_final_test.drop(columns="k_means_label", errors="ignore")

kmeans.fit(train_features)

# Rebind X_train / X_final_test to new frames instead of assigning a column in
# place on an iloc slice (avoids SettingWithCopyWarning); later cells still see
# both names with the extra "k_means_label" column.
X_train = train_features.assign(k_means_label=kmeans.labels_.astype('str'))
X_final_test = final_test_features.assign(
    k_means_label=kmeans.predict(final_test_features).astype('str')
)

# Same test_size and seed as the earlier split, now including the new column.
x_train, x_test, y_train_v, y_test_v = train_test_split(X_train, y_train, test_size=0.3, random_state=2)

# NOTE: this re-fits the existing grid_search object; fit() returns that same
# object, so model_kmeans is grid_search after the re-fit.
model_kmeans = grid_search.fit(x_train, y_train_v)

y_pred_train_kmeans = model_kmeans.predict(x_train)
y_pred_test_kmeans = model_kmeans.predict(x_test)
y_pred_test_final_kmeans = model_kmeans.predict(X_final_test)

kmeans_reports = (
    ("Training metrics (K-means):", y_train_v, y_pred_train_kmeans),
    ("Test data metrics (K-means):", y_test_v, y_pred_test_kmeans),
    ("Final Test data metrics (K-means):", y_final_test, y_pred_test_final_kmeans),
)
for heading, actual, predicted in kmeans_reports:
    print(heading)
    print(classification_report(y_true=actual, y_pred=predicted))

In [None]:
# 5-fold stratified cross-validation over the training split.
# shuffle=True needs a seed for the folds to be reproducible.
sk_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

for train_index, test_index in sk_fold.split(x_train, y_train_v):
    # split() yields positional indices into x_train / y_train_v, so features
    # and labels must both be taken from that same pair. Indexing the larger
    # X_train frame with these positions would pair rows with the wrong labels.
    train = x_train.iloc[train_index, :]
    y_trn_k = y_train_v.iloc[train_index]
    test = x_train.iloc[test_index, :]
    y_tst_k = y_train_v.iloc[test_index]

    # model_kmeans is re-fitted in place on every fold; after the loop it holds
    # the fit from the last fold only.
    model_kmeans.fit(train, y_trn_k)
    y_pred_train_cv = model_kmeans.predict(train)

    y_pred_test_cv = model_kmeans.predict(test)

    print("Training metrics (Cross Validation):")
    print(classification_report(y_true=y_trn_k, y_pred=y_pred_train_cv))

    print("Test data metrics (Cross Validation):")
    print(classification_report(y_true=y_tst_k, y_pred=y_pred_test_cv))

In [None]:
# Score model_kmeans, in whatever state the preceding cells left it, on the
# full training frame and on the final-test frame, then print one
# classification report for each.
y_pred_train_final_cv, y_pred_test_final_cv = (
    model_kmeans.predict(features) for features in (X_train, X_final_test)
)

final_reports = (
    ("Training metrics (Cross Validation):", y_train, y_pred_train_final_cv),
    ("Final Test data metrics (Cross Validation):", y_final_test, y_pred_test_final_cv),
)
for heading, actual, predicted in final_reports:
    print(heading)
    print(classification_report(y_true=actual, y_pred=predicted))