# Load Packages

In [1]:
# scikit-learn, scikit-image are needed
# If these packages are missing, install them with the following command:
# pip install scikit-learn scikit-image

import os

import sklearn.datasets
import sklearn.linear_model
import sklearn.svm
import sklearn.tree
import sklearn.ensemble
import sklearn.model_selection
import sklearn.metrics

import skimage.io
import skimage.transform
import skimage.color

import numpy as np

import matplotlib.pyplot as plt 
%matplotlib inline



# Load Additional Packages (if you want to use other modules in Scikit Learn)

In [2]:
# Extra scikit-learn / scikit-image names needed beyond the base imports.
from skimage import transform, util
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Logistic-regression model with the solver's iteration cap raised to 1000.
classifier = LogisticRegression(max_iter=1000)

# Load Data Points (Do not modify the following block)

In [3]:
# Load every training image, shrink it to a fixed square size, convert it to
# grayscale, and flatten it into one feature row per image.

# Side length (in pixels) that each image is resized to.
image_size = 64
# Class names; each one is also used as a sub-folder name under ./tumor_dataset/Training.
labels = ['glioma_tumor','meningioma_tumor','no_tumor','pituitary_tumor']

# `images` collects the processed images; `y` collects the matching class
# label, appended in lock-step so both lists stay parallel.
images = []
y = []
for i in labels:
    folderPath = os.path.join('./tumor_dataset/Training',i)
    # NOTE(review): os.listdir is documented to return entries in arbitrary
    # order, so the sample order (and any later seeded split that depends on
    # it) may differ between machines — confirm before relying on exact
    # reproducibility.
    for j in os.listdir(folderPath):
        img = skimage.io.imread(os.path.join(folderPath,j),)
        img = skimage.transform.resize(img,(image_size,image_size))
        # NOTE(review): presumably every file is a 3-channel RGB image, which
        # is what rgb2gray expects — verify against the dataset.
        img = skimage.color.rgb2gray(img)
        images.append(img)
        y.append(i)
        
images = np.array(images)

# Flatten each image into a single row of image_size**2 values.
X = images.reshape((-1, image_size**2))
y = np.array(y)

In [4]:
# Hold out 30% of the samples as the test set. random_state=0 pins the shuffle,
# so the same X / y ordering always yields the same split.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.3, random_state=0)

# Classification with Scikit Learn Library (Programming Assignment)
### Variable Explanation (Do not change variable names)
- 'X_train' contains the feature vectors of the training dataset
- 'y_train' contains the target labels of the training dataset
- 'X_test' contains the feature vectors of the test dataset
- 'y_test' contains the target labels of the test dataset
- 'y_pred' may be initialized as a zero vector; fill 'y_pred' with the predicted labels of the test dataset

### Find the best model and hyperparameter for tumor classification
- Find the best random seed as well and fix it to reproduce your result on other computers.

In [None]:
#TODO
#1. Create a classification object in scikit learn package (such as perceptron, logistic regression, or other classification algorithms)
#2. Fit the object to training dataset
#3. Predict the label of test data point (X_test)
# - Do not change the variable name "y_pred"

# One fixed seed for every estimator in this cell so the run is repeatable.
SEED = 42


def augment_flat_images(flat_images, targets, side, angle=10):
    """Extend a set of flattened square images with simple geometric variants.

    For every input row three rows are emitted, in this order: the original
    image, the image rotated by ``angle`` degrees, and the image mirrored
    left-to-right. All variants keep the pixel scale of the input, so they can
    be mixed freely with the original feature rows.

    Parameters
    ----------
    flat_images : array-like of shape (n_samples, side * side)
        Flattened grayscale images.
    targets : array-like of shape (n_samples,)
        Label of each image.
    side : int
        Height and width of the square image each row is reshaped to.
    angle : float, default 10
        Rotation angle in degrees passed to ``skimage.transform.rotate``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Augmented feature rows and their labels (three per input sample).
    """
    aug_images = []
    aug_labels = []
    for flat, target in zip(flat_images, targets):
        picture = flat.reshape((side, side))
        variants = (
            picture,
            transform.rotate(picture, angle=angle),
            # Plain mirror. The previous img_as_ubyte conversion is dropped on
            # purpose: it rescaled this copy to 0-255 integers while every
            # other row stays a float image, which distorts RBF distances.
            np.fliplr(picture),
        )
        for variant in variants:
            aug_images.append(variant.ravel())
            aug_labels.append(target)
    return np.array(aug_images), np.array(aug_labels)


augmented_train_images, augmented_train_labels = augment_flat_images(
    X_train, y_train, image_size
)

# Tune C / gamma on the ORIGINAL training rows only: running cross-validation
# on the augmented set would put variants of the same image into both the
# fitting and the validation fold and inflate the scores.
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'gamma': [0.001, 0.01, 0.1, 1]}
grid_search = GridSearchCV(sklearn.svm.SVC(kernel='rbf', random_state=SEED), param_grid, cv=3)
grid_search.fit(X_train, y_train)

# Baseline: best grid-search model, refit by GridSearchCV on the un-augmented
# training set. Kept for comparison with the augmented model below.
best_classifier = grid_search.best_estimator_
y_pred_best = best_classifier.predict(X_test)

# Final model: the hyper-parameters selected above (instead of hand-copied
# constants), trained on the augmented training set.
classifier = sklearn.svm.SVC(kernel='rbf', random_state=SEED, **grid_search.best_params_)
classifier.fit(augmented_train_images, augmented_train_labels)

# y_pred must hold the real predictions when the accuracy cell runs; it is no
# longer overwritten with a placeholder array after predict().
y_pred = classifier.predict(X_test)


### Print accuracy (do not modify the following block)

In [None]:
print('Accuracy: %.2f' % sklearn.metrics.accuracy_score(y_test, y_pred))