<a href="https://colab.research.google.com/github/roesterl888/week_1/blob/main/week_1/CIFAR10-ShallowLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 1: CIFAR10 Challenge

**CIFAR10** (http://www.cs.toronto.edu/~kriz/cifar.html) is one of the most famous ML data sets.

## Data
* 32x32 color images
* in 10 classes
* 50k training images
* 10k test images



<img src="https://production-media.paperswithcode.com/datasets/CIFAR-10-0000000431-b71f61c0_U5n3Glr.jpg" width=700>

In [1]:
#get data
from keras.datasets import cifar10
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 0us/step


In [2]:
#traindata: 50k 32X32 rgb images
X_train.shape

(50000, 32, 32, 3)

In [3]:
#labels
y_train

array([[6],
       [9],
       [9],
       ...,
       [9],
       [1],
       [1]], dtype=uint8)

## Task: build the best classifier (with feature extration) using the methods you know from ML1+2
* work in small teams (2-4)
* use NumPy pre-processing, feature extraction and hyer-parameter tuning in Scikit-Learn
* no Neural Networks!
* best test F1-Score winns!

In [23]:
import numpy as np
from skimage.feature import hog, local_binary_pattern
import cv2
from skimage.color import rgb2gray
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

def preprocess_images(images):

    images = images.astype('float32') / 255.0

    images = images - np.mean(images, axis=0)

    images = images / np.std(images, axis=0)
    return images

def extract_features(images):
    features = []
    for image in images:

        hist_r = cv2.calcHist([image], [0], None, [256], [0, 256])
        hist_g = cv2.calcHist([image], [1], None, [256], [0, 256])
        hist_b = cv2.calcHist([image], [2], None, [256], [0, 256])

        gray_image = rgb2gray(image)

        hog_features = hog(gray_image, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=False)

        lbp_features = local_binary_pattern(image[:,:,0], P=8, R=1)
        lbp_features = np.histogram(lbp_features, bins=256)[0]

        moments = cv2.moments(gray_image)

        feature_vector = np.concatenate([hist_r.flatten(), hist_g.flatten(), hist_b.flatten(),
                                          hog_features, lbp_features,
                                          np.array(list(moments.values()))])
        features.append(feature_vector)
    return np.array(features)


X_train_processed = preprocess_images(X_train)
X_train_features = extract_features(X_train_processed)

X_test_processed = preprocess_images(X_test)
X_test_features = extract_features(X_test_processed)




In [None]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder

X_train_features = X_train_features.reshape(len(X_train_features), -1)
X_test_features = X_test_features.reshape(len(X_test_features), -1)

X_train_features = np.nan_to_num(X_train_features)
X_test_features = np.nan_to_num(X_test_features)

encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', SVC())
])

param_grid = {
    'classifier__C': [0.1, 1, 10],
    'classifier__kernel': ['linear', 'rbf'],
    'classifier__gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(pipeline, param_grid, scoring='accuracy', cv=5, error_score='raise')
grid_search.fit(X_train_features, y_train)

best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test_features)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')

print("Beste Parameter:", grid_search.best_params_)
print("Accuracy:", accuracy)
print("F1-Score:", f1)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
