# Image Analysis and Deep Learning - Assignment

You can use this notebook as a template for your assignment.

---

Group information:
- Max Noorland, max.noorland@gsom.polimi.it
- Diego Cossu, diego.cossu@gsom.polimi.it
- Daniyar Serik, daniyar.serik@gsom.polimi.it

Politecnico di Milano, May 2024

---

In [16]:
from google.colab import drive
drive.mount('/content/drive')

FOLDERNAME = 'Assignment - Deep Learning'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

# Change the directory
%cd /content/drive/My\ Drive/$FOLDERNAME
# Check that we are in the correct directory
!pwd
# list all the files
%ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Assignment - Deep Learning
/content/drive/My Drive/Assignment - Deep Learning
AssignmentRules.pdf  data.npz  ReportTemplate.docx  Starting_Kit.ipynb


## Import libraries

In [34]:
# Fix randomness and hide warnings
seed = 42

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
np.random.seed(seed)
import matplotlib.pyplot as plt
import random

from sklearn.metrics import accuracy_score

import logging
import numpy as np
import pandas as pd
import random
random.seed(seed)

import seaborn as sns
import cv2
from skimage.feature import graycomatrix, greycoprops  # Updated function
import skimage.filters
import skimage.measure
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [35]:
# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from sklearn import neighbors
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

2.15.0


In [36]:
# Import other libraries
from sklearn.metrics import accuracy_score

## Load data


In [37]:
data = np.load("data.npz", allow_pickle=True)
list(data.keys())

['data', 'labels', 'names']

In [38]:
X = data['data']
X.shape

(15092, 48, 48, 3)

In [39]:
y = data['labels']
y.shape

(15092, 1)

In [40]:
labels = data['names']
labels

array({0: 'Basophil', 1: 'Eosinophil', 2: 'Erythroblast', 3: 'Immature Granulocyte', 4: 'Lymphocyte', 5: 'Monocyte', 6: 'Neutrophil', 7: 'Platelet'},
      dtype=object)

## Model with hand-crafted features

In [41]:
def extract_features(images):
    features = []
    for img in images:
        # Ensure the image is 2D (grayscale)
        if img.ndim == 3:
            img = img.astype('uint8')
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Example feature: Mean and standard deviation of pixel values
        mean = np.mean(img)
        std = np.std(img)

        # Example feature: GLCM properties
        glcm = graycomatrix(img, [1], [0], 256, symmetric=True, normed=True)  # Updated function
        contrast = greycoprops(glcm, 'contrast')[0, 0]
        dissimilarity = greycoprops(glcm, 'dissimilarity')[0, 0]
        homogeneity = greycoprops(glcm, 'homogeneity')[0, 0]
        energy = greycoprops(glcm, 'energy')[0, 0]
        correlation = greycoprops(glcm, 'correlation')[0, 0]

        # Append extracted features to the list
        features.append([mean, std, contrast, dissimilarity, homogeneity, energy, correlation])

    return np.array(features)

In [42]:
# Load dataset
data = np.load('data.npz')
images = data['data']  # Use the correct key
labels = data['labels']  # Use the correct key

In [43]:
# Extract features and split the dataset
features = extract_features(images)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=seed)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/usr/local/lib/python3.10/dist-packages/skimage/feature/__init__.py:42: skimage_deprecation: Function ``greycoprops`` is deprecated and will be removed in version 1.0. Use ``skimage.feature.graycoprops`` instead.
  removed_version='1.0')
/usr/local/lib/python3.10/dist-packages/skimage/feature/__init__.py:42: skimage_deprecation: Function ``greycoprops`` is deprecated and will be removed in version 1.0. Use ``skimage.feature.graycoprops`` instead.
  removed_version='1.0')
/usr/local/lib/python3.10/dist-packages/skimage/feature/__init__.py:42: skimage_deprecation: Function ``greycoprops`` is deprecated and will be removed in version 1.0. Use ``skimage.feature.graycoprops`` instead.
  removed_version='1.0')
/usr/local/lib/python3.10/dist-packages/skimage/feature/__init__.py:42: skimage_deprecation: Function ``greycoprops`` is deprecated and will be removed in version 1.0. Use ``skimage.feature.graycoprops`` instead.
  remove

In [44]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [45]:
# Train a RandomForest classifier on the extracted features
rf_clf = RandomForestClassifier(random_state=seed)
rf_clf.fit(X_train, y_train)

In [46]:
# Evaluate the model
y_pred = rf_clf.predict(X_test)
print(f"Accuracy of hand-crafted features model: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))

Accuracy of hand-crafted features model: 0.6435905929115601
              precision    recall  f1-score   support

           0       0.47      0.41      0.44       214
           1       0.52      0.62      0.56       565
           2       0.81      0.60      0.69       296
           3       0.52      0.54      0.53       495
           4       0.57      0.49      0.53       193
           5       0.45      0.30      0.36       264
           6       0.73      0.85      0.78       603
           7       0.98      0.98      0.98       389

    accuracy                           0.64      3019
   macro avg       0.63      0.60      0.61      3019
weighted avg       0.64      0.64      0.64      3019



## Convolutional Neural Network (CNN) Implementation

In [47]:
# Define the CNN model
def create_cnn_model(input_shape, num_classes):
    model = tfk.Sequential()
    model.add(tfkl.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(tfkl.MaxPooling2D((2, 2)))
    model.add(tfkl.Conv2D(64, (3, 3), activation='relu'))
    model.add(tfkl.MaxPooling2D((2, 2)))
    model.add(tfkl.Conv2D(128, (3, 3), activation='relu'))
    model.add(tfkl.Flatten())
    model.add(tfkl.Dense(128, activation='relu'))
    model.add(tfkl.Dropout(0.5))
    model.add(tfkl.Dense(num_classes, activation='softmax'))
    return model

In [48]:
# Load dataset
data = np.load('data.npz')
images = data['data']  # Use the correct key
labels = data['labels']  # Use the correct key

In [49]:
# Normalize images
images = images / 255.0

In [50]:
# Ensure the images are 2D (grayscale)
if images.ndim == 4 and images.shape[-1] == 3:
    images = np.array([cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY) for img in images])

In [51]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=seed)

In [52]:
# Reshape images for CNN
X_train = X_train.reshape(X_train.shape[0], 48, 48, 1)
X_test = X_test.reshape(X_test.shape[0], 48, 48, 1)

In [53]:
# One-hot encode labels
y_train = tfk.utils.to_categorical(y_train, num_classes=8)
y_test = tfk.utils.to_categorical(y_test, num_classes=8)

In [54]:
# Create the model
input_shape = (48, 48, 1)  # Assuming images are grayscale
model = create_cnn_model(input_shape, num_classes=8)

In [55]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [56]:
# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [57]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'CNN Model Accuracy: {test_accuracy:.4f}')

CNN Model Accuracy: 0.2001


In [58]:
# Save the model
model.save('FinalModel')

## Make test inference
This is the function we will use to evaluate your deep model.

In [73]:
import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras as tfk
from sklearn.metrics import accuracy_score

def test_model(test_data_path, model_path):
    # Load the data
    data = np.load(test_data_path)
    X_test = data['data']
    y_test = data['labels'].astype('int32')

    # The data that will be used to test the model will be in range [0,255] and with type int32
    X_test = (X_test / 255.0).astype('float32')

    # Ensures the images are 2D (grayscale)
    if X_test.ndim == 4 and X_test.shape[-1] == 3:
        X_test = np.array([cv2.cvtColor(img.astype('uint8'), cv2.COLOR_BGR2GRAY) for img in X_test])

    # Reshapes images for CNN
    X_test = X_test.reshape(X_test.shape[0], 48, 48, 1)

    # Load the final model
    final_model = tfk.models.load_model(model_path)

    # Make the test predictions
    predictions = final_model.predict(X_test, verbose=0)
    predictions = np.argmax(predictions, axis=-1).astype('int32')

    # Compute classification metrics
    accuracy = accuracy_score(y_test, predictions)

    # Display the computed metrics
    print('Accuracy:', accuracy.round(4))

# Correct paths based on your directory listing
test_data_path = '/content/drive/MyDrive/Assignment - Deep Learning/data.npz'
model_path = '/content/drive/MyDrive/Assignment - Deep Learning/FinalModel'

test_model(test_data_path, model_path)

Accuracy: 0.1949
