# Projekt z przedmiotu Metody Klasyfikacji Danych Geoinformatycznych - Etap 3

### Analiza/klasyfikacja dowolnych obrazów za pomocą macierzy wspólnych wystąpień (Grey Level Co-occurence Matrices - GLCM)

#### Skład zespołu: Marta Dzięgielewska s176363, Maciej Gielert s176137

##### Link do etapu 1: https://docs.google.com/document/d/1HsTw6qhrXO0SoeWtg8hENvtkOi2oqkhV-VhOnhndA5s/edit?usp=sharing

##### Link do etapu 2: https://github.com/mdziegielewska/Image-Analysis-Classification-With-GLCM/blob/main/texture_analysis.ipynb

In [96]:
# imports

import tensorflow as tf
import numpy as np
import skimage.transform
from scipy import ndimage
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
from sklearn import preprocessing
from skimage.color import label2rgb
from skimage.util import montage as montage2d
from skimage.feature import graycomatrix, graycoprops
import cv2
import glob
import os

%matplotlib inline

##### Dataset do pobrania: https://www.robots.ox.ac.uk/~vgg/data/dtd/

In [97]:
SIZE = 128

# capture images and labels into arrays

images = []
labels = [] 

for directory_path in glob.glob("dtd/images/*/"):
    label = os.path.basename(os.path.dirname(directory_path))

    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        # reading images in grayscale
        img = cv2.imread(img_path, 0)
        img = cv2.resize(img, (SIZE, SIZE))

        images.append(img)
        labels.append(label)

images = np.array(images)
labels = np.asarray(labels)

In [98]:
print(images.shape)
print(labels.shape)

(5640, 128, 128)
(5640,)


In [99]:
# encode labels from text (folder names) to integers.

encoder = preprocessing.LabelEncoder()
encoder.fit(labels)
labels = encoder.transform(labels)

In [100]:
# test train split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [101]:
grayco_prop_list = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

In [115]:
# feature extractor function

def feature_extractor(dataset):
    image_dataset = pd.DataFrame()

    for image in range(dataset.shape[0]):
        df = pd.DataFrame() 
        for angle in [0, np.pi/4, np.pi/2, 3*np.pi/4]:
            for distance in [0,1,3,5]:
                for greylevel in [256]:
                
                    img = dataset[image, :, :]
                    #print(img.shape)

                    # calculate glcm
                    glcm = graycomatrix(img, distances=[distance], angles=[angle], levels=greylevel, symmetric=True, normed=True)

                    for prop in grayco_prop_list: 
                        glcm_prop = graycoprops(glcm, prop)[0]
                        df[f'{prop}{angle}{distance}{greylevel}'] = glcm_prop

                    # append features from current image to the dataset
        image_dataset = image_dataset.append(df)
        
    return image_dataset

In [155]:
# extract features from training images

#image_features_train = feature_extractor(X_train)
train_data = image_features_train.to_numpy()

#image_features_test = feature_extractor(X_test)
test_data = image_features_test.to_numpy()

In [156]:
image_features_train

Unnamed: 0,contrast00256,dissimilarity00256,homogeneity00256,energy00256,correlation00256,ASM00256,contrast01256,dissimilarity01256,homogeneity01256,energy01256,...,homogeneity2.3561944901923453256,energy2.3561944901923453256,correlation2.3561944901923453256,ASM2.3561944901923453256,contrast2.3561944901923455256,dissimilarity2.3561944901923455256,homogeneity2.3561944901923455256,energy2.3561944901923455256,correlation2.3561944901923455256,ASM2.3561944901923455256
0,0.0,0.0,1.0,0.109267,1.0,0.011939,129.773991,7.674705,0.154860,0.022015,...,0.111265,0.019183,0.857710,0.000368,674.418835,16.474246,0.084893,0.017725,0.723170,0.000314
0,0.0,0.0,1.0,0.089057,1.0,0.007931,2485.057825,39.998278,0.023768,0.009696,...,0.024970,0.009696,0.008804,0.000094,2395.573556,39.382999,0.025601,0.009755,0.023315,0.000095
0,0.0,0.0,1.0,0.086952,1.0,0.007561,417.124446,11.890933,0.136776,0.016973,...,0.074291,0.012921,0.739375,0.000167,3342.736863,40.981660,0.052787,0.011009,0.438049,0.000121
0,0.0,0.0,1.0,0.082800,1.0,0.006856,557.382443,14.519131,0.117440,0.015267,...,0.075379,0.012526,0.733607,0.000157,2066.179045,32.413437,0.048520,0.010757,0.559282,0.000116
0,0.0,0.0,1.0,0.078950,1.0,0.006233,664.149299,19.310470,0.061154,0.010397,...,0.045299,0.009778,0.750276,0.000096,1485.146852,29.850676,0.036526,0.009308,0.620754,0.000087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.0,0.0,1.0,0.076091,1.0,0.005790,1411.812808,27.609806,0.043412,0.009239,...,0.027942,0.008335,0.405786,0.000069,3688.209482,47.511381,0.022534,0.008204,0.213557,0.000067
0,0.0,0.0,1.0,0.075094,1.0,0.005639,304.685839,10.476070,0.161949,0.014980,...,0.049389,0.009887,0.754501,0.000098,2592.342092,39.602497,0.026368,0.008531,0.495923,0.000073
0,0.0,0.0,1.0,0.083320,1.0,0.006942,274.631459,9.894131,0.166134,0.018564,...,0.073627,0.012921,0.742942,0.000167,2164.963775,33.631829,0.040350,0.010719,0.497480,0.000115
0,0.0,0.0,1.0,0.085233,1.0,0.007265,1234.037094,26.612512,0.041441,0.010167,...,0.018004,0.009189,-0.196290,0.000084,2756.427940,41.638007,0.025499,0.009260,0.085144,0.000086


##### Wykonano trening na wszystkich cechach

##### SVM 

In [157]:
X_train2 = train_data
X_test2 = test_data
np.shape(test_data)
np.shape(train_data)


(4512, 96)

In [335]:
train_data = X_train2
test_data = X_test2
np.shape(test_data)
np.shape(train_data)

(4512, 96)

In [336]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

sel = SelectKBest(f_classif, k=40).fit(X_train2, y_train)
train_data = sel.transform(train_data)
test_data = sel.transform(test_data)


  f = msb / msw


In [337]:
print(np.shape(test_data))
print(np.shape(train_data))

(1128, 40)
(4512, 40)


In [338]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators = 50, random_state = 4235).fit(train_data , y_train)

In [339]:
y_pred = clf.predict(test_data)
print(y_pred)
print(y_test)

[10  9  5 ... 21 44 15]
[17 13 29 ... 38 25 22]


In [340]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
prec = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'{acc} {f1} {prec} {recall}')

0.23581560283687944 0.22429907811307742 0.24009306134756317 0.23582699102276916


##### Random Forest

##### LightGBM

##### Następnie sprawdzono klasyfikację przy 4 cechach: energii, homogeniczności, kontraście i korelacji

##### SVM 

##### Random Forest

##### LightGBM

##### Na koniec przetestowano homogeniczność, kontrast, korelacja, odmienność

##### SVM 

##### Random Forest

##### LightGBM

#### WNIOSKI: