# Projekt z przedmiotu Metody Klasyfikacji Danych Geoinformatycznych - Etap 3

### Analiza/klasyfikacja dowolnych obrazów za pomocą macierzy wspólnych wystąpień (Grey Level Co-occurence Matrices - GLCM)

#### Skład zespołu: Marta Dzięgielewska s176363, Maciej Gielert s176137

##### Link do etapu 1: https://docs.google.com/document/d/1HsTw6qhrXO0SoeWtg8hENvtkOi2oqkhV-VhOnhndA5s/edit?usp=sharing

##### Link do etapu 2: https://github.com/mdziegielewska/Image-Analysis-Classification-With-GLCM/blob/main/texture_analysis.ipynb

In [1]:
# imports

import tensorflow as tf
import numpy as np
import skimage.transform
from scipy import ndimage
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd
from sklearn import preprocessing
from skimage.color import label2rgb
from skimage.util import montage as montage2d
from skimage.feature import graycomatrix, graycoprops
import cv2
import glob
import os

%matplotlib inline

##### Dataset do pobrania: https://www.robots.ox.ac.uk/~vgg/data/dtd/

In [2]:
SIZE = 128

# capture images and labels into arrays

images = []
labels = [] 

for directory_path in glob.glob("dtd/images/*/"):
    label = os.path.basename(os.path.dirname(directory_path))

    for img_path in glob.glob(os.path.join(directory_path, "*.jpg")):
        # reading images in grayscale
        img = cv2.imread(img_path, 0)
        img = cv2.resize(img, (SIZE, SIZE))

        images.append(img)
        labels.append(label)

images = np.array(images)
labels = np.asarray(labels)

In [3]:
labels_history = labels

In [None]:
print(images.shape)
print(labels.shape)

In [4]:
# encode labels from text (folder names) to integers.

encoder = preprocessing.LabelEncoder()
encoder.fit(labels)
labels = encoder.transform(labels)

In [5]:
# test train split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

In [6]:
grayco_prop_list = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM']

In [None]:
# feature extractor function

def feature_extractor(dataset):
    image_dataset = pd.DataFrame()

    for image in range(dataset.shape[0]):
        df = pd.DataFrame() 
        for angle in [0, np.pi/4, np.pi/2, 3*np.pi/4]:
            for distance in [0,1,3,5]:
                for greylevel in [256]:
                
                    img = dataset[image, :, :]
                    #print(img.shape)

                    # calculate glcm
                    glcm = graycomatrix(img, distances=[distance], angles=[angle], levels=greylevel, symmetric=True, normed=True)

                    for prop in grayco_prop_list: 
                        glcm_prop = graycoprops(glcm, prop)[0]
                        df[f'{prop}{angle}{distance}{greylevel}'] = glcm_prop

                    # append features from current image to the dataset
        image_dataset = image_dataset.append(df)
        
    return image_dataset

In [None]:
# extract features from training images

image_features_train = feature_extractor(X_train)
train_data = image_features_train.to_numpy()

image_features_test = feature_extractor(X_test)
test_data = image_features_test.to_numpy()

In [None]:
image_features_train

In [None]:
image_features_test

##### Wykonano trening na wszystkich cechach

##### SVM 

In [None]:
tocsvfile = image_features_train
tocsvfile['labels'] = y_train.tolist()
tocsvfile.to_csv('train.csv', index=False)

tocsvfile = image_features_test
tocsvfile['labels'] = y_test.tolist()
tocsvfile.to_csv('test.csv', index=False)

In [7]:
df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')
y_train = np.ravel(df_train.iloc[:,-1:].to_numpy())
y_test = np.ravel(df_test.iloc[:,-1:].to_numpy())
train_data = df_train[df_train.columns[:-1]]
test_data = df_test[df_test.columns[:-1]]

In [8]:
X_train2 = train_data
X_test2 = test_data
np.shape(test_data)
np.shape(train_data)


(4512, 96)

In [None]:
train_data = X_train2
test_data = X_test2
np.shape(test_data)
np.shape(train_data)

In [9]:
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

sel = SelectKBest(f_classif, k=40).fit(X_train2, y_train)
train_data = sel.transform(train_data)
test_data = sel.transform(test_data)


  y = column_or_1d(y, warn=True)
  f = msb / msw


In [10]:
print(np.shape(test_data))
print(np.shape(train_data))

(1128, 40)
(4512, 40)


In [11]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
X, y = train_data , y_train
clf.fit(X, y)
y_pred = clf.predict(test_data)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
prec = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

  y = column_or_1d(y, warn=True)


accuracy: 0.19326241134751773
f1: 0.17373216039184866
precison: 0.22920448834706647
recall: 0.19579390358262141


  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators = 50, random_state = 4235).fit(X, y)
y_pred = clf.predict(test_data)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
prec = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

  clf = RandomForestClassifier(n_estimators = 50, random_state = 4235).fit(X, y)


accuracy: 0.23581560283687944
f1: 0.22429907811307742
precison: 0.24009306134756317
recall: 0.23582699102276916


##### Random Forest

In [None]:
from lightgbm import LGBMClassifier

lightgbm_model = LGBMClassifier(
    boosting_type='gbdt',
    min_child_weight=0.001,
    max_depth=10
)
lightgbm_model.fit(X, y)
y_pred=lightgbm_model.predict(test_data)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
prec = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

##### LightGBM

##### Następnie sprawdzono klasyfikację przy ograniczeniu do 10 klas

In [None]:
def show_example_img(label_to_find):
    for _img, _label in zip(images,labels_history):
        if _label == label_to_find:
            plt.figure()
            plt.title(_label)
            plt.imshow(_img, cmap='gray')
            plt.show()
            return

In [None]:
def smaller_dataset(x, y, labels):
    new_train_data = []
    new_y = []
    for x_d,y_d in zip(x,y):
        if y_d in labels:
            new_train_data.append(x_d)
            new_y.append(y_d)
            
    return new_train_data, new_y

In [None]:
import random
size = 15

labels = random.sample(range(1, 47), size)
#labels = [5, 14]
#labels = encoder.transform(['dotted', 'polka-dotted'])

X, y = smaller_dataset(train_data, y_train, labels)
X_test_new, y_test_new = smaller_dataset(test_data, y_test, labels)

labels_strs = encoder.inverse_transform(labels)
print(f'chosen classes:\n{labels_strs}')
for labels_str in labels_strs:
    show_example_img(labels_str)

##### SVM 

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(X, y)
y_pred = clf.predict(X_test_new)
acc = accuracy_score(y_test_new, y_pred)
f1 = f1_score(y_test_new, y_pred, average='macro')
prec = precision_score(y_test_new, y_pred, average='macro')
recall = recall_score(y_test_new, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

##### Random Forest

In [None]:
clf = RandomForestClassifier(n_estimators = 50, random_state = 4235).fit(X, y)
y_pred = clf.predict(X_test_new)
acc = accuracy_score(y_test_new, y_pred)
f1 = f1_score(y_test_new, y_pred, average='macro')
prec = precision_score(y_test_new, y_pred, average='macro')
recall = recall_score(y_test_new, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

##### LightGBM

In [None]:
lightgbm_model = LGBMClassifier(
    boosting_type='gbdt',
    min_child_weight=0.001,
    max_depth=10
)
lightgbm_model.fit(X, y)
y_pred=lightgbm_model.predict(X_test_new)
acc = accuracy_score(y_test_new, y_pred)
f1 = f1_score(y_test_new, y_pred, average='macro')
prec = precision_score(y_test_new, y_pred, average='macro')
recall = recall_score(y_test_new, y_pred, average='macro')

print(f'accuracy: {acc}\nf1: {f1}\nprecison: {prec}\nrecall: {recall}')

##### Na koniec przetestowano homogeniczność, kontrast, korelacja, odmienność

##### SVM 

##### Random Forest

##### LightGBM

#### WNIOSKI: