In [0]:
#The code chunk downloads and unzips the required data  

import urllib.request
import zipfile

url = 'http://madm.dfki.de/files/sentinel/EuroSAT.zip'
urllib.request.urlretrieve(url,"2750.zip")
zf = zipfile.ZipFile("2750.zip")
zf.extractall()

In [18]:
pip uninstall opencv-python



In [19]:
pip install opencv-contrib-python==3.4.2.16



In [0]:
import cv2
import numpy as np
import os
import shutil
import random
from pathlib import Path
from sklearn.model_selection import train_test_split
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans

In [0]:
from fastai.vision import *
from fastai.metrics import error_rate

data_path= os.getcwd()
path= datapath4file(data_path+'/2750')

In [22]:
print(path)

/content/2750


In [23]:
root_dir = path
print(root_dir)

/content/2750


In [0]:
classes_dir = ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture', 'PermanentCrop', 'Residential', 'River', 'SeaLake']
test_ratio = 0.20

In [0]:
#remove already existing train and test folder (if any)

if os.path.exists(root_dir / 'train'):
  shutil.rmtree(root_dir / 'train', ignore_errors=False, onerror=None)

if os.path.exists(root_dir / 'test'):
  shutil.rmtree(root_dir / 'test', ignore_errors=False, onerror=None)

In [26]:
for cls in classes_dir:
    os.makedirs(root_dir / 'train' / cls)
    os.makedirs(root_dir / 'test' / cls)


    # Creating partitions of the data after shuffeling
    src = root_dir / cls # Folder to copy images from

    allFileNames = os.listdir(src)
    np.random.shuffle(allFileNames)
    train_FileNames, test_FileNames = np.split(np.array(allFileNames),
                                                              [int(len(allFileNames)* (1 - test_ratio))])


    train_FileNames = [src / name for name in train_FileNames.tolist()]
    test_FileNames = [src / name for name in test_FileNames.tolist()]

    print('Total images: ', len(allFileNames))
    print('Training: ', len(train_FileNames))
    print('Testing: ', len(test_FileNames))

    # Copy-pasting images
    for name in train_FileNames:
        shutil.copy(name, root_dir / 'train' / cls)

    for name in test_FileNames:
        shutil.copy(name, root_dir / 'test' / cls)

Total images:  3000
Training:  2400
Testing:  600
Total images:  3000
Training:  2400
Testing:  600
Total images:  3000
Training:  2400
Testing:  600
Total images:  2500
Training:  2000
Testing:  500
Total images:  2500
Training:  2000
Testing:  500
Total images:  2000
Training:  1600
Testing:  400
Total images:  2500
Training:  2000
Testing:  500
Total images:  3000
Training:  2400
Testing:  600
Total images:  2500
Training:  2000
Testing:  500
Total images:  3000
Training:  2400
Testing:  600


In [27]:
train_path = root_dir/'train'
training_names = os.listdir(train_path)
training_names
# print(train_path)

['River',
 'Forest',
 'AnnualCrop',
 'Industrial',
 'SeaLake',
 'Pasture',
 'PermanentCrop',
 'HerbaceousVegetation',
 'Highway',
 'Residential']

In [28]:
test_path = root_dir/'test'
test_path = os.listdir(test_path)
print(test_path)

['River', 'Forest', 'AnnualCrop', 'Industrial', 'SeaLake', 'Pasture', 'PermanentCrop', 'HerbaceousVegetation', 'Highway', 'Residential']


In [29]:
test_path

['River',
 'Forest',
 'AnnualCrop',
 'Industrial',
 'SeaLake',
 'Pasture',
 'PermanentCrop',
 'HerbaceousVegetation',
 'Highway',
 'Residential']

In [0]:
# takes all images and convert them to grayscale. 
# return a dictionary that holds all images category by category. 

# def load_images_from_folder(folder):
#     images = {}
#     for filename in os.listdir(folder):
#         category = []
#         path = str(folder) + "/" + filename
#         print(path)
#         for cat in os.listdir(path):
#             img = cv2.imread(str(path) + "/" + cat,0)
#             if img is not None:
#               print("done")
#               #cv2.imwrite("/content/2750/result.jpg", img)
#             break
#     return images

def load_images_from_folder(folder):
    images = {}
    for filename in os.listdir(folder):
        category = []
        path = str(folder) + "/" + filename
        for cat in os.listdir(path):
            img = cv2.imread(str(path) + "/" + cat,0)
            #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            if img is not None:
                category.append(img)
        images[filename] = category
    return images

images = load_images_from_folder(root_dir/'train')  # take all images category by category 
test = load_images_from_folder(root_dir/'test') # take test images 

In [0]:
# Creates descriptors using sift 
# Takes one parameter that is images dictionary
# Return an array whose first index holds the decriptor_list without an order
# And the second index holds the sift_vectors dictionary which holds the descriptors but this is seperated class by class
def sift_features(images):
    sift_vectors = {}
    descriptor_list = []
    sift = cv2.xfeatures2d.SIFT_create()
    for key,value in images.items():
        features = []
        for img in value:
            kp, des = sift.detectAndCompute(img,None)
            if des is not None:
              descriptor_list.extend(des)
              features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

sifts = sift_features(images) 
# Takes the descriptor list which is unordered one
descriptor_list = sifts[0] 
# Takes the sift features that is seperated class by class for train data
all_bovw_feature = sifts[1] 
# Takes the sift features that is seperated class by class for test data
test_bovw_feature = sift_features(test)[1] 

In [0]:
# A k-means clustering algorithm who takes 2 parameter which is number 
# of cluster(k) and the other is descriptors list(unordered 1d array)
# Returns an array that holds central points.
def kmeans(k, descriptor_list):
    kmeans = KMeans(n_clusters = k, n_init=10)
    kmeans.fit(descriptor_list)
    visual_words = kmeans.cluster_centers_ 
    return visual_words
    
# Takes the central points which is visual words    
visual_words = kmeans(5, descriptor_list) 

In [0]:
#pip install pyyaml h5py

In [0]:
# from tensorflow.keras.models import load_model

# kmeans.save('my_model1.h5')  # creates a HDF5 file 'my_model.h5'

# #kmeans = load_model('my_model1')

In [0]:
# Takes 2 parameters. The first one is a dictionary that holds the descriptors that are separated class by class 
# And the second parameter is an array that holds the central points (visual words) of the k means clustering
# Returns a dictionary that holds the histograms for each images that are separated class by class. 

def find_index(image, center):
    count = 0
    ind = 0
    for i in range(len(center)):
        if(i == 0):
           count = distance.euclidean(image, center[i]) 
           #count = L1_dist(image, center[i])
        else:
            dist = distance.euclidean(image, center[i]) 
            #dist = L1_dist(image, center[i])
            if(dist < count):
                ind = i
                count = dist
    return ind

def image_class(all_bovw, centers):
    dict_feature = {}
    for key,value in all_bovw.items():
        category = []
        for img in value:
            histogram = np.zeros(len(centers))
            for each_feature in img:
                ind = find_index(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature
    
# Creates histograms for train data    
bovw_train = image_class(all_bovw_feature, visual_words) 
# Creates histograms for test data
bovw_test = image_class(test_bovw_feature, visual_words) 


In [0]:
# 1-NN algorithm. We use this for predict the class of test images.
# Takes 2 parameters. images is the feature vectors of train images and tests is the feature vectors of test images
# Returns an array that holds number of test images, number of correctly predicted images and records of class based images respectively
def knn(images, tests):
    num_test = 0
    correct_predict = 0
    class_based = {}
    
    for test_key, test_val in tests.items():
        class_based[test_key] = [0, 0] # [correct, all]
        for tst in test_val:
            predict_start = 0
            #print(test_key)
            minimum = 0
            key = "a" #predicted
            for train_key, train_val in images.items():
                for train in train_val:
                    if(predict_start == 0):
                        minimum = distance.euclidean(tst, train)
                        #minimum = L1_dist(tst,train)
                        key = train_key
                        predict_start += 1
                    else:
                        dist = distance.euclidean(tst, train)
                        #dist = L1_dist(tst,train)
                        if(dist < minimum):
                            minimum = dist
                            key = train_key
            
            if(test_key == key):
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1
            #print(minimum)
    return [num_test, correct_predict, class_based]
    
# Call the knn function    
results_bowl = knn(bovw_train, bovw_test) 


In [37]:
# Calculates the average accuracy and class based accuracies.  
def accuracy(results):
    avg_accuracy = (results[1] / results[0]) * 100
    print("Average accuracy: %" + str(avg_accuracy))
    print("\nClass based accuracies: \n")
    for key,value in results[2].items():
        acc = (value[0] / value[1]) * 100
        print(key + " : %" + str(acc))
        
# Calculates the accuracies and write the results to the console.       
accuracy(results_bowl) 

Average accuracy: %34.50487429451001

Class based accuracies: 

River : %51.369863013698634
Forest : %0.0
AnnualCrop : %29.853479853479854
Industrial : %66.4
SeaLake : %0.0
Pasture : %5.928853754940711
PermanentCrop : %22.587268993839835
HerbaceousVegetation : %15.698924731182796
Highway : %16.768916155419223
Residential : %58.87372013651877
