####ALL IMPORTS

In [155]:
import os
import random
from time import time
from glob import glob
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from collections import Counter
import dill as pickle
from plotly import graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from matplotlib import pyplot as plt
%matplotlib inline
import cv2
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import fbeta_score, confusion_matrix
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as T, models
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from torchsummary import summary


import torch; 
torch.backends.mps.is_available()

True

In [156]:
d = "cpu"
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    d = "mps"
elif torch.cuda.is_available():
    d = "cuda:0"
device = torch.device(d)

print("Device: {}".format(device))

Device: mps


In [157]:
%ls

[34mArchive[m[m/
Archive.zip
[34mArchive_2[m[m/
[34mCOMP6721_Group_L[m[m/
Euro sat - Resnet 30 epochs without weights Transfer Learning.ipynb
Euro sat - Resnet 30 epochs without weights- Transfer Learning with 13 bands.ipynb
Euro sat - Resnet 30 epochs without weights-TSNE.ipynb
Euro sat - Resnet 30 epochs without weights.ipynb
Euro_sat_VGG.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET - TESTING DATA.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET - TSNE.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET.ipynb
PLANET_IMAGE_CLASSIFICATION-DENSENET121.ipynb
PLANET_IMAGE_CLASSIFICATION-VGG16.ipynb
[34mPlanetDataset[m[m/
Satellite_Classification_using_RESNET-TSNE.ipynb
Satellite_Classification_using_RESNET-oop.ipynb
Transfer_Learning_Satellite_Classification_using_RESNET.ipynb
densenet.pth
resnet.pth
rsicb_densenet.ipynb
sat_data_vgg_op.ipynb
train_results.pkl
train_results_densnet.pkl


In [158]:
data_dir = './PlanetDataset'
print(os.listdir(data_dir))

['.DS_Store', 'test-jpg', 'train_v2.csv', 'test-jpg-additional', 'train-jpg']


In [159]:
path = "./PlanetDataset/"
path_train = os.path.join(path, "train-jpg")
path_test = os.path.join(path, "test-jpg")
print(
    f"train files: {len(os.listdir(path_train))}, "
    f"test files: {len(os.listdir(path_test))}"
)

train files: 40479, test files: 40669


In [160]:
RARE_CLASSES = [
    "bare_ground", "selective_logging", "artisinal_mine", "blooming", "slash_burn", "blow_down", "conventional_mine"
]

In [161]:
class CustomDatasetError(Exception):
    pass


class CustomDataset(Dataset):
    def __init__(self, df, labels, transform, path, is_train=True, idx_tta=None):
        super().__init__()
        self.df = df
        self.labels = labels
        self.transform = transform
        if isinstance(path, str):
            self.paths = [path]
        elif isinstance(path, (list, tuple)):
            self.paths = path
        else:
            raise CustomDatasetError(f"Path type must be str, list or tuple, got: {type(path)}")
        self.is_train = is_train
        if not is_train:
            if not idx_tta in list(range(6)):
                raise CustomDatasetError(
                    f"In test mode, 'idx_tta' must be an int belonging to [0, 5], got: {repr(idx_tta)}"
                )
            self.idx_tta = idx_tta

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        filename = self.df.iloc[idx].image_name + ".jpg"
        for path in self.paths:
            if filename in os.listdir(path):
                file_path = os.path.join(path, filename)
                break
        else:
            raise CustomDatasetError(f"Can't fetch {filename} among {self.paths}")
        img = cv2.imread(file_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = self.labels[idx]
        return img, label

    def collate_fn(self, batch):
        imgs, labels = [], []
        for (img, label) in batch:
#             img = self.custom_augment(img)
            img = torch.tensor(img)
            img = img.permute(2, 0, 1)
            img = self.transform(img)
            imgs.append(img[None])
            labels.append(label)
        imgs = torch.cat(imgs).float().to(device)
        labels = torch.tensor(labels).float().to(device)
        return imgs, labels

    def load_img(self, idx, ax=None):
        img, labels = self[idx]
        label = self.df.iloc[idx].tags
        title = f"{label} - {labels}"
        if ax is None:
            plt.imshow(img)
            plt.title(title)
        else:
            ax.imshow(img)
            ax.set_title(title)
    
   

In [162]:
%ls

[34mArchive[m[m/
Archive.zip
[34mArchive_2[m[m/
[34mCOMP6721_Group_L[m[m/
Euro sat - Resnet 30 epochs without weights Transfer Learning.ipynb
Euro sat - Resnet 30 epochs without weights- Transfer Learning with 13 bands.ipynb
Euro sat - Resnet 30 epochs without weights-TSNE.ipynb
Euro sat - Resnet 30 epochs without weights.ipynb
Euro_sat_VGG.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET - TESTING DATA.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET - TSNE.ipynb
PLANET_IMAGE_CLASSIFICATION RESNET.ipynb
PLANET_IMAGE_CLASSIFICATION-DENSENET121.ipynb
PLANET_IMAGE_CLASSIFICATION-VGG16.ipynb
[34mPlanetDataset[m[m/
Satellite_Classification_using_RESNET-TSNE.ipynb
Satellite_Classification_using_RESNET-oop.ipynb
Transfer_Learning_Satellite_Classification_using_RESNET.ipynb
densenet.pth
resnet.pth
rsicb_densenet.ipynb
sat_data_vgg_op.ipynb
train_results.pkl
train_results_densnet.pkl


In [163]:
model = torch.load("./resnet.pth")
train_results = pickle.load(open("train_results.pkl", "rb"))

In [164]:
train_results

{'loss_train': [0.5604071781039238,
  0.37538221627473833,
  0.3026528775691986,
  0.26621142849326135,
  0.2582210019230843,
  0.255656486004591,
  0.2470903255045414,
  0.24035376347601414,
  0.2224280998110771,
  0.23372353315353395],
 'loss_val': [0.4276528537273407,
  0.30538668036460875,
  0.2803580641746521,
  0.23679961264133453,
  0.2375844568014145,
  0.23957369327545167,
  0.2232210487127304,
  0.21999317407608032,
  0.21510626375675201,
  0.20606468617916107],
 'score_train': [19.558823529411764,
  51.654411764705884,
  72.90441176470588,
  80.80882352941177,
  83.56617647058823,
  84.44852941176471,
  84.41176470588235,
  84.85294117647058,
  86.61764705882354,
  86.25],
 'score_val': [33.52941176470588,
  73.23529411764706,
  77.64705882352942,
  84.70588235294117,
  85.73529411764706,
  85.0,
  86.1764705882353,
  84.26470588235294,
  87.6470588235294,
  88.38235294117646],
 'Y_hat_val': array([[4.50200617e-01, 3.22249867e-02, 2.21465603e-02, 2.01298445e-02,
         1.4

In [165]:
loss_train = train_results["loss_train"]
loss_val = train_results["loss_val"]
score_train = train_results["score_train"]
score_val = train_results["score_val"]

fig = make_subplots(rows=1, cols=2, subplot_titles=("Loss", "Fbeta scores"))
fig.add_trace(
    go.Scatter(
        x=list(range(len(loss_train))),
        y=loss_train,
        name="loss_train",
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=list(range(len(loss_val))),
        y=loss_val,
        name="loss_val",
    ),
    row=1, col=1
)
fig.add_trace(
    go.Scatter(
        x=list(range(len(score_train))),
        y=score_train,
        name="score_train",
    ),
    row=1, col=2
)
fig.add_trace(
    go.Scatter(
        x=list(range(len(score_val))),
        y=score_val,
        name="score_val",
    ),
    row=1, col=2
)
fig.show()

In [168]:
def find_best_thresholds(Y_predicted, Y):
    N_tags = Y.shape[1]
    best_threshs = [0.2] * N_tags
    resolution = 100
    for jdx in range(N_tags):
        best_score = 0
        #threshs = np.zeros_like(best_threshs)
        threshs = best_threshs.copy()
        for kdx in range(resolution):
            kdx /= resolution
            threshs[jdx] = kdx
            Y_predicted_thresh = (Y_predicted > threshs).astype(float)
            score = calculate_average_accuracy(Y, Y_predicted_thresh)
            if score > best_score:
                best_score = score
                best_threshs[jdx] = kdx
    
    global_best_score = calculate_average_accuracy(Y, (Y_predicted > best_threshs).astype(float))
    print(f"threshs: {best_threshs} -- best score: {global_best_score}")
    
    return best_threshs

In [169]:
threshs = find_best_thresholds(Y_predicted_val, Y_val)


threshs: [0.39, 0.09, 0.12, 0.04, 0.03, 0.77, 0.18, 0.05, 0.11, 0.17, 0.17, 0.36, 0.71, 0.28, 0.04, 0.03, 0.3] -- best score: 94.11764705882352


In [170]:
model = torch.load("./resnet.pth")
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [178]:
def get_transforms_test():
    transform_test = T.Compose([
      T.ToPILImage(),
      T.Resize(224),
      T.ToTensor(),
      T.Normalize(
          mean=[0.485, 0.456, 0.406],
          std=[0.229, 0.224, 0.225],
      ),
      T.RandomRotation((90,180)),
      T.RandomVerticalFlip(p=0.5),
      T.RandomHorizontalFlip(p=0.5)
        
    ])
    
    return transform_test

In [187]:
def get_data_test(df_test):
    encoder = MultiLabelBinarizer()
    tags_test = encoder.fit_transform(df_test.list_tags.values)
   

    transform_test = get_transforms_test()
    ds_test = CustomDataset(df_test, tags_test, transform_test, path=path_train)
   
    dl_test = DataLoader(
      ds_test,
      batch_size=64,
      shuffle=True,
      collate_fn=ds_test.collate_fn
    )

    return ds_test, dl_test, encoder

In [188]:
path_class = os.path.join(path, "train_v2.csv")


df_classes = pd.read_csv(path_class) 

df_class_test=df_classes[0:1000]

df_class_test["list_tags"] = df_class_test.tags.str.split(" ")
classes = df_class_test.list_tags.values
tags = [tag for row in classes for tag in row]
counter_tags = Counter(tags)
df_tags = pd.DataFrame(
    {"Class": counter_tags.keys(), "Frequency": counter_tags.values()}
).sort_values("Frequency")



df_test,dl_test, encoder = get_data_test(df_class_test)


In [190]:
correct = 0.0
total=0;
j=0;
for samples, labels in dl_test:
    with torch.no_grad():
        samples, labels = samples, labels
        output = model(samples)
        # calculate accuracy
        output=(output.cpu().detach().numpy())
        labels=labels.cpu().detach().numpy()
        for i in range(0,17):
            output[j][i]=(output[j][i].astype(float)>=threshs[i])
            output[j][i]=output[j][i].astype(float)
            total=total+1;
            if(output[j][i]==labels[j][i]):
                    correct=correct+1;
        

        j=j+1
        
        
        
        
print('Accuracy of the network on {} test images: {}%'.format(len(dl_test ), round(correct*100.0/total, 2)))


Accuracy of the network on 16 test images: 91.54%
