# Binary Classification: 
### Available Rooftop Area vs No Available Rooftop Area
Using a pre-trained ResNet18 to classify satellite images into two classes: Available Rooftop Area vs No Available Rooftop Area.

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd '/content/drive/MyDrive/Geo-Area-Classifier'

In [None]:
import torch
from torch.utils.data import DataLoader
import torchvision.models as models

import pandas as pd 

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt 
import matplotlib.image as img

import os

from data_helpers import dataset
from training import training, testing
from plot import plot_train_val
from models import CNN, initialize_model

%matplotlib inline

In [None]:
# Folders holding the image files referenced by the 'id' column of each split
train_path = 'data/images/train/'
test_path = 'data/images/test/'


def _read_split(csv_file):
    """Read a header-less (id, label) CSV and binarise its label column.

    Raw label 2 becomes class 0; every other raw label becomes class 1.
    """
    frame = pd.read_csv(csv_file, header=None, names=['id', 'label'])
    frame['label'] = frame['label'].ne(2).astype('int64')
    return frame


train = _read_split('data/train.csv')
test = _read_split('data/test.csv')

# Preview the first rows of the training table
train.head()

In [None]:
# Size of each split
n_train_images, n_test_images = len(train), len(test)
print('Total number of images in the Training Set: ', n_train_images)
print('Total number of images in the Test Set: ', n_test_images)

# Pie chart of the class balance in the training set.
# groupby sorts by label value, so the first wedge is class 0 and the second is class 1.
label = ('NoARA', 'ARA')
images_per_class = train.groupby('label').size()
plt.figure(figsize=(5, 5))
plt.pie(
    images_per_class,
    labels=label,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
plt.title('Percentage of images per class (in the Training Set)')
plt.show()

In [None]:
# Display the last five training images of each class, one figure row per class
class_headings = (
    ('No Available Rooftop Area: ', 0),
    ('Available Rooftop Area: ', 1),
)
for heading, class_id in class_headings:
    print(heading)
    fig, ax = plt.subplots(1, 5, figsize=(15, 3))
    sample_ids = train.loc[train['label'] == class_id, 'id'].iloc[-5:]
    for axis, image_id in zip(ax, sample_ids):
        image_file = os.path.join(train_path, image_id)
        axis.imshow(img.imread(image_file))
    plt.show()

In [None]:
# Hold out 10% of the labelled training rows for validation, stratified on the
# label so both subsets keep the same class proportions.
# A fixed random_state keeps the validation membership identical across runs,
# so scores from different runs are comparable.
# NOTE: `train` is rebound here, so re-running this cell splits the already
# reduced training set again; reload the data first.
train, valid = train_test_split(
    train,
    test_size=0.1,
    stratify=train['label'],
    random_state=42,
)

# Wrap each split with the project `dataset` helper (table + image folder).
# The validation rows come from the training table, so they use train_path.
train_data = dataset(train, train_path)
valid_data = dataset(valid, train_path)
test_data = dataset(test, test_path)

In [None]:
# Training configuration
num_epochs, batch_size = 150, 10
learning_rate = 5e-7

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("Device used: ", device.type)

In [None]:
# Data loaders: only the training loader reshuffles its samples each epoch;
# validation and test are read in a fixed order.
shared_loader_args = dict(batch_size=batch_size, num_workers=0)
train_loader = DataLoader(train_data, shuffle=True, **shared_loader_args)
valid_loader = DataLoader(valid_data, shuffle=False, **shared_loader_args)
test_loader = DataLoader(test_data, shuffle=False, **shared_loader_args)

In [None]:
# Build the two-class network through the project helper and move it to the
# selected device.
# NOTE(review): 'resnet' is presumably the ResNet18 named in the notebook header,
# and feature_extract=False presumably fine-tunes every layer; confirm in
# models.initialize_model.
num_classes = 2
model, _ = initialize_model('resnet', num_classes, feature_extract=False, use_pretrained=True)
model = model.to(device)

# Per-class loss weights, indexed by class id (entry i belongs to class i).
# The counts are taken with bincount rather than value_counts(), because
# value_counts() orders its result by frequency, not by class id, and so cannot
# be used positionally as CrossEntropyLoss weights.
# Weights follow the usual "balanced" rule n_samples / (n_classes * count), so
# the rarer class contributes as much to the loss as the more frequent one.
class_counts = torch.bincount(
    torch.as_tensor(train.label.to_numpy()), minlength=num_classes
).float()
# clamp guards against division by zero if a class is missing from the split
w = (class_counts.sum() / (num_classes * class_counts.clamp(min=1))).to(device)

criterion = torch.nn.CrossEntropyLoss(weight=w)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Run the project's training routine (training.py) for num_epochs epochs with the
# model, loaders, optimizer and loss configured in the previous cells.
# NOTE(review): presumably validates on valid_loader each epoch; confirm in training.py.
training(model, device, train_loader, valid_loader, optimizer, criterion, num_epochs)

In [None]:
# Evaluate the trained model on each split with the project's testing routine.
# The string argument names the split being evaluated.
# NOTE(review): presumably it is used to label the reported metrics; confirm in training.py.
testing(model, device, train_loader, 'Training')
testing(model, device, valid_loader, 'Validation')
testing(model, device, test_loader, 'Test')