## Demo

In [2]:
%load_ext autoreload
%autoreload 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1]:
import os
import json
import glob

import pickle
import numpy as np
import pandas as pd
import torchinfo
import torch 
import torchmetrics
import torchgeo.models
import torchvision
from torch.utils.tensorboard import SummaryWriter

from models.from_config import build_from_config
from data_handlers.csv_dataset import CustomDatasetFromDataFrame
from utils import utils
from train import train
from test import test

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Locations of the experiment inputs, all relative to the working directory.
_DATA_ROOT = 'data'

# Survey table with one row per cluster.
CSV_PATH = os.path.join(_DATA_ROOT, 'geometry_less_dataset.csv')
# The trailing '' makes the joined path end with a path separator.
DATA_DIR = os.path.join(_DATA_ROOT, 'landsat_tif', '')
# Pickled cross-validation folds.
FOLD_PATH = os.path.join(_DATA_ROOT, 'dhs_incountry_folds.pkl')
# JSON file holding the model / training configuration.
CONFIG_FILE = os.path.join('configs', 'resnet18_ms_e2e_l7_1e2.json')

In [3]:
# Experiment settings (batch size, epochs, checkpoint path, ...) live in the JSON config.
with open(CONFIG_FILE) as config_handle:
    config = json.load(config_handle)


def _coord_token(value):
    """Return the first five characters of ``str(value)`` with '.' replaced by '_'."""
    return str(value)[:5].replace('.', '_')


def _tile_filename(row):
    """Build the ``<lat>_<lon>.tif`` file name from a row's ``lat`` and ``lon`` values."""
    return _coord_token(row['lat']) + "_" + _coord_token(row.lon) + '.tif'


csv = pd.read_csv(CSV_PATH)
csv['filename'] = csv.apply(_tile_filename, axis=1)

# Keep every column except those whose name starts with "Unnamed".
unnamed_columns = csv.columns.str.contains('^Unnamed')
csv = csv.loc[:, ~unnamed_columns]
csv.head()

Unnamed: 0,country,year,cluster,lat,lon,households,wealthpooled,filename
0,angola,2011,1,-12.350257,13.534922,36,2.312757,-12_3_13_53.tif
1,angola,2011,2,-12.360865,13.551494,32,2.010293,-12_3_13_55.tif
2,angola,2011,3,-12.613421,13.413085,36,0.877744,-12_6_13_41.tif
3,angola,2011,4,-12.581454,13.397711,35,1.066994,-12_5_13_39.tif
4,angola,2011,5,-12.578135,13.418748,37,1.750153,-12_5_13_41.tif


In [4]:
# Per-channel normalisation statistics (7 values each), shared by both pipelines.
_CHANNEL_MEAN = [42.7178, 42.9092, 43.2119, 42.8700, 42.7862, 42.7192, 42.8525]
_CHANNEL_STD = [104.3150, 104.7388, 105.4271, 104.6307, 104.5374, 104.3182, 104.5891]


def _resize_and_crop():
    """Return fresh Resize + CenterCrop steps producing 224-pixel crops."""
    return [
        torchvision.transforms.Resize(size=224),
        torchvision.transforms.CenterCrop(size=224),
    ]


def _normalize():
    """Return a fresh Normalize step using the shared channel statistics."""
    return torchvision.transforms.Normalize(
        mean=list(_CHANNEL_MEAN),
        std=list(_CHANNEL_STD),
    )


# Training pipeline: resize/crop, random horizontal flip, then normalisation.
TRAIN_TRANSFORM = torch.nn.Sequential(
    *_resize_and_crop(),
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    _normalize(),
)

# Evaluation pipeline: same as training but without the random flip.
TEST_TRANSFORM = torch.nn.Sequential(
    *_resize_and_crop(),
    _normalize(),
)

In [9]:
# Spatially Aware Cross-Validation
#
# For every fold: build the train/val/test datasets, fine-tune a pretrained
# ResNet-18, keep the value returned by train() and save a checkpoint.

# Demo-sized caps on how many indices are taken from each split of a fold.
MAX_TRAIN_SAMPLES = 3200
MAX_EVAL_SAMPLES = 1280

# NOTE: pickle.load can execute arbitrary code -- only load fold files that
# come from a trusted source.
with open(FOLD_PATH, 'rb') as f:
    folds = pickle.load(f)

results = dict()  # fold key -> value returned by train() for that fold
device = "cuda" if torch.cuda.is_available() else "cpu"

for fold in folds:
    writer = SummaryWriter()
    r2 = torchmetrics.R2Score().to(device=device)

    # Index split
    train_split = folds[fold]['train'][:MAX_TRAIN_SAMPLES]
    val_split = folds[fold]['val'][:MAX_EVAL_SAMPLES]
    test_split = folds[fold]['test'][:MAX_EVAL_SAMPLES]

    # CSV split
    train_df = csv.iloc[train_split]
    val_df = csv.iloc[val_split]
    test_df = csv.iloc[test_split]

    # Datasets: only the training set gets the augmenting transform.
    train_dataset = CustomDatasetFromDataFrame(train_df, DATA_DIR, transform=TRAIN_TRANSFORM)
    val_dataset = CustomDatasetFromDataFrame(val_df, DATA_DIR, transform=TEST_TRANSFORM)
    test_dataset = CustomDatasetFromDataFrame(test_df, DATA_DIR, transform=TEST_TRANSFORM)

    # DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config['batch_size'],
        shuffle=True,
        num_workers=4
    )
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=2,
    )
    # Not used during training; kept for the evaluation that follows the loop.
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=True,
        num_workers=2,
        pin_memory=True
    )

    # TRANSFER LEARNING SCENARIO
    # A torchgeo ResNet-18 with SENTINEL2_ALL_MOCO weights is handed to
    # build_from_config together with the experiment's config file.
    base_model = torchgeo.models.resnet18(weights=torchgeo.models.ResNet18_Weights.SENTINEL2_ALL_MOCO)
    model = build_from_config(base_model=base_model, config_file=CONFIG_FILE)
    model = model.to(device=device)

    # CONFIGURE LOSS, OPTIM
    loss_fn = utils.configure_loss(config)
    optimizer = utils.configure_optimizer(config, model)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer)

    print(f"Training on fold {fold}")
    # Fit on the training split and monitor on the validation split, so the
    # test split stays unseen until the final evaluation. Results are stored
    # per fold instead of overwriting the previous fold's value.
    results[fold] = train(
        model=model,
        train_dataloader=train_loader,
        val_dataloader=val_loader,
        optimizer=optimizer,
        scheduler=scheduler,
        loss_fn=loss_fn,
        epochs=config['n_epochs'],
        batch_size=config['batch_size'],
        in_channels=config['in_channels'],
        writer=writer,
        device=device,
        r2=r2
    )
    # Flush pending TensorBoard events and release the writer's file handle.
    writer.close()

    torch.save(model.state_dict(), config['checkpoint_path'] + str(fold) + ".pth")
    utils.free_gpu_cache()

Training on fold A


## 3. Test Results

In [None]:
from scipy.stats import pearsonr
import seaborn as sns

# Evaluate on the held-out test split (not the validation split).
# NOTE: `model`, `test_loader` and `device` are the variables left behind by the
# cross-validation loop, so this reports the model of the last fold only.
test_r2, Y_true, Y_pred = test(model=model, dataloader=test_loader, device=device)
# Y_true = [ utils.denormalize_asset(asset) for asset in Y_true]
# Y_pred = [ utils.denormalize_asset(asset) for asset in Y_pred]
results = pd.DataFrame({
    'true index': np.array(Y_true),
    'predicted index': np.array(Y_pred)
})

# Scatter of predictions against ground truth with a fitted regression line;
# the trailing ';' hides the repr of the value returned by .set().
sns.set_palette("rocket")
sns.regplot(x='true index', y='predicted index', data=results).set(title='R2 = '+str(test_r2));