# Improving Remote Poverty Detection with Multiview Learning — TESTING
This notebook explores the data and tests the methods using only a subset of the data (Ethiopia) to more efficeintly narrow the search space for methods and hyperparameters.
## SETUP

In [1]:
from google.colab import files, drive
drive.mount('/content/drive')
! pip install geoio

Mounted at /content/drive
Collecting geoio
[?25l  Downloading https://files.pythonhosted.org/packages/4b/26/2647daf5ef5cfb9327abb2709024810cd9b1916f7b334eee33fb81af9073/geoio-1.3.0-py3-none-any.whl (60kB)
[K     |████████████████████████████████| 61kB 5.0MB/s 
Collecting xmltodict
  Downloading https://files.pythonhosted.org/packages/28/fd/30d5c1d3ac29ce229f6bdc40bbc20b28f716e8b363140c26eff19122d8a5/xmltodict-0.12.0-py2.py3-none-any.whl
Collecting tzwhere
[?25l  Downloading https://files.pythonhosted.org/packages/3d/e9/18e4822f6e4640332b97c744378da427bc28d2399235520349bb17e06aa4/tzwhere-3.0.3.tar.gz (23.7MB)
[K     |████████████████████████████████| 23.7MB 1.3MB/s 
Collecting tinytools
  Downloading https://files.pythonhosted.org/packages/c3/e6/e335406a22be352c8b680ed5d4d28937ac911dbda9756625006e502bf787/tinytools-1.1.0-py3-none-any.whl
Building wheels for collected packages: tzwhere
  Building wheel for tzwhere (setup.py) ... [?25l[?25hdone
  Created wheel for tzwhere: filename=

In [2]:
!ls drive/MyDrive/detecting-poverty/data/landsat

eth  mw  ng


In [3]:
! cp drive/MyDrive/detecting-poverty/modules/* ./

In [4]:
import numpy as np
import torch
import pandas as pd
from data_loaders import LandsatViirs, LandsatTransform, ViirsTransform
from conv_ved import ConvVED, ResnetVAE
import utils
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
import geoio
import matplotlib.pyplot as plt
%matplotlib inline
from utils import create_space
import os
from PIL import Image
import torchvision.transforms.functional as TF

# to handle truncated images
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import random
torch.random.manual_seed(31220)
random.seed(31220)

# for better traceback with CUDA errors
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

## The DATA
### Survey and Reference Data

In [5]:
full_reference_data = pd.read_csv('drive/MyDrive/detecting-poverty/data/image_download_actual.csv')
print(full_reference_data.shape)
full_reference_data.head(3)

(24714, 10)


Unnamed: 0,image_name,image_lat,image_lon,cluster_lat,cluster_lon,cons_pc,nightlights,country,nightlights_bin,is_train
0,-17.09515_35.17229723579403_-17.09515_35.21721...,-17.09515,35.172297,-17.09515,35.217213,1.423239,0.025206,mw,0,True
1,-17.08017807859801_35.17229723579403_-17.09515...,-17.080178,35.172297,-17.09515,35.217213,1.423239,0.025206,mw,0,True
2,-17.125093842803985_35.18726915719602_-17.0951...,-17.125094,35.187269,-17.09515,35.217213,1.423239,0.025206,mw,0,False


In [6]:
NIGHTLIGHTS_DIRS = ['drive/MyDrive/detecting-poverty/data/viirs/viirs_2015_' + tif_name for tif_name in ['00N060W.tif', '75N060W.tif']]
viirs_tifs = [geoio.GeoImage(ndir) for ndir in NIGHTLIGHTS_DIRS]

## Convolutional Variation Resnet-Encoder Convolutional-Decoder 
### Data Pipeline

In [7]:
# create dataset objects
ref_features = ['image_lat', 'image_lon', 'image_name', 'country']
target = 'cons_pc'

traindev_ref_data, test_ref_data, Ytraindev, Ytest = train_test_split(
    full_reference_data[ref_features],
    full_reference_data[target],
    train_size=0.9
)

train_ref_data, dev_ref_data, Ytrain, Ydev = train_test_split(
    traindev_ref_data,
    Ytraindev,
    train_size=0.9
)

viirs_transform = ViirsTransform(viirs_tifs)
landsat_transform = LandsatTransform('drive/MyDrive/detecting-poverty/data/landsat', width=224, height=224)

# training_loader = LandsatViirs(
#     df=train_ref_data, 
#     viirs_transform=viirs_transform,
#     landsat_transform=landsat_transform
# )
# dev_loader = LandsatViirs(
#     df=dev_ref_data, 
#     viirs_transform=viirs_transform,
#     landsat_transform=landsat_transform
# )

### Model Testing

In [8]:
# TESTING MODULES WITH ONLY ETHIOPIA DATA

# init model
conv_ved = ConvVED(
    n_components=64,
    net=ResnetVAE,
    image_in_channels=3,
    image_out_channels=1,
    lr=2e-3, 
    batch_size=32, 
    kkl=1, 
    kv=1, 
    path='resnet_ved.pth',
    # cuda=False
)

# data loaders
eth_training_loader = LandsatViirs(
    df=train_ref_data[train_ref_data.country == 'eth'], 
    viirs_transform=viirs_transform,
    landsat_transform=landsat_transform
)
eth_dev_loader = LandsatViirs(
    df=dev_ref_data[dev_ref_data.country == 'eth'], 
    viirs_transform=viirs_transform,
    landsat_transform=landsat_transform
)

# train
conv_ved.fit(
    eth_training_loader, Xd=eth_dev_loader, epochs=5
)


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))






















Epoch: 1, train loss: 76.8966, dev loss: 54.2736




















Epoch: 2, train loss: 49.8407, dev loss: 54.5703




















Epoch: 3, train loss: 46.9546, dev loss: 47.3767




















Epoch: 4, train loss: 45.5994, dev loss: 49.2025




















Epoch: 5, train loss: 44.8698, dev loss: 45.8195


### Consumption Prediction

In [9]:
eth_train_features = conv_ved.transform(eth_training_loader)
eth_dev_features = conv_ved.transform(eth_dev_loader)



















In [13]:
eth_dev_features

array([[ 9.16887820e-03,  2.90045321e-01, -2.22803168e-02, ...,
         8.92694816e-02,  2.60672085e-02,  6.55796751e-02],
       [ 8.97720903e-02,  1.07567012e-01, -9.15460885e-02, ...,
         4.39216793e-02,  4.42849435e-02,  8.88396278e-02],
       [ 1.04570314e-01, -7.41763636e-02, -4.95246239e-02, ...,
        -8.34183618e-02,  1.08730048e-04,  6.86317086e-02],
       ...,
       [ 1.46973655e-01, -1.41513795e-01, -4.87673432e-02, ...,
        -1.01864114e-01,  6.64782897e-03,  5.94448820e-02],
       [ 3.36624235e-02,  2.28182629e-01, -2.38683559e-02, ...,
         7.83891678e-02,  3.47213708e-02,  5.47556952e-02],
       [ 9.75299999e-02, -8.45685303e-02, -4.06007022e-02, ...,
        -1.01433724e-01, -2.43669003e-03,  6.31585568e-02]], dtype=float32)

In [15]:
from predictors import elastic_net, logistic

results, downstream_model = elastic_net(
    Xtrain=eth_train_features, 
    Xdev=eth_dev_features, 
    Ytrain=Ytrain[train_ref_data.country == 'eth'], 
    Ydev=Ydev[dev_ref_data.country == 'eth'], 
    verbose=True,
    # scoring='f1'
)


Training Elastic Net

{
    "R2": 0.22203047807033618,
    "alpha": 0.0033563209509457105,
    "l1_ratio": 1.0,
    "coefficients": [
        0.0,
        0.024037132039666176,
        -0.0,
        0.0,
        12.671607971191406,
        0.16202960908412933,
        0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        0.0,
        0.0,
        0.10537350177764893,
        -6.101308345794678,
        -0.0,
        -0.0,
        -0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        -0.0,
        0.0,
        -0.0,
        0.0,
        0.0,
        0.0,
        -0.0,
        -0.0,
        -0.0,
        0.0,
        -0.0,
        0.0,
        0.0,
        -0.04113839194178581,
        0.0,
        0.0,
        0.0,
        -0.0,
        -0.0,
        -0.0,
        0.0,
        -0.0,
        0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        0.0,


## Long Training

In [16]:
# TESTING MODULES WITH ONLY ETHIOPIA DATA

# init model
conv_ved = ConvVED(
    n_components=64,
    net=ResnetVAE,
    image_in_channels=3,
    image_out_channels=1,
    lr=2e-3, 
    batch_size=32, 
    kkl=1, 
    kv=1, 
    path='resnet_ved.pth',
    # cuda=False
)

# train
conv_ved.fit(
    eth_training_loader, Xd=eth_dev_loader, epochs=20
)



















Epoch: 1, train loss: 71.5220, dev loss: 100.4904




















Epoch: 2, train loss: 49.4342, dev loss: 47.9953




















Epoch: 3, train loss: 46.2290, dev loss: 52.2485




















Epoch: 4, train loss: 44.4606, dev loss: 43.9168




















Epoch: 5, train loss: 43.5574, dev loss: 41.3253




















Epoch: 6, train loss: 42.4638, dev loss: 41.3266




















Epoch: 7, train loss: 42.2820, dev loss: 44.2905




















Epoch: 8, train loss: 42.0269, dev loss: 41.6938




















Epoch: 9, train loss: 41.8620, dev loss: 43.3501




















Epoch: 10, train loss: 41.4423, dev loss: 41.5202




















Epoch: 11, train loss: 41.1693, dev loss: 40.2674




















Epoch: 12, train loss: 40.9961, dev loss: 48.1039




















Epoch: 13, train loss: 41.6708, dev loss: 42.4469




















Epoch: 14, train loss: 40.9442, dev loss: 40.5675




















Epoch: 15, train loss: 40.6569, dev loss: 41.6577




















Epoch: 16, train loss: 40.7690, dev loss: 40.8454




















Epoch: 17, train loss: 40.5376, dev loss: 41.0402




















Epoch: 18, train loss: 40.4080, dev loss: 43.8322




















Epoch: 19, train loss: 40.4519, dev loss: 41.2519




















Epoch: 20, train loss: 40.2125, dev loss: 42.9612


In [17]:
eth_train_features = conv_ved.transform(eth_training_loader)
eth_dev_features = conv_ved.transform(eth_dev_loader)



















In [18]:
results, downstream_model = elastic_net(
    Xtrain=eth_train_features, 
    Xdev=eth_dev_features, 
    Ytrain=Ytrain[train_ref_data.country == 'eth'], 
    Ydev=Ydev[dev_ref_data.country == 'eth'], 
    verbose=True,
    # scoring='f1'
)


Training Elastic Net

{
    "R2": 0.2083314385171966,
    "alpha": 0.0030967362952365133,
    "l1_ratio": 0.99,
    "coefficients": [
        -0.0,
        0.0,
        0.0,
        -0.0,
        -0.0,
        0.0,
        -0.0,
        -0.0,
        -0.0,
        0.0,
        0.0,
        4.403697967529297,
        14.625930786132812,
        -0.0,
        -0.0,
        -0.0,
        -0.0,
        0.3962784707546234,
        0.0,
        0.6473495364189148,
        -0.0,
        -0.0,
        1.7199559211730957,
        0.0,
        0.0,
        0.0,
        -5.310793399810791,
        0.0,
        0.0,
        0.0,
        -0.0,
        -0.0,
        -40.650413513183594,
        0.0,
        -2.1761157512664795,
        0.0,
        0.0,
        -0.0,
        -0.0,
        0.0,
        -0.0,
        0.0,
        0.0,
        0.0,
        -0.0,
        0.0,
        -0.0,
        -0.0,
        -3.5292348861694336,
        -12.791156768798828,
        -0.0,
        -0.0,
        0.0,
 