In [2]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import torch.utils.data.dataset as dataset
import numpy as np

from dataset import TestDataset
from model import initialize_model

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
# Load trained model
net = initialize_model()

SAVED_MODEL_PATH = 'checkpoints/mobilenet_5_4'
net.load_state_dict(torch.load(SAVED_MODEL_PATH))

# Freeze layers
for param in net.parameters():
    param.requires_grad = False
    
# We just want to apply the feature extractor for now
net.classifier = nn.Identity()
net.to(device)

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(9, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [6]:
# Get dataset
DATA_FILE = 'E:/xplore_data/data/images.h5'
HEALTH_FILE = 'data/dhs_gps.csv'
dimages = TestDataset(DATA_FILE, HEALTH_FILE)
dimagesloader = torch.utils.data.DataLoader(dimages, batch_size=64, shuffle=False, num_workers=0)

In [38]:
# Apply feature extractor to the dataset
n = len(dimages)
extracted_features = torch.zeros(n, 1280+11+1)
i = 0
# Iterate over data.
for x, _, z in dimagesloader:
    x = x.to(device)
    j = i + x.shape[0]
    with torch.set_grad_enabled(False):
        outputs = net(x)
        extracted_features[i:j, :1280] = torch.squeeze(outputs).cpu()
        extracted_features[i:j, 1280:1291] = z[:, 14:25]
        extracted_features[i:j, 1291] = z[:, 3:14].mean(axis=1)
    i += 64

In [39]:
torch.isnan(extracted_features).nonzero()

tensor([], size=(0, 2), dtype=torch.int64)

In [90]:
torch.save(extracted_features, 'data/extracted_features_5_5.pt')

In [40]:
dfeatures = dataset.TensorDataset(extracted_features)

# Randomly split this dataset into train and test.
dtrain, dtest = dataset.random_split(dfeatures, (n - (n//5), n//5))

In [105]:
# Simple neural network to predict healthcare
predictor = nn.Sequential(
    nn.Linear(1280, 100),
    nn.Linear(100, 11)
)
predictor.to(device)

Sequential(
  (0): Linear(in_features=1280, out_features=100, bias=True)
  (1): Linear(in_features=100, out_features=11, bias=True)
)

In [106]:
print(dfeatures.__getitem__(0))
print(dfeatures.__getitem__(0)[0].shape)

(tensor([4.0893e-01, 1.3390e-03, 4.9997e-01,  ..., 2.8000e-01, 9.6000e-01,
        2.5000e+01]),)
torch.Size([1292])


In [1]:
# Train the predictor model
def train_model(model, dataloader, optimizer, scheduler, num_epochs=4, test_loader=None):
    for epoch in range(num_epochs):
        print('Epoch {}/{}: '.format(epoch, num_epochs - 1), end='')

        model.train()
        running_loss = 0.0
        criterion = nn.MSELoss(reduction='none')

        # Iterate over data.
        for rows in dataloader:
            x = rows[0][:, :1280].to(device)
            y = rows[0][:, 1280:1291].to(device)
            wt = rows[0][:, 1291].to(device)
            wt = wt / wt.sum()
            
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(True):
                outputs = model(x).squeeze()
#                 print(outputs.shape)
#                 print(y.shape)
                loss = torch.sum(criterion(outputs, y).mean(axis=1) * wt)
                loss.backward()
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            # statistics
            running_loss += loss.item() * x.size(0)

        epoch_loss = running_loss / len(dtrain)
        print('Training loss: {:.4f}; '.format(epoch_loss), end='')
        test_loss = 0.0
        
        if test_loader is not None:
            for rows in test_loader:
                x = rows[0][:, :1280].to(device)
                y = rows[0][:, 1280:1291].to(device)
                wt = rows[0][:, 1291].to(device)
                wt = wt / wt.sum()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(False):
                    outputs = model(x).squeeze()
                    loss = torch.sum(criterion(outputs, y).mean(axis=1) * wt)

                # statistics
                test_loss += loss.item() * x.size(0)
        test_loss /= len(dtrain)
        print('Test loss: {:.4f}'.format(test_loss))

    return model


In [2]:
BATCH_SIZE = 100
EPOCHS = 60

# Data loader
dloader = torch.utils.data.DataLoader(dtrain, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
dtestloader = torch.utils.data.DataLoader(dtest, batch_size=512, shuffle=False, num_workers=0)
# Create training optimizer
optimizer = optim.Adam(predictor.parameters(), lr=0.001, weight_decay=0.01)


NameError: name 'torch' is not defined

In [3]:
predictor = train_model(predictor, dloader, optimizer, scheduler=None, 
                        num_epochs=EPOCHS, test_loader=dtestloader)

NameError: name 'predictor' is not defined

In [116]:
# Compute r-squared for training and test set
with torch.no_grad():
    split = [dtrain.indices, dtest.indices]
    tensor_inputs = [extracted_features[x, :1280].to(device) for x in split]
    outputs = [extracted_features[x, 1280:].numpy() for x in split]
    preds = [predictor(x).cpu().numpy() for x in tensor_inputs]

In [97]:
from sklearn.metrics import r2_score
for i in range(11):
    r2s = [r2_score(y[:, i], pred[:, i]) for y, pred in zip(outputs, preds)]
    print('%.4f, %.4f' % tuple(r2s))

-3.2049, -2.9289


IndexError: index 1 is out of bounds for axis 1 with size 1

In [121]:
torch.sigmoid(torch.Tensor(preds[0]))

tensor([[0.3010, 0.2286, 0.2625,  ..., 0.5021, 0.2756, 0.7460],
        [0.5411, 0.4225, 0.4948,  ..., 0.5461, 0.5478, 0.7979],
        [0.6258, 0.4632, 0.5914,  ..., 0.4686, 0.5956, 0.7626],
        ...,
        [0.6325, 0.4483, 0.5924,  ..., 0.4597, 0.6006, 0.7648],
        [0.5141, 0.3909, 0.4868,  ..., 0.4856, 0.5069, 0.7701],
        [0.3180, 0.2369, 0.2824,  ..., 0.4807, 0.2987, 0.7275]])

In [122]:
outputs[0]

array([[ 0.09836066,  0.09836066,  0.13114753, ...,  0.1147541 ,
         0.6785714 , 60.545456  ],
       [ 1.        ,  0.5555556 ,  0.8888889 , ...,  1.        ,
         1.        , 17.363636  ],
       [ 0.9411765 ,  0.7       ,  0.92156863, ...,  1.        ,
         0.9       , 47.090908  ],
       ...,
       [ 0.8888889 ,  0.6666667 ,  0.8888889 , ...,  0.8888889 ,
         0.8       , 25.90909   ],
       [ 0.88461536,  0.7692308 ,  0.88461536, ...,  0.9230769 ,
         0.8181818 , 24.636364  ],
       [ 0.47727272,  0.3181818 ,  0.3488372 , ...,  0.45454547,
         0.4       , 42.454544  ]], dtype=float32)