In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchvision
import torchvision.transforms as transforms
from torchinfo import summary
from datetime import datetime
from tqdm import tqdm

from datasets import HousingDataset
from model import Model
from utils import Select, CustomScale

In [2]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Running on device: {device}")

Running on device: cpu


In [3]:
# Per-channel statistics for the 4-channel input images.
# NOTE(review): presumably measured on the raw training imagery - confirm the source.
mean = np.array([1377, 1354, 1381, 2356])
std = np.array([540, 398, 327, 515])

# Raw values are scaled so that mean + 3*std maps to 1.0 in every channel.
upperbound = mean + 3 * std
# Shape (4, 1, 1); assumed to broadcast over (C, H, W) inside CustomScale - TODO confirm.
SCALE = upperbound[:, np.newaxis, np.newaxis]

# The same statistics expressed in the scaled [0, 1] range.
norm_mean = mean / upperbound
norm_std = std / upperbound

transformations = [
    transforms.CenterCrop(size=(32, 32)),
    CustomScale(scale=1/SCALE, clamp=(0, 1.0)),
    transforms.Normalize(mean=norm_mean, std=norm_std),
    # Keep only the first three channels (selected along dim -3) after normalising all four.
    Select(dim=-3, indices=[0,1,2]),
]
transform = transforms.Compose(transformations)

# Inverse of scale + normalise for the three kept channels (the clamp is not undone):
#   raw = (y * norm_std + norm_mean) * upperbound
# which, written as Normalize(m, s) = (y - m) / s, gives
#   m = -norm_mean / norm_std   and   s = 1 / (norm_std * upperbound).
# Both arguments must stay 1-D with length 3. Dividing by SCALE (shape (4, 1, 1))
# would broadcast std to shape (4, 1, 3), which cannot be applied to a 3-channel image.
reverse_transform = transforms.Normalize(
    mean=-norm_mean[:3] / norm_std[:3],
    std=1 / norm_std[:3] / upperbound[:3],
)

In [4]:
# Directory holding the split CSVs. Defaults to the original cluster location;
# set the HOUSING_DATA_DIR environment variable to run the notebook elsewhere.
DATA_DIR = os.environ.get("HOUSING_DATA_DIR", "/atlas/u/erikrozi/housing_event_pred/data")

# Both splits share the same preprocessing pipeline.
train_set = HousingDataset(os.path.join(DATA_DIR, "train_seasonal_eff.csv"), transform=transform)
print(len(train_set))
val_set = HousingDataset(os.path.join(DATA_DIR, "val_seasonal_eff.csv"), transform=transform)
print(len(val_set))

25071
1411


In [5]:
# Optional (disabled): uncomment to replace each split with a random
# 100-sample subset, e.g. for a quick end-to-end smoke test.
# toy_indices = list(np.random.choice(np.arange(len(train_set)), size=(100,), replace=False))
# train_set = torch.utils.data.Subset(train_set, toy_indices)
# val_indices = list(np.random.choice(np.arange(len(val_set)), size=(100,), replace=False))
# val_set = torch.utils.data.Subset(val_set, val_indices)

In [6]:
# Mini-batch loaders of 16 samples each: training batches are shuffled,
# validation keeps dataset order and is read by 8 worker processes.
loader_batch_size = 16
train_loader = DataLoader(train_set, batch_size=loader_batch_size, shuffle=True)
val_loader = DataLoader(
    val_set,
    batch_size=loader_batch_size,
    shuffle=False,
    num_workers=8,
)

In [7]:
# Small convolutional encoder: four Conv2d -> ReLU -> BatchNorm2d stages
# (3 -> 16 -> 32 -> 64 -> 128 channels), with a 2x2 max-pool inserted before the
# batch norm of the second and fourth stages.
# NOTE(review): every conv uses kernel_size=1 with padding=1, so each one grows
# H and W by 2 instead of preserving them. kernel_size=3 may have been intended,
# but changing it would invalidate checkpoints trained with this layout.
encoder_layers = [
    # Stage 1
    torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.BatchNorm2d(num_features=16),
    # Stage 2 (downsamples by 2)
    torch.nn.Conv2d(in_channels=16, out_channels=32, kernel_size=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=32),
    # Stage 3
    torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.BatchNorm2d(num_features=64),
    # Stage 4 (downsamples by 2)
    torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(kernel_size=2),
    torch.nn.BatchNorm2d(num_features=128),
]
encoder = torch.nn.Sequential(*encoder_layers).to(device=device)

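# Alternative encoder (disabled): torchvision ResNet-18 with layer4, avgpool
# and fc replaced by Identity.
# encoder = torchvision.models.resnet18(pretrained=True).to(device=device)
# encoder.layer4 = torch.nn.Identity()
# encoder.avgpool = torch.nn.Identity()
# encoder.fc = torch.nn.Identity()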

# import sys
# sys.path.append("../housing_event_pred/models")
# import resnet_cifar
# encoder = resnet_cifar.ResNet18(img_channels=3).to(device=device)
# encoder.avgpool = torch.nn.Identity()
# encoder.fc = torch.nn.Identity()

In [8]:
# Wrap the encoder in the project's Model, move it to the selected device,
# then switch it to training mode.
model = Model(encoder).to(device=device)
model = model.train()

# Evaluate model

In [9]:
# List the contents of the checkpoint directory for run exp_2022-04-08T16:44:07Z.
%ls checkpoints/exp_2022-04-08T16:44:07Z/

ckpt_0.pt   ckpt_11.pt  ckpt_13.pt  ckpt_2.pt  ckpt_4.pt  ckpt_6.pt  ckpt_8.pt
ckpt_10.pt  ckpt_12.pt  ckpt_1.pt   ckpt_3.pt  ckpt_5.pt  ckpt_7.pt  ckpt_9.pt


In [10]:
# Restore the weights from the highest-numbered checkpoint of this run.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint_path = 'checkpoints/exp_2022-04-08T16:44:07Z/ckpt_13.pt'
# map_location places the stored tensors on the device selected for this session.
checkpoint_state = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(checkpoint_state)

<All keys matched successfully>

In [11]:
def predict(model, dset, loops=1, batch_size=10):
    """Run ``model`` over ``dset`` and collect flattened predictions and labels.

    The model is switched to eval mode and left in that mode. Every batch must
    be a dict with the keys ``"image_start"``, ``"image_end"``,
    ``"image_sample"`` and ``"label"``. The three images are cast to float,
    moved to the notebook-level ``device`` and passed to the model in that order.

    Args:
        model: Module invoked as ``model(img_start, img_end, img_sample)``.
        dset: Dataset wrapped in a non-shuffling ``DataLoader``.
        loops: Number of full passes over ``dset``. The outputs of all passes
            are concatenated in order. NOTE(review): repeated passes presumably
            matter because the dataset samples stochastically - confirm.
        batch_size: Batch size used for inference. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        Tuple ``(predictions, labels)`` of 1-D NumPy arrays; ``labels`` is cast
        to ``int``. Both arrays are empty when nothing was evaluated
        (``loops <= 0`` or an empty dataset).
    """
    model.eval()
    dloader = torch.utils.data.DataLoader(dset, batch_size=batch_size, shuffle=False)
    predictions = []
    labels = []
    # One tick per batch, so progress is visible during a long pass instead of
    # only after each full pass over the dataset.
    total_batches = max(loops, 0) * len(dloader)
    with torch.no_grad(), tqdm(total=total_batches) as progress:
        for _ in range(loops):
            for batch in dloader:
                img_start = batch["image_start"].to(device=device).float()
                img_end = batch["image_end"].to(device=device).float()
                img_sample = batch["image_sample"].to(device=device).float()
                label = batch["label"].float().to(device=device)

                pred = model(img_start, img_end, img_sample).flatten()
                predictions.append(pred.detach().cpu().numpy())
                labels.append(label.flatten().cpu().numpy())
                progress.update(1)

    # np.concatenate raises on an empty list, so return empty arrays explicitly.
    if not predictions:
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=int)
    return np.concatenate(predictions), np.concatenate(labels).astype(int)

In [None]:
# Ten full passes over the validation set; predict() concatenates the results.
n_eval_loops = 10
predictions, labels = predict(model, val_set, loops=n_eval_loops)

  0%|                                                    | 0/10 [03:25<?, ?it/s]


In [None]:
# Binarise the model outputs at 0.5 and report plain accuracy against the labels.
# `predictions` is deliberately rebound to the boolean decisions: the
# classification report cell consumes it in this form.
predictions = predictions > 0.5
is_correct = (labels == predictions)
accurate = is_correct.sum()
total = len(labels)
print(f"Accuracy {accurate} / {total} = {accurate/total}")

In [None]:
from sklearn.metrics import classification_report

# Text summary of precision, recall, F1 and support for each class,
# computed from the thresholded predictions.
report_text = classification_report(labels, predictions)
print(report_text)