In [1]:
from setup import proj_dir, out_dir, data_dir, image_dir, model_dir
%load_ext autoreload
%autoreload 2

from collections import OrderedDict
import os


import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import seaborn as sns
import numpy as np
from time import time

import torch
import torch.nn as nn
import torch.optim
import torch.utils.data
import torch.backends.cudnn
import torchvision.utils
import torchvision.transforms

import glob
import pickle as pkl

from dataloader import image_loader
from M1_util_train_test import load_model
from util_image import inverse_transform


In [2]:
# ---------------------------------------------------------------------------
# Run selection: these values identify which saved checkpoint is evaluated.
# model_type, zoomlevel, output_dim and model_run_date are combined into the
# checkpoint file name pattern further down in the notebook.
# ---------------------------------------------------------------------------
model_type = 'SAE'
load_model_name = 'Autoencoder'
zoomlevel = 'zoom13'
output_dim = 3
model_run_date = '22021407'

# Forwarded to the image loader / demographic loader when evaluating.
data_version = '1571'
sampling = 'stratified'
normalization = 'minmax'

variable_names = [
    'trpgen',
    'active',
    'auto',
    'mas',
    'pt',
]

# Only the length of this list is used by the config below (num_demo_vars).
demo_variables = [
    'tot_population',
    'pct25_34yrs',
    'pct35_50yrs',
    'pctover65yrs',
    'pctwhite_alone',
    'pct_nonwhite',
    'pctblack_alone',
    'pct_col_grad',
    'avg_tt_to_work',
    'inc_per_capita',
]

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


In [3]:
# Flat set of run arguments; the grouped config dictionaries below are views
# onto subsets of these values.
args = dict(
    image_size=224,
    depth=-1,
    base_channels=64,
    output_dim=output_dim,
    num_demo_vars=len(demo_variables),
    demo_norm=normalization,
    cardinality=1,
    epochs=20,
    batch_size=16,
    outdir=out_dir,
    num_workers=8,
    tensorboard=False,
    save=False,
)

# Model architecture settings. 'input_shape' here is only an initial value;
# it is reassigned later in the notebook before the encoder/decoder are built.
model_config = OrderedDict(
    arch='resnext',
    depth=args['depth'],
    base_channels=args['base_channels'],
    cardinality=args['cardinality'],
    input_shape=(1, 3, 32, 32),
    output_dim=args['output_dim'],
    num_demo_vars=args['num_demo_vars'],
)

# Optimisation settings (only the batch size is needed for evaluation).
optim_config = OrderedDict(batch_size=args['batch_size'])

# Data settings.
# NOTE(review): the 'dataset' value looks like a leftover placeholder -- confirm
# whether anything reads it.
data_config = OrderedDict(
    dataset='CIFAR10',
    image_size=args['image_size'],
    demo_norm=args['demo_norm'],
)

# Runtime / bookkeeping settings.
run_config = OrderedDict(
    outdir=args['outdir'],
    save=args['save'],
    num_workers=args['num_workers'],
    tensorboard=args['tensorboard'],
)

# Top-level container; holds references to (not copies of) the dicts above.
config = OrderedDict(
    model_config=model_config,
    optim_config=optim_config,
    data_config=data_config,
    run_config=run_config,
)

# Load Model

In [4]:
# Locate and load the checkpoint for this configuration.
# The file name pattern is <model_type>_<zoomlevel>_<output_dim**2 * 2048>_<run date>_*.pt
# and exactly one file is expected to match it.
model_pattern = (model_dir+model_type+"_"+zoomlevel+"_"+str(model_config['output_dim']**2*2048)+"_"+
                 model_run_date+"_*.pt")
model_path = glob.glob(model_pattern)

if len(model_path) != 1:
    # Fail loudly: continuing would leave `saved` undefined (or stale from an
    # earlier execution of this cell) and the following cells would misbehave.
    raise RuntimeError(
        "Error. More than one model or no model exists.\n"
        "Matches: %s\nPattern: %s" % (model_path, model_pattern)
    )

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
saved = torch.load(model_path[0], map_location=device)
print(model_path[0], "loaded.")

/dream/qingyi/image_chicago/models/SAE_zoom13_18432_22021407_100.pt loaded.


In [5]:
# Build the encoder, the decoder and the autoencoder that wraps both.
# The same model_config dictionary is reused for every load_model call and is
# mutated in between, so the order of the statements below matters.
_model_cfg = config['model_config']
_img_side = data_config['image_size']

# Encoder: input shape is set from the configured image size.
# presumably (batch, channels, height, width) -- confirm against load_model
_model_cfg['input_shape'] = (1, 3, _img_side, _img_side)
encoder = load_model(_model_cfg['arch'], 'Encoder', _model_cfg)

# Decoder: reconfigure the shared dict before building it.
_model_cfg.update(
    input_shape=[1, 2048, _model_cfg['output_dim'], _model_cfg['output_dim']],
    conv_shape=[_img_side // 32] * 2,
    output_channels=3,
)
decoder = load_model(_model_cfg['arch'], 'Decoder', _model_cfg)

# The combined model is built from the top-level config, which now also
# carries the two sub-networks.
config.update(encoder=encoder, decoder=decoder)
model = load_model('autoencoder', load_model_name, config)


In [6]:
def _lookup_census(image_list, demo_cs, demo_np):
    """Return the rows of ``demo_np`` that match a batch of image paths, on ``device``.

    Each path is reduced to the substring between its last '/' and its last
    '_'; the position of that key in ``demo_cs`` selects the row of ``demo_np``.
    Raises ValueError (from ``.index``) when a key is not present in ``demo_cs``.
    """
    census_index = [demo_cs.index(path[path.rfind('/')+1:path.rfind('_')]) for path in image_list]
    return torch.tensor(demo_np[census_index]).to(device)


def _average_losses(loader, batch_losses, num_losses, meter_cls):
    """Average per-batch loss components over every batch of ``loader``.

    Args:
        loader: iterable yielding ``(image_list, data)`` batches.
        batch_losses: callable ``(image_list, data) -> sequence of loss tensors``;
            ``data`` has already been moved to ``device`` when it is called.
        num_losses: number of loss components returned by ``batch_losses``.
        meter_cls: class with ``update(value, n)`` and an ``avg`` attribute.

    Returns:
        List with one running average per loss component, each weighted by
        batch size through ``meter_cls.update``.
    """
    meters = [meter_cls() for _ in range(num_losses)]
    # Pure evaluation: disable autograd so no graph is built per batch.
    with torch.no_grad():
        for step, (image_list, data) in enumerate(loader):
            data = data.to(device)
            losses = batch_losses(image_list, data)
            num = data.size(0)
            for meter, loss in zip(meters, losses):
                meter.update(loss.item(), num)
            # Lightweight progress indicator, one tick every 10 batches.
            if step % 10 == 0:
                print(step, end='\t')
    return [meter.avg for meter in meters]


if 'train_loss' in saved.keys() and 'test_loss' in saved.keys():
    # The checkpoint already stores its losses: just report them.
    # NOTE(review): the model weights are NOT loaded on this path -- confirm
    # that later cells do not rely on `model` holding the checkpoint state.
    print(saved['train_loss'])
    print(saved['test_loss'])
else:
    # Losses are missing from the checkpoint: restore the weights and
    # recompute them on the test set, then on the train set.
    epoch = saved['epoch']
    model.load_state_dict(saved['model_state_dict'])
    model = model.to(device)
    model.eval()
    train_loader, test_loader = image_loader(image_dir+zoomlevel+"/", data_dir, optim_config['batch_size'], run_config['num_workers'],
                                             data_config['image_size'], data_version=data_version,
                                             sampling=sampling, recalculate_normalize=False)

    if model_type == 'AE':

        from BM1_util_train_test import train, test, AverageMeter
        criterion = nn.MSELoss(reduction='mean')

        def _ae_batch_losses(image_list, data):
            # Plain autoencoder: single reconstruction loss against the input.
            out_image = model(data)
            return (criterion(out_image, data),)

        (best_test,) = _average_losses(test_loader, _ae_batch_losses, 1, AverageMeter)
        print(best_test)

        (best,) = _average_losses(train_loader, _ae_batch_losses, 1, AverageMeter)
        print(best)

        # Appends one row per execution of this cell.
        with open(out_dir+"AE_train.csv", "a") as f:
            f.write("%s,%s,%d,%.4f,%.4f\n" % (model_run_date, sampling, epoch, best, best_test))

    elif model_type == 'SAE':
        from M1_util_train_test import train, test, AverageMeter
        from util_model import my_loss
        from dataloader import load_demo

        criterion = my_loss
        demo_cs, demo_np = load_demo(data_dir, norm=normalization)

        def _sae_batch_losses(image_list, data):
            # Two loss components are returned by the criterion for this model.
            census_data = _lookup_census(image_list, demo_cs, demo_np)
            out_image, out_demo = model(data)
            loss1, loss2 = criterion(out_image, out_demo, data, census_data, return_components=True)
            return loss1, loss2

        best_test_1, best_test_2 = _average_losses(test_loader, _sae_batch_losses, 2, AverageMeter)
        print(best_test_1, best_test_2)

        best_1, best_2 = _average_losses(train_loader, _sae_batch_losses, 2, AverageMeter)
        print(best_1, best_2)

        # Appends one row per execution of this cell.
        with open(out_dir+"SAE_train.csv", "a") as f:
            f.write("%s,%s,%d,%s,%s,%d,%.4f,%.4f,%.4f,%.4f\n" % (model_run_date, zoomlevel,
                model_config['output_dim']**2*2048, sampling, normalization, epoch, best_1, best_2, best_test_1, best_test_2))

    elif model_type == 'SAE_Adv':

        from M2_util_train_test import train, test, AverageMeter
        from util_model import adv_loss
        from dataloader import load_demo

        criterion = adv_loss
        demo_cs, demo_np = load_demo(data_dir, norm=normalization)

        def _adv_batch_losses(image_list, data):
            # Three loss components are returned by the criterion for this model.
            census_data = _lookup_census(image_list, demo_cs, demo_np)
            out_image, out_demo, out_adv = model(data)
            loss1, loss2, loss3 = criterion(out_image, out_demo, out_adv, data, census_data, return_components=True)
            return loss1, loss2, loss3

        best_test_1, best_test_2, best_test_3 = _average_losses(test_loader, _adv_batch_losses, 3, AverageMeter)
        print(best_test_1, best_test_2, best_test_3)

        best_1, best_2, best_3 = _average_losses(train_loader, _adv_batch_losses, 3, AverageMeter)
        print(best_1, best_2, best_3)

        # Appends one row per execution of this cell.
        with open(out_dir+model_type+"_train.csv", "a") as f:
            f.write("%s,%s,%d,%s,%s,%d,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n" % (model_run_date, zoomlevel,
                    model_config['output_dim']**2*2048, sampling, normalization, epoch,
                    best_1, best_2, best_3, best_test_1, best_test_2, best_test_3))
   
        

14139 images in dataset
1571 images in dataset
0	10	20	30	40	50	60	70	80	90	0.38506247616360556 0.005301754706083554
0	10	20	30	40	50	60	70	80	90	100	110	120	130	140	150	160	170	180	190	200	210	220	230	240	250	260	270	280	290	300	310	320	330	340	350	360	370	380	390	400	410	420	430	440	450	460	470	480	490	500	510	520	530	540	550	560	570	580	590	600	610	620	630	640	650	660	670	680	690	700	710	720	730	740	750	760	770	780	790	800	810	820	830	840	850	860	870	880	0.3740111256313506 0.0003160331942065807
