In [1]:
#import argparse
#import os
#import random
#import time
#import sys
import numpy as np
import pandas as pd
#import shutil

import torch
import torch.nn as nn
#import torch.nn.parallel
import torch.optim
#iimport torch.multiprocessing as mp
import torch.utils.data
import torchvision.transforms as transforms
#import torchvision.datasets as datasets
import torchvision.models as models
from PIL import Image
import cv2
import json

In [2]:
class TimeDataset(torch.utils.data.Dataset):
    def __init__(self, filenames, labels, tranform):
        self.filenames = filenames
        self.labels = labels
        self.transform = tranform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        Y = self.labels[index]
        cv_in = cv2.imread(self.filenames[index]) # color
        #if cv_in is None:
            #print('input: '+self.filenames[ID])
        #if cv_out is None:
            #print('output: '+self.filenames[ID])
        pil_in = Image.fromarray(cv_in)
        X = self.transform(pil_in)
        return X,Y

In [3]:
def train(train_loader,model,criterion,optimizer):
    model.train()
    total_loss = 0.0
    epoch_samples = 0
    for x, y in train_loader:
        epoch_samples += x.size(0)
        y = y.float()
        x = x.cuda(non_blocking=True)
        y = y.cuda(non_blocking=True)

        x_var = torch.autograd.Variable(x)
        y_var = torch.autograd.Variable(y)

        yhat = model(x_var)
        loss = criterion(yhat.squeeze(),y_var)
        total_loss += loss.data.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return (total_loss/epoch_samples)

In [4]:
def val(val_loader,model,criterion):
    model.eval()
    total_loss = 0.0
    epoch_samples = 0
    #with torch.no_grad():
    for x, y in val_loader:
        epoch_samples += x.size(0)
        y = y.float()
        x = x.cuda(non_blocking=True)
        y = y.cuda(non_blocking=True)

        x_var = torch.autograd.Variable(x)
        y_var = torch.autograd.Variable(y)

        yhat = model(x_var)
        loss = criterion(yhat.squeeze(),y_var)
        total_loss += loss.data.item()

    return (total_loss/epoch_samples)

In [5]:
with open("../mirflickr1m/labels.json", "r") as file:
    labels_dict = json.load(file)
filenames = list(labels_dict.keys())
labels = list(labels_dict.values())
df_dict = {'filename': filenames, 'label': labels}
# data = pd.read_csv('../final_data.csv')
data = pd.DataFrame(df_dict)
data_train = data.sample(frac=0.8,random_state=17)
data_val = data.loc[~data.index.isin(data_train.index)]
files_train = list(data_train['filename'])
files_val = list(data_val['filename'])
label_train = list(data_train['label'])
label_val = list(data_val['label'])
# ids_train = [i for i in range(len(files_train))]
# ids_val = [i for i in range(len(files_val))]
data = None
data_train = None
data_val = None
model = models.resnet50()
model.fc = nn.Sequential(nn.Linear(2048, 10),
                                      nn.ReLU(inplace=True),
                                      nn.Linear(10,1))
model = torch.nn.DataParallel(model).cuda()
# model.load_state_dict(torch.load("model_hazy_best.pth"),strict=False) # on GPU
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.Adam(model.parameters(),lr=1e-3)
train_dataset = TimeDataset(files_train, label_train,
                            transforms.Compose([transforms.Resize((256,256)),
                                                transforms.ToTensor(),
                                                transforms.Normalize(mean=[0.5231, 0.5180, 0.5115],
                                                                     std=[0.2014, 0.2018, 0.2100]),])) # normalize
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=8)
val_dataset = TimeDataset(files_val, label_val,
                          transforms.Compose([transforms.Resize((256,256)),
                                              transforms.ToTensor(),
                                              transforms.Normalize(mean=[0.5231, 0.5180, 0.5115],
                                                                   std=[0.2014, 0.2018, 0.2100]),]))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=8)

best_loss = 1e5
for epoch in range(10):
    train_loss = train(train_loader,model,criterion,optimizer)
    val_loss = val(val_loader,model,criterion)
    print('Epoch: %d, MSE train set: %.8f' % (epoch+1, train_loss))
    print('Epoch: %d, MSE val set: %.8f\n' % (epoch+1, val_loss))
#     if val_loss < best_loss:
#         torch.save(model.state_dict(),'time_model_best_5e3.pth')
#         best_loss = val_loss

Epoch: 1, MSE train set: 7.62194426
Epoch: 1, MSE val set: 7.12528823

Epoch: 2, MSE train set: 7.01334652
Epoch: 2, MSE val set: 7.07455646

Epoch: 3, MSE train set: 6.98098676
Epoch: 3, MSE val set: 6.95733151

Epoch: 4, MSE train set: 6.97745884
Epoch: 4, MSE val set: 7.06214844

Epoch: 5, MSE train set: 6.98333653
Epoch: 5, MSE val set: 7.02035061

Epoch: 6, MSE train set: 6.96900807
Epoch: 6, MSE val set: 7.09079566

Epoch: 7, MSE train set: 6.97882871
Epoch: 7, MSE val set: 6.99961192



KeyboardInterrupt: 