In [1]:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os
from PIL import Image    



In [2]:
import pandas as pd
import os

df = pd.read_csv('flair-vgg16-data.csv', names=['_id', 'message', 'image_concept', 'published', 'disabled'])
df['available'] = 0

all_images_path = 'data/all_images'
for i, row in df.iterrows():
    if os.path.isfile(os.path.join(all_images_path, row['_id'] + '.jpg')):
        df.at[i, 'available']= 1    
        


In [3]:
df_published = df[(df.published == 1) & (df.available == 1) & df.message.notnull()]

df_disabled = df[(df.disabled == 1) & (df.available == 1) & df.message.notnull()]


print(f"published {len(df_published)}, disabled {len(df_disabled)}")

df_all = pd.concat([df_published, df_disabled], ignore_index=True)

df_all = df_all.reset_index(drop=True)

df_all

published 2628, disabled 5348


Unnamed: 0,_id,message,image_concept,published,disabled,available
0,5e5836fee917e8d9a8a7b277,endless blues greatbarrierreef australia whits...,seascape water shoal sea turquoise sun tropica...,1,0,1
1,5e58343ded065ad79e312f3d,hamiltonisland,tree travel vacation seashore water hotel isla...,1,0,1
2,5e57dc939e88b6be2ac42800,we are going coconuts for hamiltonisland here ...,relaxation beach sea vacation sand recreation ...,1,0,1
3,5e55dca437fa5927dcdf02f3,en route to gbr embrace the elevation in luxur...,nature travel diving water sea underwater ocea...,1,0,1
4,5e55d69eb9e5b725cd7ba02f,golf course views hamiltonislandgolfcourse whi...,outdoors landscape beach sky nature rural nope...,1,0,1
...,...,...,...,...,...,...
7971,5e253779f1b8d48ba5de7d32,colours so bright they hurt your eyes tropical...,outdoors nature scenery landscape water land o...,0,1,1
7972,5e252d334610948976f731e5,호 주 학 생 비 자 치 료 마 사 지 과 정 치 료 마 사 지 과 정 은 마 사 ...,human person patient therapy massage heel spa,0,1,1
7973,5e252d334610948976f731e6,호 주 학 생 비 자 치 료 마 사 지 과 정 치 료 마 사 지 과 정 은 마 사 ...,plant paper text flower blossom,0,1,1
7974,5e252d3342307c89757703c0,호 주 학 생 비 자 치 료 마 사 지 과 정 치 료 마 사 지 과 정 은 마 사 ...,person human finger hand dating face arm,0,1,1


In [4]:
from sklearn.model_selection import train_test_split

train_df, val_df = train_test_split(df_all, test_size=0.4, random_state=42)
val_df, test_df = train_test_split(val_df, test_size=0.4, random_state=42)

train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

print(f"train {len(train_df)}, val {len(val_df)}, test {len(test_df)}")

train 4785, val 1914, test 1277


In [5]:
from flair.embeddings import (
    Sentence, 
    WordEmbeddings, 
    FlairEmbeddings, 
    StackedEmbeddings, 
    DocumentRNNEmbeddings,
    BytePairEmbeddings
)
from flair.training_utils import store_embeddings
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import numpy as np
from torchvision import transforms, models
from PIL import Image
import os
import matplotlib.pyplot as plt

%matplotlib inline


def get_pretrain_model():
    arch = 'resnet18'

    # load the pre-trained weights
    model_file = '%s_places365.pth.tar' % arch
    if not os.access(model_file, os.W_OK):
        weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
        os.system('wget ' + weight_url)

    model = models.__dict__[arch](num_classes=365)
    checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
    state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
    model.load_state_dict(state_dict)
    return model


model = get_pretrain_model()


model



  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [6]:
test_model = get_pretrain_model()
test_model = nn.Sequential(*list(test_model.children())[:-2])

test_classifier = nn.Sequential(
    nn.Dropout(p=0.5),
    # input 512, 7, 7
    # output size: (7 - 1)/1 + 1 = 7
    # output tensor: 1 x 7 x 7
    nn.Conv2d(512, 1, 1),
    nn.ReLU(inplace=True),
    nn.AvgPool2d(7)
)        

test_aap = nn.AdaptiveAvgPool2d(1)

test_sample = test_model(torch.zeros(10,3,224,224))
print(test_sample.shape)

test_out = test_aap(test_sample)

print(test_out.shape)

torch.Size([10, 512, 7, 7])
torch.Size([10, 512, 1, 1])


In [7]:
def oversample_df(df):
    classes = ['published', 'disabled'] 
    classes_count = []
    for c in classes:    
        classes_count.append(len(df.loc[df[c] == 1]))
    
    max_count = max(classes_count)
    resample_ratios = [round(max_count/c) for c in classes_count]
            
    resampled = []
    for i in range(len(resample_ratios)):
        c = classes[i]
        ratio = resample_ratios[i]        
        for r in range(ratio):            
            resampled.append(df.loc[df[c] == 1])
            
    resampled_df = pd.concat(resampled, ignore_index=True)
    resampled_df = resampled_df.sample(frac=1)
    resampled_df = resampled_df.reset_index(drop=True)
    
    return resampled_df

resampled = oversample_df(train_df)
print(f"published {len(resampled.loc[resampled.published == 1])}")
print(f"disabled {len(resampled.loc[resampled.disabled == 1])}")


def balance_df(df):
    classes = ['published', 'disabled'] 
    classes_count = []
    for c in classes:    
        classes_count.append(len(df.loc[df[c] == 1]))
    
    min_count = min(classes_count)
    
    resampled = []
    for c in classes:
        resampled.append(df[df[c] == 1][:min_count])
        
    resampled_df = pd.concat(resampled, ignore_index=True)
    resampled_df = resampled_df.sample(frac=1)
    resampled_df = resampled_df.reset_index(drop=True)
    
    return resampled_df
    
    
resampled = balance_df(train_df)
print(f"published {len(resampled.loc[resampled.published == 1])}")
print(f"disabled {len(resampled.loc[resampled.disabled == 1])}")
    

published 3116
disabled 3227
published 1558
disabled 1558


In [8]:
def get_batches(df, transformer, oversample=False, batch_size=16):        
    if oversample:        
        df = oversample_df(df)
    else:
        df = balance_df(df)
        
    n_batches = len(df)//batch_size    
    for i in range(0, len(df), batch_size):
        sentences = [Sentence(txt) for txt in df[i:i+batch_size]['message']]
        labels = [1 if label else 0 for label in df[i:i+batch_size]['published']]
        
        images = []
        for _id in df[i:i+batch_size]['_id']:
            image = Image.open('data/all_images/'+_id+'.jpg').convert('RGB')
            images.append(transformer(image).unsqueeze(0))
            
        image_tensor = torch.cat(images, 0)
        
        yield sentences, image_tensor, torch.FloatTensor(labels)


In [9]:
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score


In [10]:
train_on_gpu = torch.cuda.is_available()

    
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
#         self.document_embeddings = DocumentRNNEmbeddings([
#             WordEmbeddings('twitter'),
#         ], hidden_size=128)
        
        pretrained_model = get_pretrain_model()
        pretrained_model = nn.Sequential(*list(pretrained_model.children())[:-2])
        for param in pretrained_model.parameters():
            param.requires_grad = False
        
        self.pretrained_model = pretrained_model

        self.adaptive_avg_pool_2d = nn.AdaptiveAvgPool2d(1)
        
        self.fc = nn.Linear(512, 1)
        
#         self.fc = nn.Linear(2048*10*10, 1)
        #self.sig = nn.Sigmoid()
        
    def forward(self, sentences, images):
        batch_size = images.size(0)
        
#         self.document_embeddings.embed(sentences)
        
#         text_embedding_list = [
#             s.embedding.unsqueeze(0) for s in sentences
#         ]
        
#         text_embedding_tensor = torch.cat(text_embedding_list, 0).cuda()
        
#         if train_on_gpu:
#             images = images.cuda()
            
#         image_tensor = self.vgg16(images)
#         image_tensor = image_tensor.view(batch_size, -1)
        
#         input_tensor = torch.cat([text_embedding_tensor, image_tensor], 1)
        
        input_tensor = images.cuda()
        out = self.pretrained_model(input_tensor)                
        out = self.adaptive_avg_pool_2d(out)
        out = out.view(batch_size, -1)
        out = self.fc(out)
        #input_tensor = input_tensor.view(batch_size, -1)
        
        #print(input_tensor.shape)
        #out = self.sig(self.fc(input_tensor))
        #out = self.sig(input_tensor)
        return out
    

inv_normalize = transforms.Normalize(
   mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
   std=[1/0.229, 1/0.224, 1/0.225]
)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])

image_dimension = 224
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(image_dimension),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(5),
    transforms.ToTensor(),
    normalize
])        

test_transform = transforms.Compose([
    transforms.Resize((image_dimension, image_dimension)),
    transforms.ToTensor(),
    normalize    
])

    

def visualise_data():
    fig = plt.figure(figsize=(15, 15))
    num_of_samples = 20
    num_cols = 5
    for i, (sentences, images, labels) in enumerate(get_batches(train_df, train_transform)):
        if i >= num_of_samples:
            break
        
        ax = fig.add_subplot(num_of_samples//num_cols + 1, num_cols, i+1, xticks=[], yticks=[])
        image = images[0]
        image = inv_normalize(image)
        plt.imshow(np.transpose(image, (1, 2, 0)))
        label = labels[0]
        ax.set_title(f"{label}")
        
        
from torch.optim import lr_scheduler

def train_model(model, epochs, lr, train_df, val_df, checkpoint_file, early_stopping=5):        
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)
    best_loss = np.inf
    no_improvement = 0

    if train_on_gpu:
        model = model.cuda()
        
    for epoch in range(epochs):        
        total_train_loss = 0
        total_val_loss = 0
        total_train = 0
        total_val = 0
        train_loss = 0
        val_loss = 0
        
        # Train
        model.train()        
        for i, (sentences, images, labels) in enumerate(get_batches(train_df, train_transform, True)):         
            if train_on_gpu:
                labels = labels.cuda()
            
            optimizer.zero_grad()
            
            out = model(sentences, images)
            loss = criterion(out.squeeze(), labels)
            loss.backward()
            
            #nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            
            total_train_loss += loss.item()
            total_train += labels.size(0)
            
            store_embeddings(sentences, 'cpu')
            
            if i % 20 == 0:
                print(f"Epoch {epoch}, Batch {i}, train loss {loss.item()/labels.size(0)}")
            
            
        train_loss = total_train_loss/total_train
        print(f"> Epoch {epoch}, train loss {train_loss}")
        
        
        # Eval
        model.eval()
        all_labels = np.array([])
        all_pred = np.array([])        
        for sentences, images, labels in get_batches(val_df, test_transform):
            if train_on_gpu:
                labels = labels.cuda()
            
            out = model(sentences, images)
            loss = criterion(out.squeeze(), labels)
            total_val_loss += loss.item()
            total_val += labels.size(0)
                                
            pred = torch.round(torch.sigmoid(out.squeeze()))
            
            # for matrix
            pred_np = pred.data.cpu().numpy() if train_on_gpu else pred.data.numpy()
            labels_np = labels.data.cpu().numpy() if train_on_gpu else labels.data.numpy()                    
            all_pred = np.concatenate([all_pred, pred_np])
            all_labels = np.concatenate([all_labels, labels_np])
                        
            # clear memory
            store_embeddings(sentences, 'cpu')
            
            
        val_loss = total_val_loss / total_val
        f1 = f1_score(all_labels, all_pred, average='weighted')
        acc = accuracy_score(all_labels, all_pred)
        
        print(f"> Epoch {epoch}, val loss {val_loss}, accuracy {acc}, f1_score {f1}")
        
        if val_loss < best_loss:
            best_loss = val_loss
            no_improvement = 0
            torch.save(model.state_dict(), checkpoint_file)
            print("Saved model.")
        else:
            no_improvement += 1
            print("No improvement.")
            if no_improvement >= early_stopping:
                print(f"Early Stopping")
                break
                
                
        # reduce learning rate
        scheduler.step()

            
                                              
checkpoint_file = 'flair_vgg16_model_final.pt'      
lr = 0.001
epochs = 20         

model = MyModel()
#print(model)
train_model(model, epochs, lr, train_df, val_df, checkpoint_file)

        


Epoch 0, Batch 0, train loss 0.050079595297575
Epoch 0, Batch 20, train loss 0.04431527853012085
Epoch 0, Batch 40, train loss 0.037323445081710815
Epoch 0, Batch 60, train loss 0.04336617887020111
Epoch 0, Batch 80, train loss 0.04194758087396622
Epoch 0, Batch 100, train loss 0.04303673282265663
Epoch 0, Batch 120, train loss 0.043144021183252335
Epoch 0, Batch 140, train loss 0.039126165211200714
Epoch 0, Batch 160, train loss 0.0434381440281868
Epoch 0, Batch 180, train loss 0.03743201866745949
Epoch 0, Batch 200, train loss 0.038320329040288925
Epoch 0, Batch 220, train loss 0.032621629536151886
Epoch 0, Batch 240, train loss 0.037512682378292084
Epoch 0, Batch 260, train loss 0.038997650146484375
Epoch 0, Batch 280, train loss 0.03067244589328766
Epoch 0, Batch 300, train loss 0.042745426297187805
Epoch 0, Batch 320, train loss 0.03372036665678024
Epoch 0, Batch 340, train loss 0.03968781977891922
Epoch 0, Batch 360, train loss 0.029326818883419037
Epoch 0, Batch 380, train loss 

Epoch 7, Batch 0, train loss 0.04290837049484253
Epoch 7, Batch 20, train loss 0.03222533315420151
Epoch 7, Batch 40, train loss 0.0440770648419857
Epoch 7, Batch 60, train loss 0.02957294136285782
Epoch 7, Batch 80, train loss 0.03239576518535614
Epoch 7, Batch 100, train loss 0.023778628557920456
Epoch 7, Batch 120, train loss 0.04036487638950348
Epoch 7, Batch 140, train loss 0.04378246143460274
Epoch 7, Batch 160, train loss 0.04232189804315567
Epoch 7, Batch 180, train loss 0.04541537165641785
Epoch 7, Batch 200, train loss 0.03979666531085968
Epoch 7, Batch 220, train loss 0.037927836179733276
Epoch 7, Batch 240, train loss 0.027853824198246002
Epoch 7, Batch 260, train loss 0.04262251406908035
Epoch 7, Batch 280, train loss 0.03131712228059769
Epoch 7, Batch 300, train loss 0.02355280891060829
Epoch 7, Batch 320, train loss 0.03134436532855034
Epoch 7, Batch 340, train loss 0.028039343655109406
Epoch 7, Batch 360, train loss 0.02802964299917221
Epoch 7, Batch 380, train loss 0.0

> Epoch 13, val loss 0.03512898736683897, accuracy 0.7131782945736435, f1_score 0.7125973062545763
Saved model.
Epoch 14, Batch 0, train loss 0.03153013810515404
Epoch 14, Batch 20, train loss 0.03529452160000801
Epoch 14, Batch 40, train loss 0.03581370785832405
Epoch 14, Batch 60, train loss 0.032160550355911255
Epoch 14, Batch 80, train loss 0.0385383702814579
Epoch 14, Batch 100, train loss 0.04002279043197632
Epoch 14, Batch 120, train loss 0.036396853625774384
Epoch 14, Batch 140, train loss 0.029498476535081863
Epoch 14, Batch 160, train loss 0.031843654811382294
Epoch 14, Batch 180, train loss 0.03488294780254364
Epoch 14, Batch 200, train loss 0.03212668374180794
Epoch 14, Batch 220, train loss 0.04549304395914078
Epoch 14, Batch 240, train loss 0.039179615676403046
Epoch 14, Batch 260, train loss 0.039079584181308746
Epoch 14, Batch 280, train loss 0.043909117579460144
Epoch 14, Batch 300, train loss 0.032503947615623474
Epoch 14, Batch 320, train loss 0.029529733583331108
Ep

In [11]:
def eval_model(model, test_df):
    if train_on_gpu:
        model = model.cuda()
        
    model.eval()
    num_correct = 0
    num_total = 0
    all_pred = np.array([])
    all_labels = np.array([])
    for i, (sentences, images, labels) in enumerate(get_batches(test_df, test_transform)):
        if train_on_gpu:
            labels = labels.cuda()
            
        out = model(sentences, images)
        pred = torch.round(torch.sigmoid(out.squeeze()))

        correct = (pred == labels)
        correct = correct.cpu().numpy() if train_on_gpu else correct.numpy()        
        num_correct += np.sum(correct)
        num_total += labels.size(0)


        
        pred_np = pred.data.cpu().numpy() if train_on_gpu else pred.data.numpy()
        labels_np = labels.data.cpu().numpy() if train_on_gpu else labels.data.numpy()                    
        all_pred = np.concatenate([all_pred, pred_np])
        all_labels = np.concatenate([all_labels, labels_np])
        
        
        store_embeddings(sentences, 'cpu')

    
    f1 = f1_score(all_labels, all_pred, average='weighted')
    acc = accuracy_score(all_labels, all_pred)
    
    print(f"{num_correct}/{num_total} correct. Accuracy: {num_correct*100/num_total} %, acc {acc}, f1 {f1}")
    
    
best_model = MyModel()
best_model.load_state_dict(torch.load(checkpoint_file))
eval_model(best_model, test_df)


591/850 correct. Accuracy: 69.52941176470588 %, acc 0.6952941176470588, f1 0.695199196981759
