This notebook uses the following ordering of classes:

`damaged_infrastructure`: 0, `damaged_nature`: 1, `fires`: 2, `flood`: 3, `human_damage`: 4, `non_damage`: 5

In [230]:
import sys
import time
import sys
import copy
import numpy as np
import pickle
import os
import re
import itertools
from gensim.models.word2vec import Word2Vec
import sklearn
import sklearn.metrics
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, f1_score
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchtext.data import Field, TabularDataset
from torchtext.data import Iterator, BucketIterator
from torch.autograd import Variable
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
import torchvision
from torchvision import datasets, models, transforms

device = torch.device('cpu')

In [2]:
img_root = '../../data/img_data'
text_root = '../../data/text_data'
BASE_PATH = '../baselines'

## Create Joint Data Loader

In [3]:
all_train_data_fname = '../../intermediates/cleaned_text_train.pkl'
all_val_data_fname = '../../intermediates/cleaned_text_val.pkl'
all_test_data_fname = '../../intermediates/cleaned_text_test.pkl'

all_train_data = pickle.load(open(all_train_data_fname, 'rb'))
all_val_data = pickle.load(open(all_val_data_fname, 'rb'))
all_test_data = pickle.load(open(all_test_data_fname, 'rb'))

split_data = {
    'train': all_train_data,
    'val': all_val_data,
    'test': all_test_data
}

In [4]:
path_to_captions = {}
for split in split_data: 
    keys = list(split_data[split]['file_name'].values)
    values = list(split_data[split]['text'].values)
    path_to_captions[split] = dict(zip(keys, values))

In [5]:
class MultimodalFolder(datasets.ImageFolder):
    def __init__(self, root, captions, transform = None):
        super(MultimodalFolder, self).__init__(root, transform = transform)
        self.imgs = self.samples
        self.captions = captions
        self.transform = transform

    def __getitem__(self, index):
        label_dict = {'damaged_infrastructure': 0,
                      'damaged_nature': 1,
                      'fires': 2, 
                      'flood': 3,
                      'human_damage': 4,
                      'non_damage': 5}
        
        img, _ = super(MultimodalFolder, self).__getitem__(index)
        
        path, _ = self.imgs[index]
        path = re.sub(r'[\x80-\xFF]', '' , path)
        path, base_fname = path.rsplit('/', 1)
        
        label, split = path.rsplit('/')[-1], path.rsplit('/')[-2]
        curr_fname = base_fname.rsplit('.')[0] + '.txt'
        
        sample = {'fname': base_fname, 'image': img, 'caption': self.captions[curr_fname]}
        return sample, label_dict[label]

In [6]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale = (0.9, 1.0)), 
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(size = (224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(size = (224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

## Extract ResNet-50 Predictions for Images

In [36]:
img_pickle_files = {
    'train':(os.path.join('decisions', 'img_train_pred.pkl'), 
            os.path.join('decisions', 'img_train_gt.pkl')), 
    'val':(os.path.join('decisions', 'img_val_pred.pkl'),
          os.path.join('decisions', 'img_val_gt.pkl')),
    'test':(os.path.join('decisions', 'img_test_pred.pkl'),
           os.path.join('decisions', 'img_test_gt.pkl'))
}

def img_model_run_handler(model):
    from PIL import ImageFile
    ImageFile.LOAD_TRUNCATED_IMAGES = True
    
    image_datasets = {x: MultimodalFolder(root = os.path.join(img_root, x), 
                                          captions = path_to_captions[x], transform = data_transforms[x]) 
                      for x in ['train', 'val', 'test']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size = 32, shuffle = False, num_workers = 0)
                   for x in ['train', 'val', 'test']}

    X_text_train, X_text_val, X_text_test = [], [], []
    y_text_train, y_text_val, y_text_test = [], [], []
    test_ordered_captions = {
        'train': X_text_train,
        'val': X_text_val,
        'test': X_text_test
    }
    test_ordered_labels = {
        'train': y_text_train,
        'val': y_text_val,
        'test': y_text_test
    }
    
    model.eval()
    with torch.no_grad():
        for split in ['train', 'val', 'test']:
            preds_list, gt_list = [], []
            for samples, labels in dataloaders[split]:
#                 print(samples)
#                 print(samples['fname'])
#                 print(samples['caption'])
#                 return
#                 images = samples['image']                
#                 images = images.to(device)
                labels = labels.to(device)
#                 outputs = model(images)
                
#                 preds_list.extend(list(np.asarray(torch.max(outputs, 1)[1])))
#                 gt_list.extend(list(np.asarray(labels)))
                test_ordered_captions[split].extend(samples['caption'])
                test_ordered_labels[split].extend(list(np.asarray(labels)))

#             pickle_fns = img_pickle_files[split]
#             with open(pickle_fns[0], 'wb') as handle:
#                 pickle.dump(preds_list, handle)
#             with open(pickle_fns[1], 'wb') as handle:
#                 pickle.dump(gt_list, handle)
                
    return test_ordered_captions, test_ordered_labels

In [37]:
resnet_model = torchvision.models.resnet50(pretrained = False)
num_ftrs = resnet_model.fc.in_features
resnet_model.fc = nn.Linear(num_ftrs, 6)

resnet_state_file = os.path.join(BASE_PATH, 'img_only', 'output', 'trained_models', 'resnet50_model')
resnet_model.load_state_dict(torch.load(resnet_state_file))
test_ordered_captions, test_ordered_labels = img_model_run_handler(resnet_model)

AttributeError: 'list' object has no attribute 'split'

## Set up TF-IDF Vectorizer

In [23]:
X_total = test_ordered_captions['train'] + test_ordered_captions['val'] + test_ordered_captions['test']
y_total = test_ordered_labels['train'] + test_ordered_labels['val'] + test_ordered_labels['test']

In [41]:
# split up caption into lists
for i in range(len(X_total)):
    X_total[i] = X_total[i][0].split()

['one', 'of', 'the', 'houses', 'affected', 'by', 'the', 'blast', 'odorna', 'accrafloods', 'fire', 'blast', 'lives', 'photography']


AttributeError: 'list' object has no attribute 'split'

In [46]:
print(len(X_total))
print(len(y_total))

5831
5831


In [47]:
model = Word2Vec(X_total, size = 100, window = 5, min_count = 5, workers = 2)
w2v = {w: vec for w, vec in zip(model.wv.index2word, model.wv.vectors)}

## Train SVM with TF-IDF

In [158]:
svm_tfidf = Pipeline([("tfidf_vectorizer", TfidfVectorizer(analyzer = lambda x: x)), ("linear svc", SVC(kernel="linear"))])

In [193]:
scoring = ['precision_macro', 'recall_macro', 'accuracy']
svm_model = cross_validate(svm_tfidf, X_total, y_total, cv = 20, scoring = scoring, 
                           return_train_score = True, return_estimator = True)

KeyboardInterrupt: 

In [161]:
svm_model

{'estimator': (Pipeline(memory=None,
       steps=[('tfidf_vectorizer', TfidfVectorizer(analyzer=<function <lambda> at 0x115608268>, binary=False,
          decode_error='strict', dtype=<class 'numpy.float64'>,
          encoding='utf-8', input='content', lowercase=True, max_df=1.0,
          max_features=None, min_df=1, ngram_range=(1, 1), norm='l...r', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False))]), Pipeline(memory=None,
       steps=[('tfidf_vectorizer', TfidfVectorizer(analyzer=<function <lambda> at 0x115608268>, binary=False,
          decode_error='strict', dtype=<class 'numpy.float64'>,
          encoding='utf-8', input='content', lowercase=True, max_df=1.0,
          max_features=None, min_df=1, ngram_range=(1, 1), norm='l...r', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False))]), Pipeline(memory=None,
       steps=[('tfidf_vectorizer', TfidfVectorizer(analyzer=<function <lambda

In [162]:
svm_estimator = svm_model['estimator'][0]

In [168]:
text_train_preds = svm_estimator.predict(test_ordered_captions['train'])
text_val_preds = svm_estimator.predict(test_ordered_captions['val'])
text_test_preds = svm_estimator.predict(test_ordered_captions['test'])

In [176]:
y_preds = svm_estimator.predict(X_total)

3499
1166
1166


In [194]:
sklearn.metrics.accuracy_score(y_total, y_preds)

0.9682730234951124

In [None]:
train_sz = len(test_ordered_labels['train'])
val_sz = len(test_ordered_labels['val'])
test_sz = len(test_ordered_labels['test'])

text_train_preds = y_preds[:train_sz]
print(len(text_train_preds))
text_val_preds = y_preds[train_sz : train_sz + val_sz]
print(len(text_val_preds))
text_test_preds = y_preds[train_sz + val_sz: train_sz + val_sz + test_sz]
print(len(text_test_preds))

In [177]:
print(sklearn.metrics.accuracy_score(test_ordered_labels['train'], text_train_preds))
print(sklearn.metrics.accuracy_score(test_ordered_labels['val'], text_val_preds))
print(sklearn.metrics.accuracy_score(test_ordered_labels['test'], text_test_preds))

0.9539868533866819
0.9905660377358491
0.9888507718696398


## Consolidate Labels for Federator

In [178]:
text_train_correct = list((np.array(text_train_preds) == test_ordered_labels['train']) * 1)
text_val_correct = list((np.array(text_val_preds) == test_ordered_labels['val']) * 1)
text_test_correct = list((np.array(text_test_preds) == test_ordered_labels['test']) * 1)

In [179]:
(sum(text_train_correct) + sum(text_val_correct) + sum(text_test_correct))/(len(text_train_correct) + len(text_val_correct) + len(text_test_correct))

0.9682730234951124

In [180]:
img_train_preds = pickle.load(open(img_pickle_files['train'][0], 'rb'))
img_val_preds = pickle.load(open(img_pickle_files['val'][0], 'rb'))
img_test_preds = pickle.load(open(img_pickle_files['test'][0], 'rb'))

img_train_correct = list((np.array(img_train_preds) == test_ordered_labels['train']) * 1)
img_val_correct = list((np.array(img_val_preds) == test_ordered_labels['val']) * 1)
img_test_correct = list((np.array(img_test_preds) == test_ordered_labels['test']) * 1)

In [205]:
multimodal_labels = {
    'train': [text_train_correct, img_train_correct],
    'val': [text_val_correct, img_val_correct],
    'test': [text_test_correct, img_test_correct]
}

# 1 if img = 1 but text = 0, else 0
federator_train = list((np.array(img_train_correct) > np.array(text_train_correct)) * 1)
federator_val = list((np.array(img_val_correct) > np.array(text_val_correct)) * 1)
federator_test = list((np.array(img_test_correct) > np.array(text_test_correct)) * 1)
federator_total = federator_train + federator_val + federator_test

## Instantiate and Train Federator

In [215]:
fed_tfidf = Pipeline([("tfidf_vectorizer", TfidfVectorizer(analyzer = lambda x: x)), \
                      ("linear svc", SVC(kernel = "linear"))])
federator = cross_validate(fed_tfidf, X_total[:train_sz + val_sz], federator_total[:train_sz + val_sz], cv = 5, scoring = scoring, 
                           return_train_score = True, return_estimator = True)

  'precision', 'predicted', average, warn_for)


In [218]:
federator_estimator = federator['estimator'][0]
federator_total_preds = federator_estimator.predict(X_total[train_sz + val_sz:])

1166
1166


In [217]:
print('Test Accuracy: %.5f' % sklearn.metrics.accuracy_score(federator_total[train_sz + val_sz:], federator_total_preds))
print('Test F1 Score: %.5f' % sklearn.metrics.f1_score(federator_total[train_sz + val_sz:], federator_total_preds, average = 'weighted'))
print('Test Precision: %.5f' % sklearn.metrics.precision_score(federator_total[train_sz + val_sz:], federator_total_preds, average = 'weighted'))
print('Test Recall: %.5f' % sklearn.metrics.recall_score(federator_total[train_sz + val_sz:], federator_total_preds, average = 'weighted'))

Test Accuracy: 0.99228
Test F1 Score: 0.98844
Test Precision: 0.98462
Test Recall: 0.99228


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


#### 5 fold cross val
Test Accuracy: 0.92814
Test F1 Score: 0.92878
Test Precision: 0.93481
Test Recall: 0.92814

#### 10 fold cross val
Test Accuracy: 0.93020
Test F1 Score: 0.93086
Test Precision: 0.93815
Test Recall: 0.93020

#### 20 fold cross val
Test Accuracy: 0.94478
Test F1 Score: 0.94523
Test Precision: 0.95016
Test Recall: 0.94478

## Determine Downstream Classifier Accuracy

In [209]:
text_total_preds = list(text_train_preds) + list(text_val_preds) + list(text_test_preds)
img_total_preds = img_train_preds + img_val_preds + img_test_preds

In [221]:
federator_classifications = []

for i in range(len(y_total[train_sz + val_sz:])):
    if federator_total_preds[i] == 1:
        federator_classifications.append(img_test_preds[i])
    else:
        federator_classifications.append(list(text_test_preds)[i])

In [222]:
len(federator_classifications)

1166

In [223]:
print('Test Accuracy: %.5f' % sklearn.metrics.accuracy_score(y_total[train_sz + val_sz:], federator_classifications))
print('Test F1 Score: %.5f' % sklearn.metrics.f1_score(y_total[train_sz + val_sz:], federator_classifications, average = 'weighted'))
print('Test Precision: %.5f' % sklearn.metrics.precision_score(y_total[train_sz + val_sz:], federator_classifications, average = 'weighted'))
print('Test Recall: %.5f' % sklearn.metrics.recall_score(y_total[train_sz + val_sz:], federator_classifications, average = 'weighted'))

Test Accuracy: 0.98885
Test F1 Score: 0.98883
Test Precision: 0.98899
Test Recall: 0.98885


## Plot Confusion Matrix

In [228]:
cm = sklearn.metrics.confusion_matrix(y_total[train_sz + val_sz:], federator_classifications)

In [282]:
def plot_confusion_matrix(cm, classes, normalize = False, title='Feature Fusion Confusion Matrix', cmap = 'BuPu'):
    if normalize: 
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


# Compute confusion matrix
plt.close()
plot_confusion_matrix(cm, classes = range(6), normalize = True)

Normalized confusion matrix
[[0.98954704 0.         0.         0.00696864 0.         0.00348432]
 [0.0212766  0.96808511 0.         0.         0.         0.0106383 ]
 [0.01515152 0.         0.98484848 0.         0.         0.        ]
 [0.03658537 0.         0.         0.96341463 0.         0.        ]
 [0.06       0.         0.         0.         0.94       0.        ]
 [0.         0.         0.         0.         0.         1.        ]]


In [274]:
!rm federator_matrix.png
plt.savefig('federator_matrix')

In [281]:
gt = pickle.load(open('gt_list.pkl', 'rb'))
pred = pickle.load(open('preds_list.pkl', 'rb'))

In [283]:
# Compute confusion matrix
cm = sklearn.metrics.confusion_matrix(gt, pred)
plt.close()
plot_confusion_matrix(cm, classes = range(6), normalize = True)
plt.savefig('fusion_matrix')

Normalized confusion matrix
[[0.87455197 0.05376344 0.01075269 0.00716846 0.00716846 0.04659498]
 [0.11206897 0.81896552 0.01724138 0.00862069 0.00862069 0.03448276]
 [0.07692308 0.06153846 0.84615385 0.         0.         0.01538462]
 [0.06060606 0.03030303 0.01515152 0.83333333 0.         0.06060606]
 [0.11320755 0.         0.03773585 0.         0.81132075 0.03773585]
 [0.00681431 0.0153322  0.00170358 0.00170358 0.         0.97444634]]
