In [3]:
# Project-local building blocks: dataset wrappers, the data-prep / training
# pipeline, and the evaluation plot helpers.
from models.dataset import Dataset, DatasetReality
from models.pipeline import prep_data, train_classifier
from ana.visual import STAT, ROC, FEAT

import random
import numpy as np

# One seed value drives both global RNGs (NumPy's legacy global state and the
# stdlib `random` module).
SEED = 7
np.random.seed(SEED)
random.seed(SEED)

import warnings
from sklearn.linear_model import LogisticRegression
# Blanket filter: every warning category is ignored from this point on,
# including any raised while importing matplotlib below.
# NOTE(review): this also hides convergence/deprecation warnings - confirm
# that is intended.
warnings.simplefilter('ignore')

# Plotting library for the evaluation cells.
# NOTE(review): `plt`, `LogisticRegression` and `FEAT` are not called in the
# cells visible here (FEAT only appears in a commented-out line).
import matplotlib.pyplot as plt

In [4]:
# Build the primary dataset object through the shared prep pipeline.
# `test=0.1` is presumably the held-out fraction - the recorded run fetched
# 82 training and 10 test items.
# NOTE(review): `seed=None` together with `shuffle=True` may make the split
# depend on the global RNG state rather than an explicit seed - confirm
# against prep_data.
dataset = prep_data(
    Dataset,
    test=0.1,
    shuffle=True,
    seed=None,
    text_idf=True,
    categ_idf=True,
    cross_simi=True,
    norm_simi=True,
    suppress_freq=1,
)

 - Preparing data..
 - Initialize image encoder..
  - Set feature names..
  - Build category level idf..
  - Build similarity normalizer..
 - Initialize text encoder..
 - Initialize joint encoder..
 - Set feature names..
- Fetching training data..
     Fetched - [82]
- Fetching test data..
     Fetched - [10]
  - Data train shape (246, 10443)
  - Data test shape (30, 10443)


In [5]:
# Build the "reality" dataset, handing it the train/test indices already
# chosen for `dataset` (presumably so both datasets share the same split -
# confirm against prep_data's `index_tup` handling).
shared_split = (dataset.index_train, dataset.index_test)
dataset_r = prep_data(
    DatasetReality,
    index_tup=shared_split,
    categ_idf=True,
    cross_simi=True,
    norm_simi=True,
)

 - Preparing data..
 - Initialize image encoder..
  - Set feature names..
  - Build category level idf..
  - Build similarity normalizer..
 - Initialize text encoder..
 - Initialize joint encoder..
 - Set feature names..
 - Build fake layers..
 - Set feature names..
- Fetching training data..
     Fetched - [82]
- Fetching test data..
     Fetched - [10]
  - Data train shape (164, 311)
  - Data test shape (20, 311)


In [8]:
# Both classifiers use the same class weighting: label 0 counts one tenth as
# much as label 1. Each call receives its own copy of the mapping.
class_weight = {1: 1, 0: 0.1}

# The two models differ only in their dataset and their `C` value
# (presumably the inverse regularisation strength - confirm in
# train_classifier).
clf = train_classifier(dataset, class_weight=dict(class_weight), C=20)
clf_r = train_classifier(dataset_r, class_weight=dict(class_weight), C=10)

In [5]:
# NOTE(review): this cell's recorded execution count (5) is lower than the
# training cell's (8), so any saved results may come from an earlier `clf`.
# Re-run top to bottom before trusting them.
held_out = dataset.data_test

# The last column is read as the ground-truth label; `.toarray()` suggests
# the matrix is sparse (presumably scipy.sparse - confirm).
y_true = held_out[:, -1].toarray().flatten()

# All remaining columns are the model input. Column 1 of the probability
# output is kept - presumably P(label == 1); confirm against clf.classes_.
y_prob = clf.predict_proba(held_out[:, :-1])[:, 1]

In [None]:
# Tag appended to the name of every artefact written by this evaluation run.
suffix = 'temp'
stat_path = 'STAT_%s' % suffix
roc_path = 'ROC_%s' % suffix

# Summary statistics and ROC curve for the primary classifier on the test
# split; both helpers receive the true labels and the predicted scores.
STAT(y_true, y_prob, path=stat_path)
ROC(y_true, y_prob, path=roc_path)
# Optional feature plot, currently disabled:
# FEAT(dataset, clf, path='realityFEAT_%s' % suffix)

In [9]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible up front.
from models.model import Discriminator, categMetric

# Value passed as categMetric's `lamb` argument (its meaning is defined in
# models.model - not visible here).
lamb = 0.8

# Combine the primary dataset/classifier with the "reality" pair.
discriminator = Discriminator(dataset, clf, dataset_r, clf_r)

# Kept as the final expression so the cell still displays whatever
# categMetric returns.
categMetric(discriminator, lamb=lamb)

 [0] ------ 73 -----

Layers:  ['A2211', 'A3211']
Keywords:  ['landscape.n.01', 'city.n.01', 'street.n.01', 'person.n.02', 'stand.v.01', 'lean.v.01']
Sentence: A woman looks in the window.
Tokens: ['woman.n.01', 'look.v.01', 'window.n.01']
Seen tokens: ['woman.n.01', 'look.v.01', 'window.n.01']

Predicted layers:  ['A2113', 'A325']
Predicted keywords: ['room.n.01', 'object.n.01', 'wall.n.01', 'person.n.02', 'gesture.n.01'] Prob: 0.994258
Max contributed features: [('cons', '_S_wall.n.01-window.n.01_', 1.3871556681555537, 3.9477570842570873), ('cons', '_NLayers_', 2.0, 0.4814392282309747), ('real', '_P_gesture.n.01_', 0.7773747318635359, 1.185394816083373), ('real', '_S_wall.n.01_', 0.8697725664271759, -0.21221804365044694), ('real', '_NLayers_', 2.0, -0.4626998858633322), ('real', '_S_object.n.01_', 0.37501185772270385, -5.7054482876199515), ('cons', '_P_gesture.n.01_', 0.7773747318635359, -0.942456680542655)]
Acc: 0.756757 - Precision: 0.428571 - Recall: 0.375000 - F1: 0.400000
 [1] -

Layers:  ['A1', 'A2222', 'A316']
Keywords:  ['landscape.n.01', 'nature.n.03', 'wild.n.01', 'interaction.n.01', 'enjoyment.n.02']
Sentence: A woman puts up a lantern.
Tokens: ['woman.n.01', 'put.v.01', 'lantern.n.01']
Seen tokens: ['woman.n.01', 'put.v.01']

Predicted layers:  ['A2222', 'A316', 'A4']
Predicted keywords: ['landscape.n.01', 'nature.n.03', 'wild.n.01', 'interaction.n.01', 'enjoyment.n.02'] Prob: 0.938101
Max contributed features: [('cons', '_NLayers_', 3.0, 0.4814392282309747), ('cons', '_Decoration_', 1.0, 0.9093219472313856), ('cons', '_P_enjoyment.n.02_', 0.516919864717888, 1.3514688603374032), ('cons', 'put.v.01', 4.725693427236653, 0.015923794596434563), ('real', '_S_wild.n.01_', 0.2769292664318228, 0.9806634234701077), ('real', '_P_enjoyment.n.02_', 0.516919864717888, -0.3887090268731789), ('real', '_NLayers_', 3.0, -0.4626998858633322), ('real', '_Decoration_', 1.0, -3.3624135558676054)]
Acc: 0.945946 - Precision: 0.875000 - Recall: 0.875000 - F1: 0.875000
 [9] ----

0.5690126050420168

In [None]:
# Scratch lookup: list every WordNet synset for "landscape" next to its
# definition.
from nltk.corpus import wordnet as wn

landscape_synsets = wn.synsets('landscape')
for synset in landscape_synsets:
    gloss = synset.definition()
    print(synset, gloss)