## Word2Vec unimodal classification demo

1. Vectorize words from captions

2. Calculate the average of the word vectors

3. Find most similar word to the average vector

4. Use one or two distinct classifiers to perform the classification

5. Compute the accuracy of classification with the selected classifiers (LogReg, SVC)

In [1]:
import pandas as pd

In [2]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
# Load the COCO subset (one JSON record per line) and shuffle the rows.
# A fixed random_state makes the shuffle, and therefore the cross-validation
# folds built from the row order later on, identical on every
# Restart Kernel -> Run All.
df = (
    pd.read_json("data/COCO/coco-easier.txt", lines=True)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df

Unnamed: 0,all_categories,captions,category,file_name,img_id
0,"[dog, dog, couch]","[two dogs laying down on a brown couch, there ...",dog,000000236784.jpg,236784
1,[cat],[An orange cat sitting in front of a door to a...,cat,000000360943.jpg,360943
2,"[dog, dog]",[A brown and white dog standing next to anothe...,dog,000000464522.jpg,464522
3,"[sports ball, tennis racket, person, person, p...",[A man standing on a clay tennis court with a ...,tennis racket,000000409211.jpg,409211
4,"[person, person, skis, backpack, person, skis,...","[A man riding skis down a snow covered slope.,...",skis,000000301563.jpg,301563
5,"[cat, cat]",[A cat that is standing looking through a glas...,cat,000000063552.jpg,63552
6,"[bottle, traffic light, person, person, person]",[Street art painted on the wall in an asian co...,traffic light,000000385190.jpg,385190
7,"[person, skis]",[A skier in a squat position on skies skiing d...,skis,000000173830.jpg,173830
8,"[bicycle, car, car, traffic light, traffic lig...","[a blue bike parked on a side walk , A bicycle...",traffic light,000000174482.jpg,174482
9,"[cat, laptop]","[A fluffy cat laying on an electronic device, ...",cat,000000343076.jpg,343076


In [4]:
# Merge each image's list of captions into a single string so that it can be
# tokenized as one document for the word2vec lookup.
# The isinstance guard keeps this cell idempotent: after the first run the
# column already holds strings, and " ".join(<str>) would put a space between
# every character.
df["captions"] = df["captions"].apply(
    lambda sents: sents if isinstance(sents, str) else " ".join(sents)
)
df["captions"][0]

'two dogs laying down on a brown couch there are two large dogs sleeping on the couch a couple of dogs are laying on a coach Two black dogs on a couch sleeping peacefully. Two dogs are lying on the brown couch. '

In [5]:
from gensim.utils import simple_preprocess
# Tokenize every merged caption string with gensim's simple_preprocess.
df["words"] = df["captions"].apply(simple_preprocess)
df.loc[0, "words"]

2018-05-20 22:39:22,226 : INFO : 'pattern' package not found; tag filters are not available for English


['two',
 'dogs',
 'laying',
 'down',
 'on',
 'brown',
 'couch',
 'there',
 'are',
 'two',
 'large',
 'dogs',
 'sleeping',
 'on',
 'the',
 'couch',
 'couple',
 'of',
 'dogs',
 'are',
 'laying',
 'on',
 'coach',
 'two',
 'black',
 'dogs',
 'on',
 'couch',
 'sleeping',
 'peacefully',
 'two',
 'dogs',
 'are',
 'lying',
 'on',
 'the',
 'brown',
 'couch']

In [6]:
from gensim.models import KeyedVectors
# Load the pretrained GoogleNews vectors (binary word2vec format).
# `limit` caps how many vectors are read from the file.
word2vec = KeyedVectors.load_word2vec_format(
    './data/GoogleNews-vectors-negative300.bin',
    binary=True,
    limit=200000,
)

2018-05-20 22:39:22,439 : INFO : loading projection weights from ./data/GoogleNews-vectors-negative300.bin
2018-05-20 22:39:26,080 : INFO : loaded (200000, 300) matrix from ./data/GoogleNews-vectors-negative300.bin


In [7]:
def get_vectors(words):
    """Look up the word2vec vector of every known word.

    Args:
        words: iterable of tokens.

    Returns:
        A list with one vector per token that is present in the global
        `word2vec` model, in the order the tokens appear. Tokens missing
        from the model are skipped.
    """
    return [word2vec[token] for token in words if token in word2vec]

In [8]:
# One list of word vectors per row, built from that row's tokens.
df["vectors"] = df["words"].apply(get_vectors)

In [9]:
import numpy as np
def average_vector(vectors):
    """Return the element-wise mean of a list of word vectors.

    Args:
        vectors: list of equally sized 1-D vectors (may be empty).

    Returns:
        The mean vector. When `vectors` is empty (no token of the caption was
        found in the model) a zero vector of the model's dimensionality is
        returned instead of the scalar NaN that sum/len would produce, so the
        feature matrix built from this column keeps a consistent shape.
    """
    if len(vectors) == 0:
        return np.zeros(word2vec.vector_size, dtype=np.float32)
    return np.divide(np.sum(vectors, axis=0), len(vectors))


df["vector_avg"] = df["vectors"].apply(average_vector)
df.head()

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg
0,"[dog, dog, couch]",two dogs laying down on a brown couch there ar...,dog,000000236784.jpg,236784,"[two, dogs, laying, down, on, brown, couch, th...","[[0.03173828, -0.10644531, 0.0024108887, 0.052...","[0.052157015, -0.029487893, 0.027018366, 0.141..."
1,[cat],An orange cat sitting in front of a door to a ...,cat,000000360943.jpg,360943,"[an, orange, cat, sitting, in, front, of, door...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[0.056553233, 0.026530873, 0.0018477007, 0.101..."
2,"[dog, dog]",A brown and white dog standing next to another...,dog,000000464522.jpg,464522,"[brown, and, white, dog, standing, next, to, a...","[[0.007873535, 0.12890625, 0.02734375, 0.15234...","[0.042373456, 0.04664853, -0.014217979, 0.1092..."
3,"[sports ball, tennis racket, person, person, p...",A man standing on a clay tennis court with a r...,tennis racket,000000409211.jpg,409211,"[man, standing, on, clay, tennis, court, with,...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.032073192, 0.036617965, 0.0736241, 0.019112..."
4,"[person, person, skis, backpack, person, skis,...",A man riding skis down a snow covered slope. A...,skis,000000301563.jpg,301563,"[man, riding, skis, down, snow, covered, slope...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.073890686, -0.0041534104, 0.07101864, 0.049..."


In [10]:
# The model joins the words of a phrase with an underscore, so category names
# are rewritten to that form; "traffic_light" is shortened to "traffic".
categories = [
    "traffic" if name == "traffic_light" else name
    for name in (cat.replace(" ", "_") for cat in df["category"].unique())
]
categories

['dog',
 'cat',
 'tennis_racket',
 'skis',
 'traffic',
 'pizza',
 'giraffe',
 'clock',
 'toilet',
 'surfboard']

In [11]:
# Google's model does not contain the phrase "traffic light" and separates the
# words of a phrase with an underscore, so `categories` uses "traffic" and
# "tennis_racket"; predictions are mapped back to the dataset's label spelling.

def get_prediction(vector):
    """Predict a category label for an (averaged) word vector.

    The word closest to `vector` in the global `word2vec` model is looked up
    first; the entry of the global `categories` list most similar to that word
    is then returned, spelled the way the dataset's `category` column spells it.

    Args:
        vector: query vector passed to `word2vec.similar_by_vector`.

    Returns:
        The predicted category name as a string.
    """
    dataset_spelling = {"traffic": "traffic light", "tennis_racket": "tennis racket"}
    nearest_word = word2vec.similar_by_vector(vector)[0][0]
    label = word2vec.most_similar_to_given(nearest_word, categories)
    return dataset_spelling.get(label, label)

In [12]:
# Label every row with the category predicted from its averaged word vector.
df["similar"] = df["vector_avg"].apply(get_prediction)
df.head()

2018-05-20 22:39:28,809 : INFO : precomputing L2-norms of word weight vectors


Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar
0,"[dog, dog, couch]",two dogs laying down on a brown couch there ar...,dog,000000236784.jpg,236784,"[two, dogs, laying, down, on, brown, couch, th...","[[0.03173828, -0.10644531, 0.0024108887, 0.052...","[0.052157015, -0.029487893, 0.027018366, 0.141...",dog
1,[cat],An orange cat sitting in front of a door to a ...,cat,000000360943.jpg,360943,"[an, orange, cat, sitting, in, front, of, door...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[0.056553233, 0.026530873, 0.0018477007, 0.101...",cat
2,"[dog, dog]",A brown and white dog standing next to another...,dog,000000464522.jpg,464522,"[brown, and, white, dog, standing, next, to, a...","[[0.007873535, 0.12890625, 0.02734375, 0.15234...","[0.042373456, 0.04664853, -0.014217979, 0.1092...",dog
3,"[sports ball, tennis racket, person, person, p...",A man standing on a clay tennis court with a r...,tennis racket,000000409211.jpg,409211,"[man, standing, on, clay, tennis, court, with,...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.032073192, 0.036617965, 0.0736241, 0.019112...",tennis racket
4,"[person, person, skis, backpack, person, skis,...",A man riding skis down a snow covered slope. A...,skis,000000301563.jpg,301563,"[man, riding, skis, down, snow, covered, slope...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.073890686, -0.0041534104, 0.07101864, 0.049...",skis


## Classification and testing

In [13]:
from sklearn.model_selection import cross_val_score

In [14]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier evaluated by cross-validation in the next cell.
# random_state is pinned so that solvers which shuffle the data (liblinear,
# sag, saga) give the same scores on every run; other solvers ignore it.
classifier = LogisticRegression(random_state=42)

In [15]:
# 5-fold cross-validation of `classifier` on the averaged caption vectors
# (one feature column per vector component); reports mean accuracy and
# two standard deviations across the folds.
scores = cross_val_score(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.96 (+/- 0.02)


In [16]:
from sklearn.svm import LinearSVC
# Linear support-vector classifier; replaces the logistic-regression model
# bound to `classifier` above. The underlying solver uses a random number
# generator, so random_state is pinned to make the reported scores reproducible.
classifier = LinearSVC(random_state=42)

In [17]:
# Same 5-fold evaluation as before, now for the classifier currently bound to
# `classifier`; reports mean accuracy and two standard deviations.
scores = cross_val_score(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.96 (+/- 0.02)


In [18]:
from sklearn.model_selection import cross_val_predict

# Store the cross-validated prediction of `classifier` for every row so that
# individual mistakes can be inspected next to the true category.
df["predictions"] = cross_val_predict(
    estimator=classifier,
    X=pd.DataFrame(df["vector_avg"].tolist()),
    y=df["category"].values,
    cv=5,
)
df.head()

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar,predictions
0,"[dog, dog, couch]",two dogs laying down on a brown couch there ar...,dog,000000236784.jpg,236784,"[two, dogs, laying, down, on, brown, couch, th...","[[0.03173828, -0.10644531, 0.0024108887, 0.052...","[0.052157015, -0.029487893, 0.027018366, 0.141...",dog,dog
1,[cat],An orange cat sitting in front of a door to a ...,cat,000000360943.jpg,360943,"[an, orange, cat, sitting, in, front, of, door...","[[0.12597656, 0.19042969, 0.06982422, 0.072265...","[0.056553233, 0.026530873, 0.0018477007, 0.101...",cat,cat
2,"[dog, dog]",A brown and white dog standing next to another...,dog,000000464522.jpg,464522,"[brown, and, white, dog, standing, next, to, a...","[[0.007873535, 0.12890625, 0.02734375, 0.15234...","[0.042373456, 0.04664853, -0.014217979, 0.1092...",dog,dog
3,"[sports ball, tennis racket, person, person, p...",A man standing on a clay tennis court with a r...,tennis racket,000000409211.jpg,409211,"[man, standing, on, clay, tennis, court, with,...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.032073192, 0.036617965, 0.0736241, 0.019112...",tennis racket,tennis racket
4,"[person, person, skis, backpack, person, skis,...",A man riding skis down a snow covered slope. A...,skis,000000301563.jpg,301563,"[man, riding, skis, down, snow, covered, slope...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.073890686, -0.0041534104, 0.07101864, 0.049...",skis,skis


## Misclassified

In [19]:
# Rows whose cross-validated prediction differs from the true category,
# shuffled and re-indexed for display.
k1 = (
    df[df["category"] != df["predictions"]]
    .sample(frac=1)
    .reset_index(drop=True)
)
k1

Unnamed: 0,all_categories,captions,category,file_name,img_id,words,vectors,vector_avg,similar,predictions
0,"[potted plant, refrigerator, chair, cup, cup, ...",A smaller kitchen with a very decorated fridge...,surfboard,000000407614.jpg,407614,"[smaller, kitchen, with, very, decorated, frid...","[[-0.050048828, 0.034179688, -0.0703125, 0.175...","[-0.055013802, 0.08017437, -0.016232222, 0.078...",toilet,clock
1,"[person, remote, toilet]",A man holding holding a giant remote control. ...,toilet,000000037670.jpg,37670,"[man, holding, holding, giant, remote, control...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.08915138, 0.024242401, -0.038812637, 0.0623...",toilet,clock
2,"[tv, tv, mouse, keyboard, person, person, pers...",The computer monitor has a framed picture near...,cat,000000149222.jpg,149222,"[the, computer, monitor, has, framed, picture,...","[[0.080078125, 0.10498047, 0.049804688, 0.0534...","[0.027858611, -0.044955898, -0.05519055, 0.043...",tennis racket,clock
3,"[person, baseball bat, car, car, car, car, tra...",Kid poses for a picture in a baseball uniform ...,traffic light,000000491464.jpg,491464,"[kid, poses, for, picture, in, baseball, unifo...","[[0.18457031, 0.14941406, 0.09082031, 0.310546...","[-0.00027678732, 0.15596612, 0.07994754, 0.023...",tennis racket,tennis racket
4,"[cup, banana, cup, book, book, book, pizza, pi...",A variety of items is shown in a shopping cart...,pizza,000000535253.jpg,535253,"[variety, of, items, is, shown, in, shopping, ...","[[0.030639648, 0.09277344, -0.2578125, 0.07470...","[0.048654344, 0.058258295, -0.09881677, 0.1640...",dog,clock
5,"[bottle, bottle, bottle, bottle, person, tenni...",The man holds a ping pong paddle under the she...,tennis racket,000000112298.jpg,112298,"[the, man, holds, ping, pong, paddle, under, t...","[[0.080078125, 0.10498047, 0.049804688, 0.0534...","[0.02190986, 0.010273273, 0.013679749, 0.07203...",dog,clock
6,"[dog, dog, chair, person, bed, book, book, boo...",A man sitting at a computer desk in front of a...,dog,000000366884.jpg,366884,"[man, sitting, at, computer, desk, in, front, ...","[[0.32617188, 0.13085938, 0.03466797, -0.08300...","[0.06266062, 0.044794988, 0.0012399774, 0.0781...",toilet,clock
7,"[clock, person, hot dog, donut, donut, donut, ...",This a case full of doughnuts and cinnamon bun...,clock,000000405195.jpg,405195,"[this, case, full, of, doughnuts, and, cinnamo...","[[0.109375, 0.140625, -0.03173828, 0.16601562,...","[-0.00797035, 0.025007578, -0.049233273, 0.186...",pizza,pizza
8,"[bottle, clock, dining table, person, person, ...",A group of people sitting around a wooden tabl...,clock,000000009590.jpg,9590,"[group, of, people, sitting, around, wooden, t...","[[-0.021972656, 0.015197754, -0.029907227, 0.0...","[0.0048433193, 0.03782205, 0.031207366, 0.1744...",toilet,pizza
9,"[tv, cat, couch, chair, book, book, chair, boo...",A living room filled with furniture and a flat...,cat,000000139684.jpg,139684,"[living, room, filled, with, furniture, and, f...","[[0.1328125, 0.10644531, -0.07763672, 0.182617...","[0.037010524, 0.058023203, 0.00876294, 0.11404...",toilet,clock
