In [1]:
from __future__ import division
from __future__ import print_function

import argparse
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "3"

import numpy as np
import torch
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
import sys
sys.path.append("/home/marta/jku/SBNet/ssnet_fop")

import pandas as pd
from scipy import random
from sklearn import preprocessing
# import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn

from tqdm import tqdm
from retrieval_model import FOP


In [2]:
# Dataset root (the `mmimdb` directory). It can be overridden through the
# MMIMDB_DATA_DIR environment variable so the notebook also runs on machines
# where the data lives elsewhere; the default is the original location.
data_folder = os.environ.get('MMIMDB_DATA_DIR', '/share/hel/datasets/mmimdb')
labels_folder = os.path.join(data_folder, 'labels')

# One '|'-separated label file per split (parsed in the label-loading cell).
train_labels_path = os.path.join(labels_folder, 'train_label.txt')
test_labels_path = os.path.join(labels_folder, 'test_label.txt')

In [25]:
# Folder holding the text features and the .npz archive that is read from it
# later with np.load.
texts_folder = os.path.join(data_folder, 'llava_encoded_texts')
plot_first_npz = os.path.join(texts_folder, 'llava_plot_first_latent_tensors.npz')

# Destination CSV files for the per-split text features. Note that these are
# path strings, not DataFrames, despite the `_df` suffix.
train_text_df, test_text_df = (
    os.path.join(texts_folder, csv_name)
    for csv_name in (
        'llava_plot_first_latent_train.csv',
        'llava_plot_first_latent_test.csv',
    )
)

In [4]:
def _read_split_labels(labels_path):
    """Read one split's label file into a frame indexed by item id.

    The file is parsed as two header-less, '|'-separated columns: an item
    name (only the part before the first '.' is kept as the id) and a
    ', '-separated genre string.

    Parameters
    ----------
    labels_path : str
        Path to the label file of one split.

    Returns
    -------
    labels : pandas.DataFrame
        Indexed by ``item_id`` with columns ``genres_str`` (the raw string)
        and ``genres`` (list of lower-cased genre names).
    items : numpy.ndarray
        The item ids in file order.
    """
    # dtype=str keeps the ids as text, so leading zeros (e.g. '0085234')
    # cannot be lost to numeric parsing.
    labels = pd.read_csv(labels_path, sep='|', header=None, dtype=str)
    labels.columns = ['item_id', 'genres_str']
    labels['genres'] = labels['genres_str'].str.lower().str.split(', ')
    # Drop the extension: keep only what precedes the first '.'.
    labels['item_id'] = labels['item_id'].str.split('.').str[0]
    # set_index returns a new frame, so the id column is still available here.
    return labels.set_index('item_id'), labels['item_id'].values


# Same parsing for both splits; the id arrays are used later to select the
# rows of each split from the feature tables.
train_labels, train_items = _read_split_labels(train_labels_path)
test_labels, test_items = _read_split_labels(test_labels_path)

In [5]:
# Stack the train and test label tables (train rows first) into one frame
# indexed by item_id.
# NOTE: `full_labels` is rebound to the multi-hot genre matrix by the later
# binarization cell, so this cell has to be re-run before that one whenever
# the notebook is re-executed.
full_labels = pd.concat([train_labels, test_labels])
full_labels.head()

Unnamed: 0_level_0,genres_str,genres
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1
106714,"Crime, Drama, Thriller","[crime, drama, thriller]"
204504,"Action, Crime, Drama, Thriller","[action, crime, drama, thriller]"
1865357,Drama,[drama]
33804,"Comedy, Romance","[comedy, romance]"
66423,Comedy,[comedy]


In [6]:
# Pull both arrays out while the archive is open; the frame is built afterwards.
with np.load(plot_first_npz) as data:
    vectors = data['values']
    item_ids = pd.Index(data['indices'], name='item_id')

# Flatten to 2-D using the sizes of the first and last axes (any axes in
# between must have size 1 for the reshape to succeed).
n_rows, n_cols = vectors.shape[0], vectors.shape[-1]
text_features = pd.DataFrame(vectors.reshape((n_rows, n_cols)), index=item_ids)

In [7]:
# Preview the first rows of the text feature table (one row per item_id).
text_features.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,7158,7159,7160,7161,7162,7163,7164,7165,7166,7167
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85234,0.679688,-1.577148,-0.349609,1.029297,-0.160645,-0.821289,0.167358,-0.208618,-1.198242,-1.203125,...,-0.207764,0.915039,0.250977,-0.510742,0.432617,0.033966,0.391602,-0.42334,0.624512,1.274414
117669,-0.07196,-0.554688,0.570312,0.314209,0.068481,-0.258301,-0.245605,-0.062134,-1.21875,-0.865234,...,0.419678,0.47583,-0.523926,-0.647949,0.395752,-0.108459,1.188477,-1.154297,1.329102,-0.093445
156171,0.273682,0.311523,0.377686,0.179565,0.008774,-0.139282,0.210938,-1.1875,-0.562988,-0.567871,...,-0.428711,-0.294189,-0.394775,0.021881,-0.109741,-0.738281,0.691406,-0.330078,0.534668,0.150146
2113090,-0.228638,-0.430908,1.057617,1.393555,0.046082,-1.164062,0.666992,-0.568359,-1.397461,-0.902344,...,0.323486,1.261719,-0.802246,0.271973,0.077637,-0.257812,0.484375,-0.919434,-0.192383,0.352295
87265,0.079895,-0.435791,0.511719,-0.263916,0.364258,0.171997,-0.450439,0.420654,-1.138672,0.060883,...,0.353516,0.743164,-0.991699,0.103516,-0.107849,-0.859863,1.129883,-0.407471,0.378906,0.927734


In [8]:
# Summarise index range, column count, dtypes and memory use of text_features.
text_features.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25959 entries, 0085234 to 0106673
Columns: 7168 entries, 0 to 7167
dtypes: float16(7168)
memory usage: 355.1+ MB


In [9]:
# Attach each item's genre list to its text features. how='inner' keeps only
# the item ids that are present in both the feature table and the label table.
text_features = pd.merge(
    text_features, full_labels['genres'],
    left_index=True, right_index=True, how='inner',
)

# Multi-hot encode the genre lists: one 0/1 column per genre, with the columns
# named directly from the classes the binarizer learned.
# NOTE: this rebinds `full_labels` from the raw label table to the multi-hot
# matrix, so this cell cannot be re-run on its own: the raw `full_labels`
# (with its 'genres' column) must be rebuilt first.
le = preprocessing.MultiLabelBinarizer()
full_labels = pd.DataFrame(
    data=le.fit_transform(text_features['genres']),
    index=text_features.index,
    columns=le.classes_,
)

# Replace the list-valued 'genres' column with the indicator columns.
text_features = pd.merge(
    text_features, full_labels,
    left_index=True, right_index=True, how='inner',
).drop(columns=['genres'])

In [37]:
# Re-inspect text_features now that the genre indicator columns are merged in.
text_features.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23351 entries, 0085234 to 0106673
Columns: 7195 entries, 0 to western
dtypes: float16(7168), int64(27)
memory usage: 324.9+ MB


In [26]:
# Select the rows of each split by item id (label-based lookup on the index).
text_features_train, text_features_test = (
    text_features.loc[split_ids] for split_ids in (train_items, test_items)
)

# Write each split to its CSV file, train first.
for split_frame, csv_path in (
    (text_features_train, train_text_df),
    (text_features_test, test_text_df),
):
    split_frame.to_csv(csv_path)

In [29]:
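# Repeat the same steps for the image features: load them, attach the genre labels, split into train/test and export to CSV.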

In [30]:
# Preview the multi-hot genre table (one 0/1 column per genre) that is merged
# into the image features below.
full_labels.head()

Unnamed: 0_level_0,action,adult,adventure,animation,biography,comedy,crime,documentary,drama,family,...,news,reality-tv,romance,sci-fi,short,sport,talk-show,thriller,war,western
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85234,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
117669,0,0,0,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
156171,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2113090,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
87265,0,0,0,0,0,1,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0


In [32]:
# Folder holding the image features and the .npz archive that is read from it
# later with np.load.
images_folder = os.path.join(data_folder, 'llava_encoded_images')
images_npz = os.path.join(images_folder, 'llava_latent_tensors_mean.npz')

# Destination CSV files for the per-split image features. Note that these are
# path strings, not DataFrames, despite the `_df` suffix.
train_image_df, test_image_df = (
    os.path.join(images_folder, csv_name)
    for csv_name in (
        'llava_images_latent_train.csv',
        'llava_images_latent_test.csv',
    )
)

In [33]:
# Pull both arrays out while the archive is open; the frame is built afterwards.
with np.load(images_npz) as data:
    vectors = data['values']
    item_ids = pd.Index(data['indices'], name='item_id')

# Flatten to 2-D using the sizes of the first and last axes (any axes in
# between must have size 1 for the reshape to succeed).
n_rows, n_cols = vectors.shape[0], vectors.shape[-1]
image_features = pd.DataFrame(vectors.reshape((n_rows, n_cols)), index=item_ids)

In [34]:
# Append the genre indicator columns to the image features by joining on the
# index; the inner join keeps only the ids present in both tables.
image_features = image_features.merge(
    full_labels,
    how='inner',
    left_index=True,
    right_index=True,
)

In [36]:
# Summarise index range, column count, dtypes and memory use of image_features
# after the genre columns were merged in.
image_features.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23351 entries, 0043511 to 0119845
Columns: 7195 entries, 0 to western
dtypes: float16(7168), int64(27)
memory usage: 324.2+ MB


In [38]:
# Select the rows of each split by item id (label-based lookup on the index).
image_features_train, image_features_test = (
    image_features.loc[split_ids] for split_ids in (train_items, test_items)
)

# Write each split to its CSV file, train first.
for split_frame, csv_path in (
    (image_features_train, train_image_df),
    (image_features_test, test_image_df),
):
    split_frame.to_csv(csv_path)

In [39]:
# Preview the test-split image features together with their genre columns.
image_features_test.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,news,reality-tv,romance,sci-fi,short,sport,talk-show,thriller,war,western
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
78718,-0.604492,-0.566406,0.708008,0.321045,-0.136475,-0.475586,1.149414,0.686035,-0.10437,-0.775391,...,0,0,0,0,0,0,0,1,0,0
89003,-0.759277,-0.47168,0.868164,0.540039,-0.302246,-0.16272,1.416992,0.624023,-0.029938,-0.625488,...,0,0,0,0,0,0,0,1,0,0
98136,-0.627441,-0.315674,0.010216,0.28833,-0.046661,-0.371826,0.738281,0.575684,-0.149414,0.014832,...,0,0,0,0,0,0,0,1,0,0
57693,-0.388916,-1.271484,0.787109,0.486084,-0.128296,-0.741699,1.257812,0.584961,-0.157715,-0.610352,...,0,0,0,1,0,0,0,1,0,0
385330,-0.98291,-0.533691,0.574707,0.506836,-0.55957,-0.359619,1.46582,1.133789,-0.156006,-0.538574,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# Preview the test-split text features; both modalities select their test rows
# with the same `test_items` ids.
text_features_test.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,news,reality-tv,romance,sci-fi,short,sport,talk-show,thriller,war,western
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
78718,1.237305,-0.873535,-0.604492,-0.167236,-0.124329,-0.22937,-0.577637,1.467773,-1.121094,0.049164,...,0,0,0,0,0,0,0,1,0,0
89003,-0.291748,-0.044464,-0.468994,0.46875,-0.82666,0.337402,0.487061,0.832031,-1.12793,0.286377,...,0,0,0,0,0,0,0,1,0,0
98136,-0.222412,-1.049805,0.17688,-0.139404,-1.808594,0.57959,1.508789,0.32666,-1.15625,-0.96582,...,0,0,0,0,0,0,0,1,0,0
57693,-0.290039,-2.853516,-0.275635,0.59375,-0.911621,-0.295654,1.543945,-0.241821,-1.222656,-0.477783,...,0,0,0,1,0,0,0,1,0,0
385330,-0.217163,-0.170532,0.291504,-0.239502,0.292969,-0.776367,0.908691,0.080017,-0.532715,-1.115234,...,0,0,0,0,0,0,0,0,0,0


In [41]:
# Genre names learned by the binarizer, in the order used for the label columns.
le.classes_

array(['action', 'adult', 'adventure', 'animation', 'biography', 'comedy',
       'crime', 'documentary', 'drama', 'family', 'fantasy', 'film-noir',
       'history', 'horror', 'music', 'musical', 'mystery', 'news',
       'reality-tv', 'romance', 'sci-fi', 'short', 'sport', 'talk-show',
       'thriller', 'war', 'western'], dtype=object)

In [42]:
# Show only the genre indicator columns of the test split. The column list is
# taken from the fitted binarizer rather than a hand-copied literal, so it
# cannot drift from the label columns that were generated from it.
text_features_test[list(le.classes_)]

Unnamed: 0_level_0,action,adult,adventure,animation,biography,comedy,crime,documentary,drama,family,...,news,reality-tv,romance,sci-fi,short,sport,talk-show,thriller,war,western
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0078718,0,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,1,0,0
0089003,1,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,1,0,0
0098136,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
0057693,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0
0385330,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1725929,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
0082934,0,0,0,0,0,0,1,0,1,0,...,0,0,1,0,0,0,0,1,0,0
0083550,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
0901686,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
