# Dataset creation

This notebook extracts image features from the ArtEmis dataset with every architecture used in this project (the CLIP visual encoders and the ImageNet pre-trained models) and saves them to disk.


In [74]:
import torch
import clip
import numpy as np
import pickle
import os
import os.path as osp

import copy
import argparse
import sklearn
import pandas as pd
import ast
import data_tools as dt
import torch.nn as nn
import custom as ct

from tqdm import tqdm
from artemis.emotions import ARTEMIS_EMOTIONS  
# Re-import data_tools so that edits made to the module while the kernel is
# running are picked up. `imp` was never imported in this notebook (and has been
# removed from the standard library in Python 3.12), so use importlib instead.
import importlib
importlib.reload(dt)

<module 'data_tools' from '/scratch/students/2021-fall-mt-rszymcza/code/data_tools.py'>

In [80]:

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

data_path = "../_data/"  # Change to the directory you want the embeddings to be saved in

# The CSVs can be found at https://www.artemisdataset.org/#dataset
# And follow the steps at https://github.com/optas/artemis
# NOTE(review): the first path is relative to "../code/" while the second is
# relative to the current directory — confirm both resolve from where this
# notebook is run.
image_emotion_histogram_path = "../code/artemis/artemis/data/image-emotion-histogram.csv"
artemis_preprocessed_path = "artemis/artemis/data/artemis_preprocessed.csv"

# To download the wikiart dataset : http://web.fsktm.um.edu.my/~cschan/source/ICIP2017/wikiart.zip
wikiart_path = "../data/wikiart/"

# Output layout, all rooted at data_path:
#   preprocessed/             -> preprocessed inputs
#   wikiart_embeddings/       -> extracted features
#   wikiart_embeddings/clip/  -> features from the CLIP models
preprocessed_dir = osp.join(data_path, "preprocessed")
wikiart_embeddings_path = osp.join(data_path, "wikiart_embeddings")
clip_path = osp.join(wikiart_embeddings_path, "clip")

# makedirs(exist_ok=True) also creates data_path itself when it is missing and
# does not fail when the directories already exist, so the cell can be re-run.
for _output_dir in (preprocessed_dir, wikiart_embeddings_path, clip_path):
    os.makedirs(_output_dir, exist_ok=True)

## CLIP Models

In [4]:
# Display the model names offered by the installed `clip` package; the
# extraction loop below iterates over this same list.
clip.available_models()

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'ViT-B/32', 'ViT-B/16']

In [None]:
class EigenModel(nn.Module):
    """Identity module: ``forward`` hands its input back unchanged.

    It is passed to ``dataset_for_subsets`` in place of a real network when the
    preprocessed images are written out (presumably so that the inputs
    themselves are stored rather than model features — confirm against
    ``dt.create_dataset``).
    """

    def forward(self, x):
        # No parameters and no computation: the very same object is returned.
        return x

def dataset_for_subsets(model, loaders_dict, path, recreate=False):
    """Create one dataset per subset from a dictionary of data loaders.

    Args:
        model: Forwarded to ``dt.create_dataset`` for every subset.
        loaders_dict: Mapping from subset name to the loader for that subset.
            The subset name is used as the last path component of the output.
        path: Directory under which one entry per subset is written. It is
            created (including missing parent directories) when absent.
        recreate: Forwarded unchanged to ``dt.create_dataset``.
            NOTE(review): presumably forces regeneration of an existing
            dataset — confirm in ``data_tools``.
    """
    # exist_ok avoids the check-then-create race, and makedirs (unlike mkdir)
    # does not fail when an intermediate directory is missing.
    os.makedirs(path, exist_ok=True)
    for subset, loader in loaders_dict.items():
        dt.create_dataset(model,
                          loader,
                          osp.join(path, subset),
                          recreate=recreate)

# Extract features with the visual encoder of every available CLIP model.
for model_name in clip.available_models():
    model, preprocess = clip.load(model_name)
    # Only the image branch of CLIP is used from here on.
    model = model.visual.to(device)
    # (Removed `high_res = model_name in high_res_models`: `high_res_models` is
    # not defined anywhere in this notebook, so the line raised NameError on a
    # fresh kernel, and `high_res` was never read.)

    # Input resolution of this model, read from its preprocessing pipeline.
    # NOTE(review): assumes transforms[1] is the crop step and exposes a
    # sequence-like `size` — confirm for the installed clip/torchvision version.
    img_size = preprocess.transforms[1].size[0]
    path = osp.join(preprocessed_dir, f"img_size_{img_size}")

    # The preprocessed set is built once per image size and shared by every
    # model with that size. NOTE(review): the directory's existence is the only
    # check, so a run interrupted during creation leaves a partial directory
    # that later runs will treat as complete.
    if not osp.exists(path):
        os.makedirs(path)
        wikiart_loaders = dt.get_original_wikiart_dataloaders(image_emotion_histogram_path,
                                          artemis_preprocessed_path,
                                          wikiart_path,
                                          preprocess)
        dataset_for_subsets(EigenModel(),
                          wikiart_loaders,
                          path)

    img_loaders = dt.get_loaders(path)
    # Strip "/" (e.g. "ViT-B/32") so the name forms a single directory component.
    model_name = model_name.replace("/", "")
    print(f"creating set for {model_name}")
    model_path = osp.join(clip_path, model_name)
    dataset_for_subsets(model,
                          img_loaders,
                          model_path)
    

## ImageNet pre-trained models

In [97]:
from torchvision import models

In [81]:
# Directory of the preprocessed set named "img_size_224" (presumably written by
# the CLIP section above for models with a 224-pixel input — that section must
# have been run first).
path_224 = osp.join(preprocessed_dir, "img_size_224")
# Loaders over that directory; they are fed to the ImageNet models below.
loaders_224 = dt.get_loaders(path_224)

In [98]:
# ImageNet pre-trained networks, keyed by the name of the directory their
# outputs are stored under.
# Each model is switched to eval mode so that dropout (AlexNet) and batch-norm
# (ResNet) behave deterministically during feature extraction, and moved to
# `device` like the CLIP encoders above. Both calls are no-ops if
# dt.create_dataset already does the same.
# NOTE(review): these are the full classification networks, so their output is
# the 1000-way logits unless dt.create_dataset truncates them — confirm.
models_imagenet = {}
models_imagenet["resnet50"] = models.resnet50(pretrained = True).to(device).eval()
models_imagenet["alexnet"] = models.alexnet(pretrained = True).to(device).eval()


In [99]:
# Root directory for the outputs of the ImageNet pre-trained models.
path_imagenet_embeddings = osp.join(wikiart_embeddings_path, "imagenet")
# makedirs(exist_ok=True) is safe to re-run and also creates missing parents.
os.makedirs(path_imagenet_embeddings, exist_ok=True)

In [None]:
# Run every ImageNet model over the 224-pixel loaders and store the results in
# one sub-directory per model name.
for name, model in models_imagenet.items():
    path = osp.join(path_imagenet_embeddings, name)
    # Race-free and parent-safe replacement for the exists()/mkdir() pair.
    os.makedirs(path, exist_ok=True)
    dataset_for_subsets(model,
                          loaders_224,
                          path)
    