In [10]:
import os
import pandas as pd
import numpy as np
import joblib
 
from imutils import paths
from sklearn.preprocessing import LabelBinarizer
from tqdm import tqdm
from pathlib import Path

import shutil

## Style Data

In [5]:
# Locations of the style-classification split files (relative to the working directory).
style_train_csv_path, style_val_csv_path = (
    'dataset/Style/style_train.csv',
    'dataset/Style/style_val.csv',
)

In [6]:
# The style training CSV is read without a header row; label its two columns
# as the image path and the class id.
s_train_csv = pd.read_csv(style_train_csv_path, header=None).set_axis(
    ['filename', 'class'], axis='columns'
)
s_train_csv.head()

Unnamed: 0,filename,class
0,Impressionism/edgar-degas_landscape-on-the-orn...,12
1,Realism/camille-corot_mantes-cathedral.jpg,21
2,Abstract_Expressionism/gene-davis_untitled-197...,0
3,Symbolism/kuzma-petrov-vodkin_in-the-1920.jpg,24
4,Impressionism/maurice-prendergast_paris-boulev...,12


In [7]:
# The style validation CSV is read without a header row; label its two columns
# as the image path and the class id.
s_valid_csv = pd.read_csv(style_val_csv_path, header=None).set_axis(
    ['filename', 'class'], axis='columns'
)
s_valid_csv.head()

Unnamed: 0,filename,class
0,Impressionism/edgar-degas_dancers-on-set-1880.jpg,12
1,Impressionism/claude-monet_water-lilies-6.jpg,12
2,Impressionism/giovanni-boldini_a-guitar-player...,12
3,Impressionism/john-singer-sargent_at-torre-gal...,12
4,Impressionism/john-singer-sargent_artist-in-th...,12


In [23]:
# Copy every image listed in the style CSVs into a per-split, per-style tree:
#   <dataset_root>/style_based_split/<train|valid>/<style folder>/<image file>
# The dataset root is defined once so the source and destination cannot drift apart.
dataset_root = '/media/prathmeshmadhu/My Passport/wikiart/dataset'
load_dir = os.path.join(dataset_root, 'wikiart')
which_classification = 'style_based_split'
split = ['train', 'valid']
split_dfs = [s_train_csv, s_valid_csv]

for sp, sp_df in zip(split, split_dfs):
    save_prefix = os.path.join(dataset_root, which_classification, sp)
    Path(save_prefix).mkdir(parents=True, exist_ok=True)

    # Style folders already created for this split, so mkdir runs once per
    # folder instead of once per image.
    created_dirs = set()

    # Only the 'filename' column is needed, so iterate over it directly
    # (with a progress bar, since this copies the full dataset).
    for filename in tqdm(sp_df['filename'], desc=sp):
        # CSV entries look like '<style folder>/<image file>'.
        name_parts = filename.split('/')
        folder_name, image_name = name_parts[0], name_parts[-1]

        save_dir = Path(save_prefix) / folder_name
        if save_dir not in created_dirs:
            save_dir.mkdir(parents=True, exist_ok=True)
            created_dirs.add(save_dir)

        load_path = Path(load_dir) / filename
        save_path = save_dir / image_name

        # shutil.copy overwrites an existing destination, so re-running is safe.
        shutil.copy(load_path, save_path)

## Artist Data

In [5]:
# Locations of the artist-classification split files (relative to the working directory).
# NOTE(review): unlike the style/genre paths these carry no '.csv' extension —
# confirm the files on disk are really named this way.
artist_train_csv_path, artist_val_csv_path = (
    'dataset/Artist/artist_train',
    'dataset/Artist/artist_val',
)

In [26]:
# Read the artist training file (first line skipped, no header row), label its
# three columns, then discard the middle 'empty' column.
a_train_csv = (
    pd.read_csv(artist_train_csv_path, header=None, skiprows=1)
    .set_axis(['filename', 'empty', 'class'], axis='columns')
    .drop(columns=['empty'])
)
a_train_csv.head()

Unnamed: 0,filename,class
0,Realism/vincent-van-gogh_pine-trees-in-the-fen...,22
1,Baroque/rembrandt_the-angel-appearing-to-the-s...,20
2,Post_Impressionism/paul-cezanne_portrait-of-th...,16
3,Impressionism/pierre-auguste-renoir_young-girl...,17
4,Romanticism/ivan-aivazovsky_morning-1851.jpg,9


In [28]:
# Read the artist validation file (first line skipped, no header row), label its
# three columns, then discard the middle 'empty' column.
a_val_csv = (
    pd.read_csv(artist_val_csv_path, header=None, skiprows=1)
    .set_axis(['filename', 'empty', 'class'], axis='columns')
    .drop(columns=['empty'])
)
a_val_csv.head()

Unnamed: 0,filename,class
0,Impressionism/claude-monet_water-lilies-6.jpg,4
1,Impressionism/claude-monet_banks-of-the-seine-...,4
2,Impressionism/claude-monet_vetheuil-the-church...,4
3,Impressionism/claude-monet_jeanne-marguerite-l...,4
4,Impressionism/claude-monet_houses-of-parliamen...,4


## Genre Data

In [8]:
# Locations of the genre-classification split files (relative to the working directory).
g_train_csv_path, g_val_csv_path = (
    'dataset/Genre/genre_train.csv',
    'dataset/Genre/genre_val.csv',
)

In [29]:
# The genre training CSV is read without a header row; label its two columns
# as the image path and the class id.
g_train_csv = pd.read_csv(g_train_csv_path, header=None).set_axis(
    ['filename', 'class'], axis='columns'
)
g_train_csv.head()

Unnamed: 0,filename,class
0,Northern_Renaissance/hieronymus-bosch_st-jacqu...,7
1,Post_Impressionism/vincent-van-gogh_ears-of-wh...,4
2,Symbolism/theodor-severin-kittelsen_kvitebj-rn...,3
3,Expressionism/martiros-saryan_mother-of-the-ar...,6
4,Early_Renaissance/leonardo-da-vinci_study-for-...,8


In [10]:
# The genre validation CSV is read without a header row; label its two columns
# as the image path and the class id, then preview the last rows.
g_val_csv = pd.read_csv(g_val_csv_path, header=None).set_axis(
    ['filename', 'class'], axis='columns'
)
g_val_csv.tail()

Unnamed: 0,filename,class
19487,Minimalism/rodolfo-arico_untitled-1965-1.jpg,0
19488,Abstract_Expressionism/perle-fine_sunblinded-1...,0
19489,Abstract_Expressionism/john-hoyland_trickster-...,0
19490,Abstract_Expressionism/richard-pousette-dart_b...,0
19491,Minimalism/enrico-castellani_no-827-1963.jpg,0


In [31]:
# Export the relabelled style splits without the DataFrame index column.
# The validation frame is named `s_valid_csv`; the previous `s_val_csv` was
# never defined and raised NameError on a fresh kernel.
s_train_csv.to_csv('resources/csvs/wikiart_style_train.csv', index=False)
s_valid_csv.to_csv('resources/csvs/wikiart_style_valid.csv', index=False)

In [32]:
# Export the relabelled artist splits; the DataFrame index is not written.
for frame, target in (
    (a_train_csv, 'resources/csvs/wikiart_artist_train.csv'),
    (a_val_csv, 'resources/csvs/wikiart_artist_valid.csv'),
):
    frame.to_csv(target, index=None)

In [33]:
# Export the relabelled genre splits; the DataFrame index is not written.
for frame, target in (
    (g_train_csv, 'resources/csvs/wikiart_genre_train.csv'),
    (g_val_csv, 'resources/csvs/wikiart_genre_valid.csv'),
):
    frame.to_csv(target, index=None)

In [19]:
# Number of distinct class labels in the style, artist and genre training splits.
tuple(
    len(np.unique(frame['class']))
    for frame in (s_train_csv, a_train_csv, g_train_csv)
)

(27, 24, 10)