In [11]:
import numpy as np
import pandas as pd

### Read dataset

In [21]:
# Load the train/validation splits. The files are read with header=None, so
# the positional columns 0 and 1 are labelled right after loading.
split_columns = {0: 'image', 1: 'artist'}

artist_train = pd.read_csv('./wikiart_csv/artist_train.csv', header=None).rename(columns=split_columns)
artist_val = pd.read_csv('./wikiart_csv/artist_val.csv', header=None).rename(columns=split_columns)

In [22]:
# Show the training split: one row per image path, with its integer label
# stored in the 'artist' column.
artist_train

Unnamed: 0,image,artist
0,Realism/vincent-van-gogh_pine-trees-in-the-fen...,22
1,Baroque/rembrandt_the-angel-appearing-to-the-s...,20
2,Post_Impressionism/paul-cezanne_portrait-of-th...,16
3,Impressionism/pierre-auguste-renoir_young-girl...,17
4,Romanticism/ivan-aivazovsky_morning-1851.jpg,9
...,...,...
13341,Cubism/pablo-picasso_still-life-with-lemon-and...,15
13342,Post_Impressionism/vincent-van-gogh_sloping-pa...,22
13343,Impressionism/pierre-auguste-renoir_head-of-a-...,17
13344,Northern_Renaissance/albrecht-durer_adam-and-e...,0


In [23]:
# Show the validation split (same 'image' / 'artist' columns as the training split).
artist_val

Unnamed: 0,image,artist
0,Impressionism/claude-monet_water-lilies-6.jpg,4
1,Impressionism/claude-monet_banks-of-the-seine-...,4
2,Impressionism/claude-monet_vetheuil-the-church...,4
3,Impressionism/claude-monet_jeanne-marguerite-l...,4
4,Impressionism/claude-monet_houses-of-parliamen...,4
...,...,...
5701,Cubism/pablo-picasso_two-naked-figures.jpg,15
5702,Symbolism/pablo-picasso_the-tragedy-1903.jpg,15
5703,Expressionism/pablo-picasso_drunk-woman-is-tir...,15
5704,Expressionism/pablo-picasso_untitled-1958-1.jpg,15


In [26]:
# Count how many training rows carry each label in the 'artist' column,
# most frequent label first.
artist_train['artist'].value_counts()

artist
22    1323
14    1274
17     980
4      934
18     644
2      621
0      580
11     549
20     544
12     536
15     534
7      528
1      444
5      428
16     406
9      404
13     403
6      389
3      385
8      378
10     364
19     362
21     336
Name: count, dtype: int64

### Change file path & name to 'artist/artwork_name'

In [2]:
import os
import shutil

In [9]:
# Root of the downloaded WikiArt tree (images named '<artist>_<artwork>.jpg')
# and root of the reorganised per-artist tree.
WIKIART_SRC_DIR = '/Users/sungjinkang/Downloads/wikiart/'
ARTIST_DST_DIR = './wikiart_by_artist/'


def move_images_by_artist(src_root, dst_root):
    """Move '<artist>_<artwork>.jpg' files into dst_root/<artist>/<artwork>.jpg.

    Walks src_root recursively. The artist is the text before the first
    underscore of the file name; everything after it is kept verbatim as the
    new file name, so the result matches 'artist/artwork.jpg' paths built by
    splitting on the first underscore.

    Files are left where they are when:
      * the name has no underscore, an empty artist, or an empty artwork stem;
      * the name does not end in '.jpg' (e.g. '.DS_Store');
      * the destination already exists (never overwrite an image).

    Args:
        src_root: directory tree to scan.
        dst_root: directory under which one sub-directory per artist is created.

    Returns:
        int: number of files that were moved.
    """
    suffix = '.jpg'
    moved = 0
    for root, _dirs, files in os.walk(src_root):
        for file in files:
            # Split only on the first underscore: artwork titles may contain more.
            artist, sep, artwork = file.partition('_')
            stem = artwork[:-len(suffix)] if artwork.endswith(suffix) else ''
            if not (sep and artist and stem):
                continue

            artist_dir = os.path.join(dst_root, artist)
            new_path = os.path.join(artist_dir, artwork)
            if os.path.exists(new_path):
                # Same artist/artwork seen twice: keep the source file instead
                # of silently replacing the one already moved.
                continue

            os.makedirs(artist_dir, exist_ok=True)
            shutil.move(os.path.join(root, file), new_path)
            moved += 1
    return moved


moved_count = move_images_by_artist(WIKIART_SRC_DIR, ARTIST_DST_DIR)
print(f'moved {moved_count} images')

In [18]:
# Print the dtype of the 'image' column, then display the training frame.
# NOTE(review): the recorded output below lists a 'class' column, which only the
# later "Read the dataset files" cell (names=['image', 'class']) creates —
# looks like this cell was run out of order; confirm it survives Restart & Run All.
print(artist_train['image'].dtypes)
artist_train

object


Unnamed: 0,image,class
0,Realism/vincent-van-gogh_pine-trees-in-the-fen...,22
1,Baroque/rembrandt_the-angel-appearing-to-the-s...,20
2,Post_Impressionism/paul-cezanne_portrait-of-th...,16
3,Impressionism/pierre-auguste-renoir_young-girl...,17
4,Romanticism/ivan-aivazovsky_morning-1851.jpg,9
...,...,...
13341,Cubism/pablo-picasso_still-life-with-lemon-and...,15
13342,Post_Impressionism/vincent-van-gogh_sloping-pa...,22
13343,Impressionism/pierre-auguste-renoir_head-of-a-...,17
13344,Northern_Renaissance/albrecht-durer_adam-and-e...,0


### Update the image paths in the CSVs to match the new 'artist/artwork_name' layout

In [19]:
# Reload both splits, labelling the two header-less columns 'image' and 'class'
artist_train = pd.read_csv('./wikiart_csv/artist_train.csv', header=None, names=['image', 'class'])
artist_val = pd.read_csv('./wikiart_csv/artist_val.csv', header=None, names=['image', 'class'])


def _artist_and_relpath(path):
    """Return (artist, 'artist/artwork') for a '<style>/<artist>_<artwork>' path.

    Only the second '/'-separated component is used; the artist is the text
    before its first underscore and the artwork is everything after it.
    """
    artist, _, artwork = path.split('/')[1].partition('_')
    return artist, '/'.join([artist, artwork])


# Add an 'artist' column and rewrite 'image' to the 'artist/artwork' form
for split_frame in (artist_train, artist_val):
    split_frame['artist'], split_frame['image'] = zip(*split_frame['image'].apply(_artist_and_relpath))

In [20]:
# Show the training split after 'image' was rewritten to 'artist/artwork' form
# and the 'artist' name column was added.
artist_train

Unnamed: 0,image,class,artist
0,vincent-van-gogh/pine-trees-in-the-fen-1884.jpg,22,vincent-van-gogh
1,rembrandt/the-angel-appearing-to-the-shepherds...,20,rembrandt
2,paul-cezanne/portrait-of-the-artist-s-son.jpg,16,paul-cezanne
3,pierre-auguste-renoir/young-girl-seated-in-a-m...,17,pierre-auguste-renoir
4,ivan-aivazovsky/morning-1851.jpg,9,ivan-aivazovsky
...,...,...,...
13341,pablo-picasso/still-life-with-lemon-and-orange...,15,pablo-picasso
13342,vincent-van-gogh/sloping-path-in-montmartre-18...,22,vincent-van-gogh
13343,pierre-auguste-renoir/head-of-a-woman-2.jpg,17,pierre-auguste-renoir
13344,albrecht-durer/adam-and-eve-1507.jpg,0,albrecht-durer


In [21]:
# Write both rewritten splits to new CSV files, leaving out the index column
modified_outputs = (
    (artist_train, './wikiart_csv/artist_train_modified.csv'),
    (artist_val, './wikiart_csv/artist_val_modified.csv'),
)
for split_frame, csv_path in modified_outputs:
    split_frame.to_csv(csv_path, index=False)

### Augment data

In [23]:
from PIL import Image
import torchvision.transforms as transforms
from torchvision.utils import save_image
import shutil

In [None]:
# Define the transformations used for augmentation.
# Fixes relative to the first draft of this cell:
#   * RandomRotation takes `degrees`, not `degree`;
#   * the class is `RandomHorizontalFlip`;
#   * a list is used instead of a set literal so the transforms keep a
#     deterministic order when iterated or passed to transforms.Compose.
data_transforms = [
    transforms.RandomRotation(degrees=30),
    transforms.RandomHorizontalFlip(p=0.25),
    transforms.RandomResizedCrop(size=256, scale=(0.7, 1.0)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
]