In [1]:
# Import packages
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.dpi"] = 72
import numpy as np
import pandas as pd
from collections import Counter
import torch
import torch.nn as nn
from torchvision import models
import seaborn as sns

# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    Device = torch.device("cuda:0")
else:
    Device = torch.device("cpu")

# Record the environment next to the results it produced.
print(f'PyTorch version= {torch.__version__}')
print(f'CUDA available= {torch.cuda.is_available()}')

PyTorch version= 2.3.1+cu118
CUDA available= True


#### Exploring SemArt Dataset

In [2]:
# Load the SemArt training metadata (tab-separated, ISO-8859-1 encoded).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous raw string with doubled backslashes only resolved on Windows.
df = pd.read_csv('./Data/semart_train.csv', encoding="ISO-8859-1", sep='\t')
df.head()

Unnamed: 0,IMAGE_FILE,DESCRIPTION,AUTHOR,TITLE,TECHNIQUE,DATE,TYPE,SCHOOL,TIMEFRAME
0,19873-1darmst.jpg,"The Meyer or Darmstadt Madonna is the last, mo...","HOLBEIN, Hans the Younger",Darmstadt Madonna,"Oil on limewood, 147 x 102 cm",1526 and after 1528,religious,German,1501-1550
1,18759-guard301.jpg,Whereas Canaletto incorporates classically ins...,"GUARDI, Francesco",Landscape with a Fisherman's Tent,"Oil on canvas, 49 x 77 cm",1770-75,landscape,Italian,1751-1800
2,04589-temptati.jpg,In this painting the refinement of the colouri...,"BILIVERT, Giovanni",The Temptation of Charles and Ubalde,"Oil on copper, 37 x 28 cm",1629-30,religious,Italian,1601-1650
3,15104-magi.jpg,"The left side of the painting was cut, origina...",GEERTGEN tot Sint Jans,Adoration of the Magi,"Panel, 111 x 69 cm",1480-85,religious,Netherlandish,1451-1500
4,36582-paolo_f1.jpg,The painting illustrates a famous episode from...,"SCHEFFER, Ary",The Ghosts of Paolo and Francesca Appear to Da...,"Oil on canvas, 167 x 234 cm",1835,other,Dutch,1801-1850


In [3]:
# Sorted array of the distinct AUTHOR values in the metadata.
painters = np.unique(df['AUTHOR'].to_numpy())
print(f'Number of Painters: {painters.size}')

Number of Painters: 3166


In [4]:
# Number of paintings per AUTHOR value.
Counter(df['AUTHOR'].tolist())

Counter({'GOGH, Vincent van': 291,
         'REMBRANDT Harmenszoon van Rijn': 236,
         'GIOTTO di Bondone': 224,
         'RUBENS, Peter Paul': 187,
         'TIZIANO Vecellio': 181,
         'GRECO, El': 177,
         'VERONESE, Paolo': 170,
         'RAFFAELLO Sanzio': 169,
         'TINTORETTO': 168,
         'CRANACH, Lucas the Elder': 166,
         'TIEPOLO, Giovanni Battista': 165,
         'ANGELICO, Fra': 150,
         'UNKNOWN MASTER, Italian': 150,
         'MEMLING, Hans': 125,
         'MICHELANGELO Buonarroti': 120,
         'MANTEGNA, Andrea': 111,
         'GOYA Y LUCIENTES, Francisco de': 100,
         'BELLINI, Giovanni': 96,
         'MONET, Claude': 92,
         'BOTTICELLI, Sandro': 90,
         'TOULOUSE-LAUTREC, Henri de': 89,
         'WEYDEN, Rogier van der': 86,
         'DUCCIO di Buoninsegna': 85,
         'POUSSIN, Nicolas': 85,
         'CARAVAGGIO': 84,
         'LOTTO, Lorenzo': 84,
         'GOZZOLI, Benozzo': 84,
         'CANALETTO': 82,
         

In [5]:
# Sorted distinct TIMEFRAME labels, computed once and reused for both lines of output.
unique_timeframes = np.unique(df.TIMEFRAME)
print(f'Timeframes: {unique_timeframes}')
print(f'Number of Timeframes: {len(unique_timeframes)}')

Timeframes: ['0751-0800' '0801-0850' '0851-0900' '0951-1000' '1001-1050' '1051-1100'
 '1101-1150' '1151-1200' '1201-1250' '1251-1300' '1301-1350' '1351-1400'
 '1401-1450' '1451-1500' '1501-1550' '1551-1600' '1601-1650' '1651-1700'
 '1701-1750' '1751-1800' '1801-1850' '1851-1900']
Number of Timeframes: 22


#### Exploring Style Predictions dataset

In [6]:
# Load the per-image style scores; the first CSV column (image file name) becomes the index.
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous raw string with doubled backslashes only resolved on Windows.
styles = pd.read_csv('./Data/style_predictions.csv', index_col = 0)
styles.head()

Unnamed: 0,Abstract_Expressionism,Action_painting,Analytical_Cubism,Art_Nouveau,Baroque,Color_Field_Painting,Contemporary_Realism,Cubism,Early_Renaissance,Expressionism,...,Northern_Renaissance,Pointillism,Pop_Art,Post_Impressionism,Realism,Rococo,Romanticism,Symbolism,Synthetic_Cubism,Ukiyo_e
00803-predel2.jpg,-5.855052,-2.445264,4.569021,-3.352057,5.504667,-4.261706,-3.373453,2.724554,11.630469,0.09176,...,4.752926,-0.247803,-3.056488,-3.320292,-5.757197,-0.441127,-3.617446,0.297716,-0.216189,0.802261
00612-01assump.jpg,-7.036362,-1.541759,1.251932,0.891822,4.758474,-4.825901,-1.799933,-1.886111,8.589615,-0.715831,...,10.572582,0.13277,-2.68244,0.290388,-4.065725,-0.436026,3.389025,1.217571,-1.016447,1.816147
28746-peasant.jpg,-0.700046,-1.472309,-0.545117,-1.318472,9.125407,-4.149126,0.640162,-0.042066,-2.182846,4.366738,...,2.170227,-1.427454,-0.489499,-3.205423,5.222207,2.195539,-1.380008,-0.770149,-0.55962,0.070471
15937-nude.jpg,6.848784,3.081389,-1.157369,4.409545,-4.106697,1.904504,0.077659,-2.560646,-1.097748,-1.226942,...,-0.894047,4.960007,1.868661,-1.260525,-3.829489,-2.618714,-1.757044,7.611616,-0.971115,-1.122009
39067-5tasso21.jpg,-6.12262,-2.033544,-0.916582,4.969154,1.594321,-3.764644,-0.794799,-1.057732,5.888536,-4.822448,...,-2.165867,-0.887301,-3.565545,-2.281056,-3.081818,3.813381,3.989535,6.825279,-2.700673,-2.256572


In [7]:
# Painters examined in the per-artist style breakdown.
authors = [
    'GOGH, Vincent van',
    'MONET, Claude',
    'GAUGUIN, Paul',
    'MANET, Edouard',
    'RAFFAELLO Sanzio',
    'REMBRANDT Harmenszoon van Rijn',
    'EYCK, Jan van',
]

# X = every column of `styles` except the last; y = the last column.
# NOTE(review): the displayed head of `styles` ends with the `Ukiyo_e` score column, so this
# split appears to drop one style from X and use its scores as y - confirm this is intended.
X, y = styles.iloc[:, :-1], styles.iloc[:, -1]

In [8]:
def create_artist_dataset(X, y, artist, metadata=None, n_styles=27):
    """Return one painter's style scores together with each image's top-scoring style.

    Parameters
    ----------
    X : pandas.DataFrame
        Style scores indexed by image file name, one column per style.
    y : object
        Not used by this function; kept so existing calls keep working.
    artist : str
        Value matched exactly against the ``AUTHOR`` column of ``metadata``.
    metadata : pandas.DataFrame, optional
        Frame with ``AUTHOR`` and ``IMAGE_FILE`` columns. When omitted, the
        notebook-level ``df`` is used (the original behaviour).
    n_styles : int, default 27
        Number of leading columns of ``X`` considered when picking the
        dominant style.

    Returns
    -------
    pandas.DataFrame
        A copy of the rows of ``X`` for the artist's images that are present
        in ``X``, with an added ``dominant_style`` column holding the name of
        the highest-scoring column among the first ``n_styles``.
    """
    if metadata is None:
        # Fall back to the notebook global so existing three-argument calls are unchanged.
        metadata = df

    artist_files = metadata.loc[metadata['AUTHOR'] == artist, 'IMAGE_FILE']
    # Label-based .loc raises KeyError if any requested label is absent, so images
    # without a prediction row are skipped instead of aborting the whole lookup.
    artist_files = artist_files[artist_files.isin(X.index)]

    # Explicit copy: the new column must never be written into X or a view of it.
    X_artist = X.loc[artist_files.tolist()].copy()

    style_names = X_artist.columns[:n_styles]
    scores = X_artist.iloc[:, :n_styles].to_numpy()
    X_artist['dominant_style'] = [style_names[i] for i in scores.argmax(axis=1)]

    return X_artist

#### Determine the dominant style characteristics of each artist for the painters experiment

In [9]:
# For each selected painter, print how often each style is the top-scoring one.
for painter in authors:
    painter_styles = create_artist_dataset(X, y, painter)
    style_counts = Counter(painter_styles['dominant_style'])
    print("************")
    print(f"Painter = {painter}")
    print(style_counts)

************
Painter = GOGH, Vincent van
Counter({'Post_Impressionism': 164, 'Realism': 45, 'Expressionism': 23, 'Impressionism': 20, 'Symbolism': 8, 'Art_Nouveau': 8, 'Naive_Art_Primitivism': 7, 'Baroque': 5, 'Mannerism_Late_Renaissance': 4, 'Romanticism': 2, 'Northern_Renaissance': 1, 'Rococo': 1, 'Fauvism': 1, 'Cubism': 1, 'Pointillism': 1})
************
Painter = MONET, Claude
Counter({'Impressionism': 71, 'Realism': 8, 'Romanticism': 3, 'Post_Impressionism': 3, 'Cubism': 1, 'Art_Nouveau': 1, 'Pop_Art': 1, 'Rococo': 1, 'Symbolism': 1, 'Synthetic_Cubism': 1, 'Baroque': 1})
************
Painter = GAUGUIN, Paul
Counter({'Post_Impressionism': 42, 'Expressionism': 11, 'Impressionism': 6, 'Romanticism': 6, 'Symbolism': 6, 'Art_Nouveau': 4, 'Northern_Renaissance': 2, 'Pointillism': 1, 'Cubism': 1, 'Naive_Art_Primitivism': 1, 'Realism': 1})
************
Painter = MANET, Edouard
Counter({'Impressionism': 24, 'Realism': 22, 'Art_Nouveau': 3, 'Romanticism': 2, 'Symbolism': 2, 'New_Realism': 1

In [10]:
# Label every painting that has style predictions with its top-scoring style.
style_types = list(styles.columns)

# One row-wise argmax over the whole score matrix replaces a per-image boolean
# scan of `styles` (which made the original loop quadratic in the number of images).
top_style_by_file = pd.Series(
    styles.columns.to_numpy()[styles.to_numpy().argmax(axis=1)],
    index=styles.index,
)
# Series.map needs a unique index; if a file name is repeated, keep its first row.
top_style_by_file = top_style_by_file[~top_style_by_file.index.duplicated(keep='first')]

# .copy() so that adding a column does not write into a slice of the original frame
# (this is what triggers pandas' SettingWithCopyWarning).
df = df[df['IMAGE_FILE'].isin(styles.index)].copy()
max_styles = df['IMAGE_FILE'].map(top_style_by_file).tolist()

df['Max_Style'] = max_styles
df

Unnamed: 0,IMAGE_FILE,DESCRIPTION,AUTHOR,TITLE,TECHNIQUE,DATE,TYPE,SCHOOL,TIMEFRAME,Max_Style
0,19873-1darmst.jpg,"The Meyer or Darmstadt Madonna is the last, mo...","HOLBEIN, Hans the Younger",Darmstadt Madonna,"Oil on limewood, 147 x 102 cm",1526 and after 1528,religious,German,1501-1550,Northern_Renaissance
1,18759-guard301.jpg,Whereas Canaletto incorporates classically ins...,"GUARDI, Francesco",Landscape with a Fisherman's Tent,"Oil on canvas, 49 x 77 cm",1770-75,landscape,Italian,1751-1800,Baroque
2,04589-temptati.jpg,In this painting the refinement of the colouri...,"BILIVERT, Giovanni",The Temptation of Charles and Ubalde,"Oil on copper, 37 x 28 cm",1629-30,religious,Italian,1601-1650,Baroque
3,15104-magi.jpg,"The left side of the painting was cut, origina...",GEERTGEN tot Sint Jans,Adoration of the Magi,"Panel, 111 x 69 cm",1480-85,religious,Netherlandish,1451-1500,Northern_Renaissance
4,36582-paolo_f1.jpg,The painting illustrates a famous episode from...,"SCHEFFER, Ary",The Ghosts of Paolo and Francesca Appear to Da...,"Oil on canvas, 167 x 234 cm",1835,other,Dutch,1801-1850,Baroque
...,...,...,...,...,...,...,...,...,...,...
19239,07228-canal515.jpg,This picture is a supreme example of a distinc...,CANALETTO,Venice: The Grand Canal from Palazzo Flangini ...,"Oil on canvas, 47 x 78 cm",c.1738,landscape,Italian,1701-1750,Baroque
19240,06294-fighting.jpg,Dutch realism was a matter not merely of imita...,"BROUWER, Adriaen",Peasants Fighting,"Oil on wood, 33 x 49 cm",1631-35,genre,Flemish,1601-1650,Baroque
19241,21607-mytholo1.jpg,Lauri's speciality was the ideal landscape wit...,"LAURI, Filippo",Venus and Adonis,"Oil on copper, 18 x 29 cm",1650s,mythological,Italian,1651-1700,Baroque
19242,02815-bacchant.jpg,Augustin was a French miniaturist who revived ...,"AUGUSTIN, Jean-Baptiste-Jacques",A Bacchante,"Ivory, diameter: 8 cm",1799,mythological,French,1751-1800,Rococo


#### Explore correlation between timeframe and style

In [11]:
# For every TIMEFRAME bucket, in sorted order, tally the Max_Style labels of its paintings.
for timeframe, paintings in df.groupby('TIMEFRAME'):
    print('***************************')
    print(timeframe)
    print(Counter(paintings.Max_Style))

***************************
0751-0800
Counter({'Expressionism': 1, 'Post_Impressionism': 1})
***************************
0801-0850
Counter({'Early_Renaissance': 3, 'High_Renaissance': 2})
***************************
0851-0900
Counter({'Mannerism_Late_Renaissance': 1, 'Expressionism': 1, 'Impressionism': 1})
***************************
0951-1000
Counter({'Impressionism': 2, 'Expressionism': 1, 'New_Realism': 1})
***************************
1001-1050
Counter({'Early_Renaissance': 1})
***************************
1051-1100
Counter({'Early_Renaissance': 15, 'Romanticism': 2, 'High_Renaissance': 2, 'Post_Impressionism': 2, 'Art_Nouveau': 2, 'Expressionism': 1, 'Symbolism': 1, 'Mannerism_Late_Renaissance': 1})
***************************
1101-1150
Counter({'Early_Renaissance': 5, 'Symbolism': 2, 'Impressionism': 2, 'Expressionism': 1, 'Mannerism_Late_Renaissance': 1, 'Art_Nouveau': 1, 'Naive_Art_Primitivism': 1, 'Romanticism': 1})
***************************
1151-1200
Counter({'Early_Renaissa