In [130]:
!pip install tensorflow
!pip install visualkeras
!pip install graphviz
!pip install pydot



In [214]:
import os
import pandas as pd
import seaborn as sns
from PIL import Image
import shutil
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import visualkeras
from tensorflow.keras.utils import plot_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pydot
import graphviz

In [368]:
#read dataset
originalDataset = pd.read_csv('/Users/vickieluo/Downloads/miniwikiart/mini_style_train.csv')
originalDataset.head()

Unnamed: 0.1,Unnamed: 0,0,1
0,34713,Realism/ivan-kramskoy_portrait-of-the-artist-n...,21
1,20619,Cubism/pablo-picasso_bowl-of-fruit-and-guitar-...,7
2,40036,Art_Nouveau_Modern/aubrey-beardsley_the-lady-a...,3
3,1411,Realism/theodore-rousseau_the-pond-1855.jpg,21
4,23479,Abstract_Expressionism/kenzo-okada_solstice-19...,0


In [370]:
#add path of the images: sorry i got lazy so i used absolute path
originalDataset['0'] = '/Users/vickieluo/Downloads/miniwikiart/' + originalDataset['0']
originalDataset.head()

Unnamed: 0.1,Unnamed: 0,0,1
0,34713,/Users/vickieluo/Downloads/miniwikiart/Realism...,21
1,20619,/Users/vickieluo/Downloads/miniwikiart/Cubism/...,7
2,40036,/Users/vickieluo/Downloads/miniwikiart/Art_Nou...,3
3,1411,/Users/vickieluo/Downloads/miniwikiart/Realism...,21
4,23479,/Users/vickieluo/Downloads/miniwikiart/Abstrac...,0


In [372]:
#clean up dataset by dropping unamed column and renaming columns
originalDataset.drop(originalDataset.columns[originalDataset.columns.str.contains(
    'unnamed', case=False)], axis=1, inplace=True)
originalDataset.rename(columns={'0': 'filename', '1': 'genre'}, inplace=True)
print(originalDataset['genre'].value_counts())
#drop the genres that are not in the csv
droplist = [1, 25, 26, 18, 6]
originalDataset = originalDataset[~originalDataset['genre'].isin(droplist)]
originalDataset.head()

genre
12    157
21    141
23     88
9      84
20     82
3      54
24     53
4      48
17     36
7      34
19     25
15     25
0      23
13     23
22     22
14     20
8      16
5      15
16     12
10     12
11     11
6       6
25      4
26      4
18      4
1       1
Name: count, dtype: int64


Unnamed: 0,filename,genre
0,/Users/vickieluo/Downloads/miniwikiart/Realism...,21
1,/Users/vickieluo/Downloads/miniwikiart/Cubism/...,7
2,/Users/vickieluo/Downloads/miniwikiart/Art_Nou...,3
3,/Users/vickieluo/Downloads/miniwikiart/Realism...,21
4,/Users/vickieluo/Downloads/miniwikiart/Abstrac...,0


In [374]:
#making sure all paths are valid
for index, row in originalDataset.iterrows():
    if not os.path.exists(row['filename']):
        print(f"File not found: {row['filename']}")

In [378]:
#make each image 128x128 pixels and turn each image into an array of pixels and normalize and turn the image to black and white and create new df column for the pixel arrays
image = []
def pixelate(imagePath):
    newImageSize = (128, 128)
    try:
        with Image.open(imagePath) as img:
            img = img.convert('L')
            imgResized = img.resize(newImageSize)
            imgArray = np.array(imgResized, dtype=np.float32)
            imgArray /= 255.0
            image.append(imgArray)
    except Exception as e:
        print(f"Error processing {imagePath}: {e}")
for index, row in originalDataset.iterrows():
    pixelate(row['filename'])
originalDataset['Image'] = image

In [382]:
# Combine into a single nparray to make sure the dimensions are all the same
# add a dimension since we are using back and white and the model needs 4 dimensions
X = np.stack(originalDataset['Image'].values)  
X = np.expand_dims(X, axis=-1)
X.shape
#testing
for i, img in enumerate(X):
    if img is None or not isinstance(img, np.ndarray):
        print(f"Invalid entry at index {i}: {img}")
    elif img.shape != (128, 128, 1):  # Ensure the expected shape
        print(f"Invalid shape at index {i}: {img.shape}")

In [384]:
#one hot encode y
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
originalDataset['genre'] = label_encoder.fit_transform(originalDataset['genre'])
y = to_categorical(originalDataset['genre'], num_classes=21)
print(np.sum(y, axis=0))
y.shape

[ 23.  54.  48.  15.  34.  16.  84.  12.  11. 157.  23.  20.  25.  12.
  36.  25.  82. 141.  22.  88.  53.]


(981, 21)

In [386]:
#split into test and train
from sklearn.model_selection import StratifiedShuffleSplit

strat_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

for train_idx, test_idx in strat_split.split(X, y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

In [388]:
#validate split code-maybe add later
#X_train, X_val, y_cat_train, y_val = train_test_split(X_train, y_cat_train, test_size=0.2, random_state=42)

#checking for null values
print(f"Any None in X: {np.any([x is None for x in X_train])}")
print(f"Any None in y: {np.any([y is None for y in y_train])}")

Any None in X: False
Any None in y: False


In [360]:
#actual model
model = Sequential()

model.add(Conv2D(filters = 32, kernel_size= (4,4), padding = 'same', input_shape = (128,128,1), activation= 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Conv2D(filters = 64, kernel_size= (4,4), padding = 'same', activation= 'relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(rate = 0.3))
model.add(BatchNormalization())
model.add(Conv2D(filters = 128, kernel_size= (4,4), padding = 'same', activation= 'relu'))
model.add(Conv2D(filters = 128, kernel_size= (4,4), padding = 'same', activation= 'relu'))
model.add(Dropout(rate = 0.3))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(rate = 0.3))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(rate = 0.3))
model.add(Dense(21, activation='softmax'))

opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, amsgrad=False)

model.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [362]:
model.summary()

In [354]:
print(f"Length of X: {len(X_train)}, Length of y: {len(y_train)}")

Length of X: 784, Length of y: 784


In [356]:
try:
    model.fit(x = X_train,y = y_train, batch_size = 80, epochs = 10)
except Exception as e:
    print(f"Error during training: {e}")

Epoch 1/10
Error during training: None values not supported.
