# inkme_dataset prep

After scraping the image URLs, we need to prepare the dataset used to train the model. This is done in two steps:

- Splitting the URLs of each tattoo style into train and test sets
- Downloading the images and resizing them accordingly


## Train test split

In [1]:
# Importing libraries needed

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Each style has its own csv of image URLs; every one is split 80/20 into
# a train part and a test part.

# Watercolor: load the scraped URLs
watercolor = pd.read_csv('/Users/caionunez/Desktop/watercolor.csv')

# Hold out 20 % of the rows for testing (fixed seed keeps the split repeatable)
watercolor_train, watercolor_test = train_test_split(
    watercolor,
    test_size=0.2,
    random_state=42,
)

# Persist both parts next to the notebook, without the index column
for split_frame, csv_name in (
    (watercolor_train, 'watercolortrain.csv'),
    (watercolor_test, 'watercolortest.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [None]:
# Realism: load the scraped URLs
realism = pd.read_csv('/Users/caionunez/Desktop/realism.csv')

# Hold out 20 % of the rows for testing (fixed seed keeps the split repeatable)
realism_train, realism_test = train_test_split(
    realism,
    test_size=0.2,
    random_state=42,
)

# Persist both parts next to the notebook, without the index column
for split_frame, csv_name in (
    (realism_train, 'realismtrain.csv'),
    (realism_test, 'realismtest.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [13]:
# Blackwork: load the scraped URLs
blackwork = pd.read_csv('/Users/caionunez/Desktop/blackwork.csv')

# Hold out 20 % of the rows for testing (fixed seed keeps the split repeatable)
blackwork_train, blackwork_test = train_test_split(
    blackwork,
    test_size=0.2,
    random_state=42,
)

# Persist both parts next to the notebook, without the index column
for split_frame, csv_name in (
    (blackwork_train, 'blackworktrain.csv'),
    (blackwork_test, 'blackworktest.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [19]:
# Fineline: load the scraped URLs
fineline = pd.read_csv('/Users/caionunez/Desktop/fineline.csv')

# Hold out 20 % of the rows for testing (fixed seed keeps the split repeatable)
fineline_train, fineline_test = train_test_split(
    fineline,
    test_size=0.2,
    random_state=42,
)

# Persist both parts next to the notebook, without the index column
for split_frame, csv_name in (
    (fineline_train, 'finelinetrain.csv'),
    (fineline_test, 'finelinetest.csv'),
):
    split_frame.to_csv(csv_name, index=False)

In [22]:
# Report the number of rows in each of the 8 train/test splits

split_sizes = (
    ("Watercolor train pictures", watercolor_train),
    ("Watercolor test pictures", watercolor_test),
    ("Realism train pictures", realism_train),
    ("Realism test pictures", realism_test),
    ("Blackwork train pictures", blackwork_train),
    ("Blackwork test pictures", blackwork_test),
    ("Fineline train pictures", fineline_train),
    ("Fineline test pictures", fineline_test),
)

for label, frame in split_sizes:
    print(label, len(frame))

Watercolor train pictures 1602
Watercolor test pictures 401
Realism train pictures 1533
Realism test pictures 384
Blackwork train pictures 1529
Blackwork test pictures 383
Fineline train pictures 1540
Fineline test pictures 386


## Download and resizing

In [None]:
# Importing libraries needed for this step

import os
import urllib.request
from PIL import Image

In [25]:
# Define function to download and resize image

def download_resize_image(url, filename, size=(200, 200)):
    """Download an image from ``url`` and store a resized RGB copy at ``filename``.

    The raw download goes to a temporary ``<filename>.part`` file, which is
    always removed afterwards. ``filename`` itself is only written once the
    image has been opened and resized, so a failed download or an unreadable
    image no longer leaves a raw, unresized file in the dataset folder.

    Parameters
    ----------
    url : str
        Location of the image to download.
    filename : str
        Destination path; the output format is chosen by Pillow from the
        file extension.
    size : tuple of int, optional
        Target ``(width, height)`` in pixels. Defaults to ``(200, 200)``.

    Returns
    -------
    bool
        ``True`` if the resized image was saved, ``False`` if any step failed.
        Failures are reported with ``print`` and never raised, so a long
        download loop keeps going.
    """
    partial_path = f'{filename}.part'
    try:
        # Download image from URL into the temporary file
        urllib.request.urlretrieve(url, partial_path)

        # Open inside a context manager so the file handle is released
        # before the temporary file is deleted
        with Image.open(partial_path) as image:
            # Convert to RGB first: JPEG cannot store alpha or palette
            # modes, so saving those images as .jpg would otherwise fail
            resized_image = image.convert('RGB').resize(size)

        # Save resized image
        resized_image.save(filename)
        return True

    # `Exception` rather than a bare `except:` so that interrupting the
    # kernel (KeyboardInterrupt) still stops a running download loop
    except Exception as exc:
        print(f'Error downloading or resizing {filename}: {exc}')
        return False

    finally:
        # Never leave the raw download behind, whatever happened above
        if os.path.exists(partial_path):
            os.remove(partial_path)

### Watercolor

In [26]:
# Watercolor train

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/train/watercolor"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `watercolor_train`
# is used as the file name
for index, url in watercolor_train['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/879.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/341.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/516.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/1345.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/139.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/714.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/1108.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/739.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/875.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/watercolor/784.jpg
Error downloading 

In [27]:
# Watercolor test

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/test/watercolor"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `watercolor_test`
# is used as the file name
for index, url in watercolor_test['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/watercolor/1268.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/watercolor/239.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/watercolor/1005.jpg


### Realism

In [28]:
# Realism train

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/train/realism"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `realism_train`
# is used as the file name
for index, url in realism_train['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/494.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/1789.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/334.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/620.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/465.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/326.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/1329.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/370.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/1211.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/realism/96.jpg
Error downloading or resizing /Users/caionunez/D

In [29]:
# Realism test

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/test/realism"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `realism_test`
# is used as the file name
for index, url in realism_test['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/904.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/1867.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/342.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/394.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/1234.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/543.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/1581.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/583.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/1132.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/realism/741.jpg
Error downloading or resizing /Users/caionunez/Desktop/c

### Blackwork

In [30]:
# Blackwork train

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/train/blackwork"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `blackwork_train`
# is used as the file name
for index, url in blackwork_train['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/1395.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/426.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/701.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/292.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/175.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/926.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/939.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/1161.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/148.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/blackwork/1103.jpg
Error downloading or resizi

In [31]:
# Blackwork test

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/test/blackwork"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `blackwork_test`
# is used as the file name
for index, url in blackwork_test['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/blackwork/1326.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/blackwork/305.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/blackwork/316.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/blackwork/1837.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/blackwork/942.jpg


### Fineline

In [32]:
# Fineline train

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/train/fineline"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `fineline_train`
# is used as the file name
for index, url in fineline_train['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/1593.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/422.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/909.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/1057.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/1333.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/84.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/1427.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/54.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/669.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/train/fineline/1634.jpg
Error downloading or resizing /Users/

In [33]:
# Fineline test

# Target directory for this style/split
style_dir = "/Users/caionunez/Desktop/cnn_model/dataset/test/fineline"

# Create the directory if it does not exist; without it every download
# below would fail and only show up as a per-file error message
os.makedirs(style_dir, exist_ok=True)

# Loop through the URLs; the row's index label in `fineline_test`
# is used as the file name
for index, url in fineline_test['image_url'].items():
    # Download and resize the image
    download_resize_image(url, f'{style_dir}/{index}.jpg')

Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/1043.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/527.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/109.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/700.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/1688.jpg
Error downloading or resizing /Users/caionunez/Desktop/cnn_model/dataset/test/fineline/1477.jpg
