# Notebook to Download Images
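Downloads each handwriting sample and saves it in a folder named after the letter's Unicode code point (for example `U0041/` for "A"), then trims and centers every image. Produces a CSV (`labels.csv`) listing each image's location and its MBTI tag.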

In [1]:
import csv

# Read the whole export up front: row 0 is the header, the rest are submissions.
with open('Handwriting_Samples.csv', 'rb') as samples_file:
    rows = list(csv.reader(samples_file, delimiter=','))

# Each letter is the second-to-last character of its column header; the first
# two columns and the last one are skipped.
header = rows[0]
letter = [title[-2] for title in header[2:-1]]
print(letter)

# Keep only the submission rows and echo them for inspection.
data = rows[1:]
for row in data:
    print(row)

['A', 'a', 'D', 'd', 'e', 'G', 'g', 'm', 's', 't']
['2021/01/19 10:32:12', 'INFP', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_6.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_13.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_14.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_19.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_23.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_15.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/4868795308329583051_base64_20.png', 'https://www.jotform.com/uploads/nathanianah/210168709933056/4868795308329583051/486879530832958305

In [2]:
# Preview the code-point label derived from each letter (e.g. U0041 for 'A').
# print() is used in its call form, like the first cell, so the line parses
# and prints the same on both Python 2 and Python 3 kernels.
for x in letter:
    print('U00'+hex(ord(x))[2:])

U0041
U0061
U0044
U0064
U0065
U0047
U0067
U006d
U0073
U0074


In [3]:
from PIL import Image
import numpy as np

def center(filename, size):
    """Trim the white border of an image and re-center it on a square canvas.

    The image at ``filename`` is cropped to the bounding box of its non-white
    pixels (only the first three channels are inspected, so alpha is ignored),
    pasted in the middle of a white ``size`` x ``size`` RGBA canvas, and saved
    back to ``filename`` as PNG, overwriting the original file.

    Parameters
    ----------
    filename : str
        Path of the image to process; it is overwritten in place.
    size : int
        Side length, in pixels, of the square output canvas.

    Notes
    -----
    The pixel array is indexed with three axes, so the image must have a
    channel dimension (e.g. RGB or RGBA). An image with no non-white pixels
    is centered without trimming instead of raising.
    """
    im = Image.open(filename)
    pix = np.asarray(im)

    # trim whitespace
    rgb = pix[:, :, 0:3]  # Drop the alpha channel
    # Row/column indices of every pixel that differs from pure white in any
    # colour channel.
    rows, cols = np.nonzero((rgb != 255).any(axis=2))

    if rows.size:
        # crop() takes (left, upper, right, lower) with the right/lower edges
        # exclusive, so add 1 to keep the last row and column of ink.
        box = (int(cols.min()), int(rows.min()),
               int(cols.max()) + 1, int(rows.max()) + 1)
        new_image = im.crop(box)
    else:
        # Nothing but white: there is no bounding box to crop to.
        new_image = im

    # create new image and center
    img_w, img_h = new_image.size
    bg = Image.new('RGBA', (size, size), (255, 255, 255, 255))
    bg_w, bg_h = bg.size
    offset = ((bg_w - img_w) // 2, (bg_h - img_h) // 2)
    bg.paste(new_image, offset)

    bg.save(filename, "PNG")

In [4]:
import errno
import os

import requests
from tqdm import tnrange, tqdm_notebook

# Download
# Every sample is stored in a folder named after its letter's code point,
# e.g. U0041/U0041_001.png for the first submission's 'A'.
for x in tnrange(len(data), desc="Download"):
    for y in range(len(letter)):
        url = data[x][y+2]  # sample URLs start at the third column
        code = 'U00'+hex(ord(letter[y]))[2:]
        filename = code.upper()+'/'+code.upper()+'_{:03d}'.format(x+1)+".png"

        r = requests.get(url, allow_redirects=True)
        # Stop on an HTTP error instead of saving the error page as a .png,
        # which would only fail later when the image is opened for cropping.
        r.raise_for_status()

        folder = os.path.dirname(filename)
        if not os.path.exists(folder):
            try:
                os.makedirs(folder)
            except OSError as exc: # Guard against race condition
                if exc.errno != errno.EEXIST:
                    raise

        # Context manager so the handle is flushed and closed right away.
        with open(filename, 'wb') as image_file:
            image_file.write(r.content)

HBox(children=(IntProgress(value=0, description=u'Download', max=40), HTML(value=u'')))




In [5]:
# Cropping
# Trim and re-center every downloaded sample on a 300x300 canvas, letter by letter.
for y in tnrange(len(letter), desc="Cropping"):
    folder = ('U00'+hex(ord(letter[y]))[2:]).upper()
    for sample_no in range(1, len(data) + 1):
        sample_path = folder+'/'+folder+'_{:03d}'.format(sample_no)+".png"
        center(sample_path, 300)

HBox(children=(IntProgress(value=0, description=u'Cropping', max=10), HTML(value=u'')))




In [6]:
# Generate CSV
# One [image path, MBTI tag] row per sample, grouped by letter and then by
# submission order; the tag is the second column of each submission row.
csv_content = []
for y in tnrange(len(letter), desc="Generate csv"):
    folder = ('U00'+hex(ord(letter[y]))[2:]).upper()
    csv_content.extend(
        [folder+'/'+folder+'_{:03d}'.format(n+1)+".png", row[1]]
        for n, row in enumerate(data)
    )

with open('labels.csv', 'wb') as outfile:
    csv.writer(outfile).writerows(csv_content)

HBox(children=(IntProgress(value=0, description=u'Generate csv', max=10), HTML(value=u'')))


