# Preprocessing Image Data for Use in Drive Data Generator

### Create Feature Dataframe with Local Image Paths

In [1]:
# Create a dataframe with meta_info and drive, already done via munger
from munger import Munger
mngr = Munger()
df_meta = mngr.munge()[0]

Returning meta_data dataframe followed by feature_space_dataframe for  data/4096_vals.csv


In [2]:
df_meta.head()

Unnamed: 0_level_0,CID,Category,SubCategory,HeelHeight,Insole,Closure,Gender,Material,ToeStyle,img_path
CID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
100627-72,100627-72,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...
100627-255,100627-255,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...
100657-72,100657-72,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...
100657-216,100657-216,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...
101026-3,101026-3,Boots,Mid-Calf,1in - 1 3/4in,Leather;Padded,Pull-on,Men,Leather;Rubber,Square Toe;Closed Toe,data/ut-zap50k-images/Boots/Mid-Calf/Durango/1...


## Preprocess Images and Save to a New Folder to be Uploaded to Drive

### Define preprocessing Functions here

In [3]:
#img preprocessing functions
def convert_img(img, conversion_str):
    return img.convert(conversion_str)

def resize_img(img, resize):
    return img.resize(resize)

grayscale_img = lambda img: convert_img(img, 'L')

In [4]:
preprocesses = [grayscale_img]

In [5]:
from PIL import Image

def preprocess_img_row(row, folder_path='pp_data', preprocesses=[]):
    with Image.open(row['img_path']) as img:
        #Preprocess image
        for preprocess in preprocesses:
            img = preprocess(img)
        
        #Save preprocessed image
        preprocessed_path = folder_path + '/images/' + row['CID'] + '.jpg'
        img.save(preprocessed_path)
    row['preprocessed_path'] = preprocessed_path
    return row    

### Define name of preprocessed data folder here

In [6]:
preprocessed_path = 'pp_data'

### Perform Preprocessing!

In [7]:
df_preprocessed = df_meta.apply(lambda row: preprocess_img_row(row, folder_path=preprocessed_path, preprocesses=preprocesses), axis=1)

In [8]:
df_preprocessed.head()

Unnamed: 0_level_0,CID,Category,SubCategory,HeelHeight,Insole,Closure,Gender,Material,ToeStyle,img_path,preprocessed_path
CID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
100627-72,100627-72,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...,pp_data/images/100627-72.jpg
100627-255,100627-255,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...,pp_data/images/100627-255.jpg
100657-72,100657-72,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...,pp_data/images/100657-72.jpg
100657-216,100657-216,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,data/ut-zap50k-images/Shoes/Oxfords/Bostonian/...,pp_data/images/100657-216.jpg
101026-3,101026-3,Boots,Mid-Calf,1in - 1 3/4in,Leather;Padded,Pull-on,Men,Leather;Rubber,Square Toe;Closed Toe,data/ut-zap50k-images/Boots/Mid-Calf/Durango/1...,pp_data/images/101026-3.jpg


### Save CSV in Preprocessed folder!

In [9]:
#create and save drive_csv
import pandas
df_drive = df_preprocessed.drop(['CID', 'img_path'], axis=1)
df_drive.rename(columns={'preprocessed_path': 'drive_path'}, inplace=True)

csv_path = preprocessed_path + '/df.csv'
df_drive.to_csv(csv_path)

## Test that we can read in csv correctly!

In [10]:
drive_df_test = pandas.read_csv(csv_path, index_col=0)

In [11]:
drive_df_test.head()

Unnamed: 0_level_0,Category,SubCategory,HeelHeight,Insole,Closure,Gender,Material,ToeStyle,drive_path
CID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
100627-72,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,pp_data/images/100627-72.jpg
100627-255,Shoes,Oxfords,,Leather,Lace up,Men,Leather,Capped Toe;Round Toe,pp_data/images/100627-255.jpg
100657-72,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,pp_data/images/100657-72.jpg
100657-216,Shoes,Oxfords,,Leather;Padded;Removable,Lace up,Men,Leather,Capped Toe;Round Toe,pp_data/images/100657-216.jpg
101026-3,Boots,Mid-Calf,1in - 1 3/4in,Leather;Padded,Pull-on,Men,Leather;Rubber,Square Toe;Closed Toe,pp_data/images/101026-3.jpg


In [12]:
class DriveImageGenerator():
    
    def __init__(self, df, drive)

SyntaxError: invalid syntax (<ipython-input-12-053b7c97e5d7>, line 3)