# Final Project for Cogs 118B
Group Members:
- Samruddhi Hande ([email](mailto:shande@ucsd.edu))
- Ron Hasson ([email](mailto:rhasson@ucsd.edu))
- Andrew Hernandez ([email](mailto:ash053@ucsd.edu))
- Mehail Mathew Sunny ([email](mailto:msmathew@ucsd.edu))
- Justin Yang ([email](mailto:justin-yang@ucsd.edu))

### Import packages and set up paths

In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA

In [2]:
# Resolve the output folder *before* changing directory: `orig_wd` is the
# `data` folder under the directory the notebook was launched from.
orig_wd = os.path.abspath('data')
# Create it up front so the later np.save / to_csv calls cannot fail with
# FileNotFoundError after all the images have already been loaded.
os.makedirs(orig_wd, exist_ok=True)

# Change directory to photodraw project -- on Justin's local laptop
# NOTE: this path is machine-specific; edit it when running elsewhere.
os.chdir('F:\\photodraw\\analysis')

# directory & file hierarchy
# `proj_dir` is already absolute, so paths joined onto it are absolute too.
proj_dir = os.path.abspath('..')
analysis_dir = os.getcwd()
results_dir = os.path.join(proj_dir, 'results')
csv_dir = os.path.join(results_dir, 'csv')
sketch_dir = os.path.join(proj_dir, 'sketches')
feature_dir = os.path.join(proj_dir, 'features')

# feature arrays and their metadata tables, all located in `feature_dir`
meta_path = os.path.join(feature_dir, 'metadata_pixels.csv')
image_path = os.path.join(feature_dir, 'flattened_sketches_pixels.npy')
meta_path_fc6 = os.path.join(feature_dir, 'METADATA_sketch.csv')
image_path_fc6 = os.path.join(feature_dir, 'FEATURES_FC6_sketch_no-channel-norm.npy')

### Construct easy-to-use feature representations with corresponding metadata

In [3]:
# List every file in the photodraw2x2 sketch folder.
# os.listdir returns entries in arbitrary order; sort them so that the row
# indices derived from this list (and saved in the metadata) are reproducible
# across runs and machines.
photodraw2x2_dir = os.path.join(sketch_dir, 'photodraw2x2')
sketchnames = sorted(os.listdir(photodraw2x2_dir))
sketchpaths = [os.path.join(photodraw2x2_dir, name) for name in sketchnames]

In [4]:
# Build one metadata row per sketch: (category, raw_sketch_ind).
#  - category: parsed from the file name by dropping up to three leading and
#    up to three trailing '_'-separated fields (presumably ids / suffixes
#    around the category label -- confirm against the naming scheme).
#  - raw_sketch_ind: position of the sketch in `sketchpaths`.
# os.path.basename is used instead of splitting on '\\' so the file name is
# extracted correctly regardless of the platform's path separator.
df = pd.DataFrame(
    [(os.path.basename(path).split('_', 3)[-1].rsplit('_', 3)[0], i)
     for i, path in enumerate(sketchpaths)],
    columns=['category', 'raw_sketch_ind'],
)
# Group rows by category; within a category keep the original listing order.
df = df.sort_values(by=['category', 'raw_sketch_ind']).reset_index(drop=True)

In [5]:
# put data into flattened num_sketches x (height*width) array, one row per
# sketch in `sketchpaths` order; only the alpha channel (index 3) is kept
alpha_rows = []
for path in sketchpaths:
    # context manager guarantees the image file handle is released
    with Image.open(path) as img:
        alpha_rows.append(np.array(img)[:, :, 3].flatten())
flattened_sketch_raw = np.vstack(alpha_rows)

# convert alpha to binary: integer floor-division maps 255 -> 1 and every
# smaller value -> 0 (assuming 8-bit alpha). This matches dividing by 255 and
# truncating, but avoids the float intermediate and any imprecision from it.
flattened_sketch_raw = flattened_sketch_raw // 255
flattened_sketch_raw = flattened_sketch_raw.astype(int)

# np.save appends the '.npy' extension to the given file name
np.save(os.path.join(orig_wd, 'sketches_raw_nopca'), flattened_sketch_raw)

In [6]:
# Load the per-sketch metadata table and order its rows by category.
# NOTE(review): rows are sorted on `category` only, so the order of rows
# within a category is whatever the sort leaves behind -- confirm that
# downstream code only relies on category-level alignment with `df`.
sketch_df = pd.read_csv(os.path.join(csv_dir, 'photodraw2x2_sketch_data.csv'))
sketch_df = sketch_df.sort_values(by=['category'])
# Load the fc6 feature matrix (plain string: no f-string formatting needed).
F_fc6 = np.load(os.path.join(feature_dir, 'FEATURES_FC6_photodraw2x2_sketch.npy'))

In [7]:
# Row order of the category-sorted metadata, expressed as indices into the
# fc6 feature matrix as it was loaded from disk.
fc6_order = sketch_df['feature_ind'].values

# Reorder the features to follow the sorted metadata and record the indices.
# NOTE(review): `fc6_sketch_ind` holds indices into the matrix as loaded, while
# the array saved below has already been reordered -- confirm which of the two
# downstream code is expected to index with this column.
F_fc6 = F_fc6[fc6_order]
df['fc6_sketch_ind'] = fc6_order

fc6_nopca_path = os.path.join(orig_wd, 'sketches_fc6_nopca')
np.save(fc6_nopca_path, F_fc6)

In [8]:
# Project the binary pixel matrix onto its first 10 principal components.
# random_state is fixed because PCA's default solver selection may use a
# randomized SVD for large inputs, which would otherwise make the saved
# components differ from run to run.
pca = PCA(n_components=10, random_state=0)
flattened_sketch_raw_pca = pca.fit_transform(flattened_sketch_raw)
# PCA keeps the row order of its input, so the same row index applies.
df['raw_sketch_pca_ind'] = df.raw_sketch_ind.values
np.save(os.path.join(orig_wd, 'sketches_raw_pca'), flattened_sketch_raw_pca)

In [9]:
# Project the fc6 features onto their first 10 principal components.
# random_state is fixed because PCA's default solver selection may use a
# randomized SVD for large inputs, which would otherwise make the saved
# components differ from run to run.
pca = PCA(n_components=10, random_state=0)
flattened_sketch_fc6_pca = pca.fit_transform(F_fc6)
# NOTE(review): this column stores `feature_ind` values from the metadata
# table -- confirm they are the intended row indices into the array saved below.
df['fc6_sketch_pca_ind'] = sketch_df.feature_ind.values
np.save(os.path.join(orig_wd, 'sketches_fc6_pca'), flattened_sketch_fc6_pca)

In [10]:
# Write the combined metadata table next to the saved feature arrays;
# the DataFrame index is omitted from the file.
metadata_csv_path = os.path.join(orig_wd, 'sketches_metadata.csv')
df.to_csv(metadata_csv_path, index=False)