In [1]:
import numpy as np
from collections import defaultdict
from sklearn.model_selection import train_test_split
import pandas as pd
import re, sys, os
import h5py
import numpy
import scipy.io as sio
import hdf5storage
# Enable inline plotting  
%matplotlib inline

## Load and Split the data 

In [2]:
# Make the local PyTorch/ folder importable so that `CNN_Model` can be found.
# The folder is resolved relative to the current working directory (the same
# base used for 'PyTorch/TrainingSet.mat') instead of a user-specific absolute
# path, so the notebook runs on any checkout.
# NOTE(review): assumes CNN_Model.py lives in <cwd>/PyTorch — confirm.
module_dir = os.path.abspath('PyTorch')
if module_dir not in sys.path:  # keep sys.path free of duplicates on re-run
    sys.path.append(module_dir)

In [3]:
from CNN_Model import project_01, normalize_im

In [4]:
# Load the training data and split it into training and validation sets

In [5]:
# Open the training-set .mat file read-only through h5py.
# NOTE(review): reading it with h5py presumably means it is an HDF5-based
# (MATLAB v7.3) file — confirm. The handle stays open for the cells below.
matfile = h5py.File('PyTorch/TrainingSet.mat', mode='r')

In [6]:
# Show the names of the top-level datasets stored in the file.
[*matfile.keys()]

['heatmaps', 'patches', 'spikes']

In [7]:
# Read the whole 'patches' dataset into memory as a NumPy array.
patches_dataset = matfile['patches']
patches = np.array(patches_dataset)

In [8]:
# Read the 'heatmaps' dataset into memory and scale every value by a constant
# factor of 100.
HEATMAP_SCALE = 100.0
heatmaps_raw = np.array(matfile['heatmaps'])
heatmaps = HEATMAP_SCALE * heatmaps_raw

In [9]:
# Read the 'spikes' dataset into memory. The file exposes three datasets
# ('heatmaps', 'patches', 'spikes'); this variable previously re-read
# 'heatmaps' by mistake, so `spikes` was just an unscaled copy of the heatmaps.
spikes = np.array(matfile['spikes'])

In [10]:
# Split the three arrays consistently along their first axis: 70 % for
# training and 30 % held out. The fixed random_state keeps the split
# reproducible between runs.
split_parts = train_test_split(
    patches,
    heatmaps,
    spikes,
    test_size=0.3,
    random_state=42,
)
X_train, X_test, y_train, y_test, z_train, z_test = split_parts

In [11]:
# Report how many examples ended up in each side of the split
# (the held-out "test" arrays serve as the validation set here).
n_training, n_validation = len(X_train), len(X_test)
print('Number of Training Examples: %d' % n_training)
print('Number of Validation Examples: %d' % n_validation)

Number of Training Examples: 7000
Number of Validation Examples: 3000


In [12]:
# Cast every split to single precision. `astype` always returns a new array,
# so the arrays produced by the split are replaced, not modified in place.
FLOAT32 = np.float32
X_train, X_test = X_train.astype(FLOAT32), X_test.astype(FLOAT32)
y_train, y_test = y_train.astype(FLOAT32), y_test.astype(FLOAT32)
z_train, z_test = z_train.astype(FLOAT32), z_test.astype(FLOAT32)

## Projection & Normalization

In [13]:
#===================== Training set normalization ==========================
# normalize training images to be in the range [0,1] and calculate the training set mean and std

In [14]:
# Project every training image with `project_01` (overwriting X_train in
# place) and record the per-image mean and standard deviation afterwards.
# NOTE(review): project_01 presumably rescales an image to [0, 1] — confirm
# against CNN_Model.
n_train_images = X_train.shape[0]
mean_train = np.zeros(n_train_images, dtype=np.float32)
std_train = np.zeros(n_train_images, dtype=np.float32)
for idx, image in enumerate(X_train):
    # `image` is a view into X_train, so writing through it updates X_train.
    image[...] = project_01(image)
    mean_train[idx] = image.mean()
    std_train[idx] = image.std()

In [15]:
# Build the normalized training images. The statistics handed to
# `normalize_im` are the averages of the per-image means and of the
# per-image standard deviations computed for the training set.
mean_val_train = mean_train.mean()
std_val_train = std_train.mean()
X_train_norm = np.zeros(X_train.shape, np.float32)
for normalized, image in zip(X_train_norm, X_train):
    # `normalized` is a view into X_train_norm; fill it in place.
    normalized[...] = normalize_im(image, mean_val_train, std_val_train)

In [16]:
# Patch size = length of the second axis of the image stack.
# Taken from X_train (same shape X_train_norm was allocated with) rather than
# from X_train_norm: X_train_norm is later reshaped to (N, 1, psize, psize),
# after which its axis 1 has length 1, so re-running this cell would silently
# set psize to 1.
psize = X_train.shape[1]

In [17]:
# ===================== Test set normalization ==========================
# normalize test images to be in the range [0,1] and calculate the test set 
# mean and std

In [18]:
# Project every held-out image with `project_01` (overwriting X_test in
# place) and record the per-image mean and standard deviation afterwards.
# NOTE(review): project_01 presumably rescales an image to [0, 1] — confirm
# against CNN_Model.
n_test_images = X_test.shape[0]
mean_test = np.zeros(n_test_images, dtype=np.float32)
std_test = np.zeros(n_test_images, dtype=np.float32)
for idx, image in enumerate(X_test):
    # `image` is a view into X_test, so writing through it updates X_test.
    image[...] = project_01(image)
    mean_test[idx] = image.mean()
    std_test[idx] = image.std()

In [19]:
# Build the normalized held-out images. The statistics handed to
# `normalize_im` are the averages of the per-image means and of the
# per-image standard deviations computed on the held-out set itself.
# NOTE(review): the held-out set is normalized with its own statistics, not
# with the training-set ones — confirm this is intended.
mean_val_test = mean_test.mean()
std_val_test = std_test.mean()
X_test_norm = np.zeros(X_test.shape, np.float32)
for normalized, image in zip(X_test_norm, X_test):
    # `normalized` is a view into X_test_norm; fill it in place.
    normalized[...] = normalize_im(image, mean_val_test, std_val_test)

In [20]:
# Insert a singleton axis at position 1 so every array has the layout
# (N, 1, psize, psize).
n_train_images, n_test_images = X_train.shape[0], X_test.shape[0]
X_train_norm = np.reshape(X_train_norm, (n_train_images, 1, psize, psize))
X_test_norm = np.reshape(X_test_norm, (n_test_images, 1, psize, psize))
y_train = np.reshape(y_train, (len(y_train), 1, psize, psize))
y_test = np.reshape(y_test, (len(y_test), 1, psize, psize))

In [21]:
# Sanity check: display the final shape of the normalized held-out inputs.
X_test_norm.shape

(3000, 1, 208, 208)

### Save normalized values into separate h5 files for both train & test datasets

In [22]:
def write2h5(f, X, Y, size, bSize):
    """Write paired inputs/targets to an HDF5 file as fixed-size batches.

    One dataset named ``'batch_<i>'`` is created per full batch, with shape
    ``(2, bSize, *X.shape[1:])`` and dtype ``'f'``. Index 0 along the first
    axis holds the slice of ``X`` and index 1 the matching slice of ``Y``.

    Only complete batches are written: the trailing ``size % bSize`` samples
    are discarded so that every dataset has the same shape.

    Parameters
    ----------
    f : object providing ``create_dataset(name, shape, dtype=...)``
        Destination, e.g. an ``h5py.File`` opened for writing.
    X, Y : array-like with a ``shape`` attribute and slice indexing
        Inputs and targets; their per-sample shapes (``shape[1:]``) must match.
    size : int
        Number of leading samples of ``X``/``Y`` to consider.
    bSize : int
        Number of samples per batch; must be positive.

    Raises
    ------
    ValueError
        If ``bSize`` is not positive or the per-sample shapes of ``X`` and
        ``Y`` differ.
    """
    if bSize <= 0:
        raise ValueError('bSize must be a positive integer, got {}'.format(bSize))

    # Derive the per-sample shape from the data instead of hard-coding it.
    sample_shape = tuple(X.shape[1:])
    if tuple(Y.shape[1:]) != sample_shape:
        raise ValueError(
            'X and Y must share the same per-sample shape, got {} and {}'.format(
                sample_shape, tuple(Y.shape[1:])))

    batch_shape = (2, bSize) + sample_shape
    for batch_idx in range(size // bSize):
        start = batch_idx * bSize
        stop = start + bSize
        dfset = f.create_dataset('batch_{}'.format(batch_idx), batch_shape, dtype='f')
        # Write inputs and targets separately to avoid stacking them into a
        # temporary array first.
        dfset[0] = X[start:stop]
        dfset[1] = Y[start:stop]

### Get h5 file for Training set 

In [23]:
# Absolute path of the training-split file: <cwd>/Input/TrainingSplit32.h5.
HDF5 = os.path.abspath(os.path.normpath(os.path.join(os.getcwd(), 'Input', 'TrainingSplit32.h5')))
# Opening the file for writing fails if its parent folder is missing, so make
# sure the Input/ directory exists (no-op when it is already there).
os.makedirs(os.path.dirname(HDF5), exist_ok=True)

In [24]:
# Create the output file at HDF5 ('w' mode truncates an existing file).
f = h5py.File(HDF5, mode='w')

In [25]:
# Store the normalized training inputs and their targets in batches of 32.
n_train_samples = len(y_train)
write2h5(f, X_train_norm, y_train, n_train_samples, 32)

In [26]:
# Close the HDF5 file handle `f` now that the batches have been written.
f.close()

### Get h5 file for Test set  

In [27]:
# Absolute path of the test-split file: <cwd>/Input/TestSplit32.h5.
HDF5 = os.path.abspath(os.path.normpath(os.path.join(os.getcwd(), 'Input', 'TestSplit32.h5')))
# Opening the file for writing fails if its parent folder is missing, so make
# sure the Input/ directory exists (no-op when it is already there).
os.makedirs(os.path.dirname(HDF5), exist_ok=True)

In [28]:
# Create the output file at HDF5 ('w' mode truncates an existing file).
f = h5py.File(HDF5, mode='w')

In [29]:
# Store the normalized held-out inputs and their targets in batches of 32.
n_test_samples = len(y_test)
write2h5(f, X_test_norm, y_test, n_test_samples, 32)

In [30]:
# Close the HDF5 file handle `f` now that the batches have been written.
f.close()