## Loading the image data

In [1]:
import os
import numpy as np
import pandas as pd
from glob import glob
from tqdm import tqdm
import h5py

from sklearn.datasets import load_files

from keras.utils import np_utils
from keras.preprocessing import image
from keras.applications.xception import Xception
from keras.applications.xception import preprocess_input as preprocess_input_xception

In [2]:
def image_id_from_path(path):
    """Return the integer image id encoded in a file name such as '.../1377.png'.

    The directory part and the extension are stripped with `os.path`, so the
    result does not depend on the length of the directory name or extension.
    Raises ValueError if the remaining file stem is not an integer.
    """
    return int(os.path.splitext(os.path.basename(path))[0])


# glob() returns entries in a filesystem-dependent order, so sort numerically by
# image id to make any positional split of `files` reproducible.
files = sorted(glob('boneage-training-dataset/*'), key=image_id_from_path)
additional_test_files = glob('boneage-test-dataset/*')
# Ids of the training images, in the same (ascending-id) order as `files`.
bone_names = [image_id_from_path(item) for item in files]

In [3]:
# Positional split of the file list: the first 11000 entries are used for
# training (~87%), the next 1400 for validation (~11%) and the following
# 211 for testing (~1.7%).
train_files = files[:11000]
valid_files = files[11000:12400]
test_files = files[12400:12611]

# Report the size of each split and their combined total.
n_train, n_valid, n_test = len(train_files), len(valid_files), len(test_files)
print('There are %s total bone images.\n' % (n_train + n_valid + n_test))
print('There are %d training bone images.' % n_train)
print('There are %d validation bone images.' % n_valid)
print('There are %d test bone images.'% n_test)

There are 12611 total bone images.

There are 11000 training bone images.
There are 1400 validation bone images.
There are 211 test bone images.


## Creating a dataframe consisting of ids, gender, image paths and bone ages

In [4]:
# Labels for the training images.
bone_age_df = pd.read_csv('boneage-training-dataset.csv')

# Derive each image path from the row's own `id` so that paths and labels stay
# aligned regardless of the CSV row order or of what the image directory
# listing happens to contain.
bone_age_df['img_paths'] = [
    "boneage-training-dataset/{}.png".format(int(image_id))
    for image_id in bone_age_df['id']
]

# Integer gender encoding: 0 where `male` is truthy, 1 otherwise.
bone_age_df['gender'] = bone_age_df['male'].map(lambda x: 0 if x else 1)

# `male` is deliberately left in place next to `gender`: DataFrame.drop returns
# a new frame, so a bare `drop(...)` call without assignment never removed it.
bone_age_df.head()

Unnamed: 0,id,boneage,male,img_paths,gender
0,1377,180,False,boneage-training-dataset/1377.png,1
1,1378,12,False,boneage-training-dataset/1378.png,1
2,1379,94,False,boneage-training-dataset/1379.png,1
3,1380,120,True,boneage-training-dataset/1380.png,0
4,1381,82,False,boneage-training-dataset/1381.png,1


## Dividing the frame into training, validation and test sets

Since the test CSV file does not contain the bone age column, the labelled training data is divided into three parts:

Train size = 11000

Valid size = 1400

Test size = 211

In [5]:
# Positional row split of the labelled frame: rows 0-10999 for training
# (11000 rows, starting at the very first row), rows 11000-12399 for
# validation (1400 rows) and rows 12400-12610 for testing (211 rows).
train_df = bone_age_df.iloc[:11000]
valid_df = bone_age_df.iloc[11000:12400]
test_df = bone_age_df.iloc[12400:12611]

## Loading Target ages

In [6]:
# Regression targets: the `boneage` column of each split, in train / valid / test order.
train_targets, valid_targets, test_targets = (
    split['boneage'] for split in (train_df, valid_df, test_df)
)

## Creating the tensors for creating features out of the images

In [7]:
def path_to_tensor(img_path):
    """Load a single image file as an array with a leading batch axis.

    The file is read with `image.load_img` using a 224x224 target size,
    converted with `image.img_to_array`, and returned with a new axis 0 of
    length 1 (presumably shape (1, 224, 224, 3) for RGB input - confirm).
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis so per-image arrays can later be stacked row-wise.
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Load every image in `img_paths` and stack the results along axis 0.

    Each path is converted with `path_to_tensor`; the per-image arrays are
    then combined with `np.vstack`. An empty `img_paths` makes `np.vstack`
    raise ValueError.
    """
    batch = []
    for single_path in img_paths:
        batch.append(path_to_tensor(single_path))
    return np.vstack(batch)

## Using Xception for feature generation

In [8]:
# Xception with ImageNet weights and without its top layers; the cells below
# call `predict` on it to turn image tensors into features.
model_xception = Xception(
    include_top=False,
    weights='imagenet',
)

## Creating the features for the train, validation and test sets and saving them into npz files

In [None]:
# Test split: image paths -> stacked tensors -> Xception preprocessing -> features.
test_tensors = paths_to_tensor(test_df['img_paths'])
test_features = model_xception.predict(preprocess_input_xception(test_tensors))
# The features are stored under the key "test" in the archive.
np.savez("BoneFeaturesXceptionTest.npz", test=test_features)
# Drop the temporaries so no large arrays linger in the kernel namespace.
del test_tensors, test_features

In [None]:
# Validation split: image paths -> stacked tensors -> Xception preprocessing -> features.
valid_tensors = paths_to_tensor(valid_df['img_paths'])
valid_features = model_xception.predict(preprocess_input_xception(valid_tensors))
# The features are stored under the key "valid" in the archive.
np.savez("BoneFeaturesXceptionValid.npz", valid=valid_features)
# Drop the temporaries so no large arrays linger in the kernel namespace.
del valid_tensors, valid_features

In [9]:
# Training split: image paths -> stacked tensors -> Xception preprocessing -> features.
train_tensors = paths_to_tensor(train_df['img_paths'])
train_features = model_xception.predict(preprocess_input_xception(train_tensors))
# The features are stored under the key "train" in the archive.
np.savez("BoneFeaturesXceptionTrain.npz", train=train_features)
# Drop the temporaries so no large arrays linger in the kernel namespace.
del train_tensors, train_features