In [None]:
import glob
import os

import numpy as np
import pandas as pd
import torch
import xgboost as xgb
from PIL import Image
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from torch import nn
from torch.autograd import Variable
from torchvision import models, transforms
from tqdm import tqdm

In [None]:
# Folder name of the image dataset.
dataset_folder_name = 'UTKFace'

# NOTE(review): the three constants below are not referenced by any other
# visible cell; they are kept so that code elsewhere can still rely on them.
TRAIN_TEST_SPLIT = 0.8
IM_HEIGHT = 198
IM_WIDTH = IM_HEIGHT

# Two-way lookup between the numeric gender code and its label:
#   'gender_id'    : code  -> label
#   'gender_alias' : label -> code
dataset_dict = {'gender_id': dict(enumerate(('male', 'female')))}
dataset_dict['gender_alias'] = {
    label: code for code, label in dataset_dict['gender_id'].items()
}

In [None]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Build a DataFrame describing every image found directly inside ``dataset_path``.

    File names are expected to look like ``<age>_<gender>_<race>_<suffix>.<ext>``.
    Only the age and the gender are extracted. Files whose name does not match
    that pattern (wrong number of ``_``-separated fields, non-numeric age or
    gender, or a gender code missing from ``dataset_dict['gender_id']``) are
    dropped from the result.

    Args:
        dataset_path: directory to scan (not recursive).
        ext: file extension to match, without the leading dot.

    Returns:
        A DataFrame with the columns ``age``, ``gender`` (the label looked up
        in ``dataset_dict['gender_id']``) and ``file`` (the path returned by
        glob). When nothing matches, the frame is empty but still has all
        three columns.
    """
    def parse_info_from_file(path):
        """
        Return ``(age, gender_label)`` parsed from the file name, or
        ``(None, None)`` when the name does not follow the expected pattern.
        """
        try:
            filename = os.path.splitext(os.path.basename(path))[0]
            age, gender, _race, _ = filename.split('_')
            return int(age), dataset_dict['gender_id'][int(gender)]
        except (ValueError, KeyError):
            # ValueError: wrong field count or non-integer age/gender.
            # KeyError: gender code not present in the lookup table.
            # Anything else is a programming error and should surface.
            return None, None

    # glob order is OS-dependent; sorting keeps row order (and therefore any
    # seeded split performed on the result) reproducible across machines.
    files = sorted(glob.glob(os.path.join(dataset_path, "*.%s" % ext)))

    records = [parse_info_from_file(file) for file in files]

    # Naming the columns up front keeps the frame well-formed even when
    # `records` is empty (renaming afterwards raised a length mismatch).
    df = pd.DataFrame(records, columns=['age', 'gender'])
    df['file'] = files

    # Rows for unparseable file names hold None/NaN and are removed here.
    return df.dropna()

In [None]:
# Build the image index in this cell so the notebook runs from a fresh kernel
# (no other visible cell defines `df`).
df = parse_dataset(dataset_folder_name)

# Pretrained AlexNet with its final classifier layer removed, so a forward
# pass returns the activations of the last hidden layer (4096 values per image).
new_model = models.alexnet(pretrained=True)
new_classifier = nn.Sequential(*list(new_model.classifier.children())[:-1])
new_model.classifier = new_classifier
# Inference mode: without this the classifier's Dropout layers stay active
# and randomly zero part of every extracted feature vector.
new_model.eval()

# The preprocessing pipeline does not depend on the image, so build it once.
preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )])

# One row per image; sized from the data instead of a hard-coded count.
features_a = np.zeros((len(df), 4096))

# No gradients are needed for feature extraction; skipping the autograd graph
# saves memory and time.
with torch.no_grad():
    for i, file in enumerate(tqdm(df['file'])):
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(file) as img:
            img_cat = img.convert('RGB')

        img_cat_preprocessed = preprocess(img_cat)
        batch_img_cat_tensor = torch.unsqueeze(img_cat_preprocessed, 0)  # add batch dimension
        features_var = new_model(batch_img_cat_tensor)  # output of AlexNet's last hidden layer
        features_a[i, :] = features_var.squeeze(0).numpy()

In [None]:
# Binary gender target: 1 for 'female', 0 for every other label.
y = df['gender'].eq('female').astype(int).tolist()

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
gender_split = train_test_split(features_a, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = gender_split

In [None]:
# Hyper-parameter grid explored by the grid search for the gender classifier.
parameters_gender = dict(
    max_depth=list(range(5, 30, 5)),
    learning_rate=[0.01, 0.02, 0.05, 0.1, 0.15],
    n_estimators=[50, 100, 150, 200, 300, 500],
    min_child_weight=[0, 2, 5, 10, 20],
    gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5],
)

In [None]:
# Exhaustive 10-fold grid search over `parameters_gender`; this is expensive.
xgb_gender = GridSearchCV(xgb.XGBClassifier(), param_grid=parameters_gender, cv=10)
xgb_gender.fit(X_train, y_train)

# Hold-out accuracy of the fitted search object (the original referenced an
# undefined name `clf` here). `y_test` may be a plain list, hence asarray.
np.mean(np.asarray(y_test) == xgb_gender.predict(X_test))

In [None]:
# Split the same feature matrix again, this time with age as the target.
# NOTE: this rebinds X_train / X_test / y_train / y_test.
age_split = train_test_split(
    features_a,
    df['age'],
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = age_split

In [None]:
# Hyper-parameter grid explored by the grid search for the age regressor.
parameters_age = dict(
    max_depth=list(range(5, 30, 5)),
    learning_rate=[0.01, 0.02, 0.05, 0.1, 0.15],
    n_estimators=[50, 100, 150, 200, 300, 500],
    min_child_weight=[0, 2, 5, 10, 20],
    gamma=[0, 0.1, 0.2, 0.3, 0.4, 0.5],
    subsample=[0.6, 0.7, 0.8, 0.85, 0.95],
)

In [None]:
# Exhaustive 10-fold grid search over `parameters_age`; this is expensive.
# Bound to its own name so the fitted gender search in `xgb_gender` is not
# overwritten by the age regressor.
xgb_age = GridSearchCV(xgb.XGBRegressor(), param_grid=parameters_age, cv=10)
xgb_age.fit(X_train, y_train)

# Hold-out mean squared error of the fitted search object (the original
# referenced an undefined name `clf` here).
mean_squared_error(y_test, xgb_age.predict(X_test))