## Import Library

In [36]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet50
import time
from torch.autograd import Variable
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt

## Data Preparation

In [37]:
data_path='drive/MyDrive/FaceRecognition/Dataset'

In [38]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
os.listdir(data_path)

['list_attribute.txt',
 'gender_classification.csv',
 'class_identity.txt',
 'Images',
 'gender_classification.gsheet',
 'model_saved']

In [40]:
images_list = os.listdir(data_path+'/Images')
images_list

['187288.jpg',
 '188125.jpg',
 '147643.jpg',
 '120938.jpg',
 '150031.jpg',
 '108644.jpg',
 '047315.jpg',
 '072659.jpg',
 '119273.jpg',
 '085021.jpg',
 '088672.jpg',
 '047418.jpg',
 '153504.jpg',
 '077672.jpg',
 '030742.jpg',
 '150511.jpg',
 '117232.jpg',
 '085351.jpg',
 '079514.jpg',
 '078881.jpg',
 '187802.jpg',
 '188010.jpg',
 '148116.jpg',
 '030446.jpg',
 '125028.jpg',
 '182809.jpg',
 '126880.jpg',
 '072922.jpg',
 '124865.jpg',
 '152825.jpg',
 '088686.jpg',
 '110502.jpg',
 '116203.jpg',
 '155345.jpg',
 '119682.jpg',
 '072620.jpg',
 '047575.jpg',
 '086911.jpg',
 '182720.jpg',
 '072607.jpg',
 '119678.jpg',
 '153461.jpg',
 '127416.jpg',
 '039970.jpg',
 '153310.jpg',
 '110086.jpg',
 '162413.jpg',
 '188800.jpg',
 '038477.jpg',
 '153401.jpg',
 '052853.jpg',
 '153331.jpg',
 '150267.jpg',
 '086739.jpg',
 '127374.jpg',
 '038438.jpg',
 '044908.jpg',
 '085390.jpg',
 '155810.jpg',
 '121161.jpg',
 '051208.jpg',
 '114297.jpg',
 '125404.jpg',
 '155105.jpg',
 '121309.jpg',
 '154888.jpg',
 '038447.j

#### EDA

In [6]:
len(images_list)

5017

In [7]:
df_gender_classification = pd.read_csv(data_path+'/gender_classification.csv')
df_gender_classification

Unnamed: 0,Male
0,1
1,1
2,1
3,0
4,0
...,...
4995,0
4996,1
4997,1
4998,0


In [8]:
df_class_identity = pd.read_csv(data_path+'/class_identity.txt', sep=" ", header=None)
df_class_identity

Unnamed: 0,0,1
0,000001.jpg,2880
1,000002.jpg,2937
2,000003.jpg,8692
3,000004.jpg,5805
4,000005.jpg,9295
...,...,...
202594,202595.jpg,9761
202595,202596.jpg,7192
202596,202597.jpg,9852
202597,202598.jpg,5570


In [21]:
df_list_attribute = pd.read_csv(data_path+'/list_attribute.txt', delim_whitespace=True, skiprows=[0])
df_list_attribute.reset_index(inplace=True)

In [23]:
pd.set_option('display.max_columns',None)
df_list_attribute.rename(columns={'index':'image_id'},inplace=True)
df_list_attribute.head(2)

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,Blurry,Brown_Hair,Bushy_Eyebrows,Chubby,Double_Chin,Eyeglasses,Goatee,Gray_Hair,Heavy_Makeup,High_Cheekbones,Male,Mouth_Slightly_Open,Mustache,Narrow_Eyes,No_Beard,Oval_Face,Pale_Skin,Pointy_Nose,Receding_Hairline,Rosy_Cheeks,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,1,1,-1,1,-1,-1,1,-1,-1,1,-1,-1,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1


In [34]:
data = df_list_attribute[df_list_attribute['image_id'].isin(images_list)].reset_index(drop=True)
data = data[['image_id', 'Male']]
data

Unnamed: 0,image_id,Male
0,000051.jpg,1
1,000052.jpg,1
2,000065.jpg,1
3,000166.jpg,1
4,000198.jpg,-1
...,...,...
4995,202320.jpg,-1
4996,202340.jpg,-1
4997,202347.jpg,-1
4998,202357.jpg,-1


In [35]:
data['Male'].value_counts()

-1    2953
 1    2047
Name: Male, dtype: int64

In [41]:
data

Unnamed: 0,image_id,Male
0,000051.jpg,1
1,000052.jpg,1
2,000065.jpg,1
3,000166.jpg,1
4,000198.jpg,-1
...,...,...
4995,202320.jpg,-1
4996,202340.jpg,-1
4997,202347.jpg,-1
4998,202357.jpg,-1


In [42]:
# load the data
data = images_list # load data from file provided
data =  # filter the data with available images only (5000 images)

SyntaxError: ignored

In [None]:
# split the data into train and test sets with a 80:20 ratio
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

## Preprocessing

In [None]:
class GenderDataset(Dataset):
    def __init__(self, data, image_folder_path, transform=None):
        self.data = data
        self.image_folder_path = image_folder_path
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image_path = os.path.join(self.image_folder_path, self.data.iloc[idx, 0])
        # please define image convertion technique to RGB here
        gender = self.data.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(gender, dtype=torch.long)

In [None]:
transform = transforms.Compose([
    # please define data transformation techniques here
])

In [None]:
train_set = GenderDataset(train_data, image_folder_path=os.path.join(data_path, "images"), transform=transform)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=2)

test_set = GenderDataset(test_data, os.path.join(data_path, "images"), transform=transform)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=2)

## Architecture

In [None]:
# please define the model optimizer and criterion (loss function)
model = None
optimizer = None
criterion = None

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 71.6MB/s]


## Modeling

In [None]:
import time
from torch.autograd import Variable

def train_model(model, dataloaders, dataset_sizes, criterion, optimizer, use_gpu=torch.cuda.is_available(), num_epochs=10):
    # please define the training model (VGG/GoogleNet/ResNet) here
    pass

In [None]:
dataloders = {
    "train":train_loader, "test":test_loader
}
dataset_sizes= {
    "train":len(train_set), "test":len(test_set)
}

In [None]:
use_gpu = torch.cuda.is_available()

if use_gpu:
  model = model.to("cuda")

In [None]:
model = train_model(model, dataloders, dataset_sizes, criterion, optimizer, use_gpu, 10)

## Evaluation

In [None]:
def evaluate_model(model, test_loader, target_labels):
    # please define the evaluation function here
    pass

In [None]:
evaluate_model(model, dataloders['test'], ["female", "male"])