<a href="https://colab.research.google.com/github/razerspeed/celeba-compition/blob/main/resnet_50_celeba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Step 0: Downloading Dataset

In [1]:
! pip install -q kaggle

! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/

! chmod 600 ~/.kaggle/kaggle.json

In [2]:
!kaggle datasets download -d jessicali9530/celeba-dataset

Downloading celeba-dataset.zip to /content
 99% 1.32G/1.33G [00:14<00:00, 107MB/s] 
100% 1.33G/1.33G [00:14<00:00, 97.8MB/s]


In [None]:
!unzip -x  celeba-dataset.zip

In [1]:
import pandas as pd
import numpy as np
!pip install torchsummary
from torchsummary import summary
pd.set_option('display.max_columns', None)

from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils ,models
from PIL import Image
from torch import optim

import torch
import torch.nn as nn
import torch.nn.functional as F




## Step 1: Loading DataFrame

In [2]:
df = pd.read_csv("list_attr_celeba.csv")

In [3]:
df.head()

Unnamed: 0,image_id,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,Blurry,Brown_Hair,Bushy_Eyebrows,Chubby,Double_Chin,Eyeglasses,Goatee,Gray_Hair,Heavy_Makeup,High_Cheekbones,Male,Mouth_Slightly_Open,Mustache,Narrow_Eyes,No_Beard,Oval_Face,Pale_Skin,Pointy_Nose,Receding_Hairline,Rosy_Cheeks,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young
0,000001.jpg,-1,1,1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,1,1,-1,1,-1,-1,1,-1,-1,1,-1,-1,-1,1,1,-1,1,-1,1,-1,-1,1
1,000002.jpg,-1,-1,-1,1,-1,-1,-1,1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1,-1,1,-1,-1,1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1
2,000003.jpg,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,1
3,000004.jpg,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,1,-1,-1,-1,-1,1,-1,1,-1,1,1,-1,1
4,000005.jpg,-1,1,1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,1


In [4]:
important_feature=df[['image_id','Male','Young']].copy()

In [5]:
del df

In [6]:
important_feature.replace(-1,0,inplace=True)

In [7]:
important_feature.shape

(202599, 3)

## Step 2: Split Dataset into Training, Validation and Test

*   List item
*   List item



In [8]:
train = important_feature.iloc[:50016] 
val = important_feature.iloc[50017:60033] 
test = important_feature.iloc[60034:65034]



In [9]:
print("length of train data ", len(train))
print("length of validataion data ", len(val))
print("length of test data ", len(test))


length of train data  50016
length of validataion data  10016
length of test data  5000


## Step 3: Creating Custom Data Generator


In [10]:
class CreateDataset(Dataset):
    def __init__(self, data):
        self.data = data 
        self.scaler = transforms.Resize([224, 224])
        self.normalize = transforms.Normalize(mean=[0.5,0.5,0.5],
                                     std=[0.5,0.5,0.5])
        self.to_tensor = transforms.ToTensor()
    def __len__(self):  
        return len(self.data)

    def __getitem__(self, idx):

        image_name , male , young = self.data.iloc[idx]
        img_loc = 'img_align_celeba/img_align_celeba/'+str(image_name)

        img = Image.open(img_loc)
        img = self.normalize(self.to_tensor(self.scaler(img)))
        
        
        

        return image_name, img , torch.tensor([male , young],dtype=torch.float)

In [11]:
train_Dataset = CreateDataset(train)
batch_size=32
train_Dataloader = DataLoader(train_Dataset, batch_size = batch_size, shuffle=True)


val_Dataset = CreateDataset(val)
val_Dataloader = DataLoader(val_Dataset, batch_size = batch_size, shuffle=True)


test_Dataset = CreateDataset(test)
test_Dataloader = DataLoader(test_Dataset, batch_size = 1, shuffle=False)

## Step 4: Downloading and Creating custom Resnet Model 


In [12]:
model=models.resnet50(pretrained= True)

**Freezing all the layers in pretrained model**




In [13]:
def freeze(model):
    for name, child in model.named_children():
        for param in child.parameters():
            param.requires_grad = False
        freeze(child)


freeze(model)

**Creating custom Top Layers**

In [14]:
class CustomResnetModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = model
        self.fc1 = nn.Linear(in_features=2048, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=2)
        self.dropout=nn.Dropout(0.2)
        self.relu = nn.ReLU()
#         self.identity = nn.Identity()
        self.sigmoid=nn.Sigmoid()
        
    def forward(self, x):
        
        self.model.fc=nn.Identity()
        x=self.fc1(self.model(x))
        x=self.relu(x)
        x=self.dropout(x)
        x=self.fc2(x)
#         x=self.relu(x)
#         x=self.sigmoid(x)
        
        return x
    
    def weight_init():
        for block in custom_model.modules():
            if isinstance(block,nn.Linear):
                nn.init.kaiming_uniform_(block.weight)
                nn.init.constant_(block.bias,1)

    

In [15]:
custom_model=CustomResnetModel()
device = torch.device("cuda" if torch.cuda.is_available() 
                                  else "cpu")

**Collecting all trainable parameters**

In [16]:
params_to_update=[]
for name,param in custom_model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        print("\t",name)

	 fc1.weight
	 fc1.bias
	 fc2.weight
	 fc2.bias


**Initializing Loss Function and Optimizer**

In [17]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params_to_update, lr=0.003)
custom_model.to(device)

CustomResnetModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
  

##Step 5 : Training the Model

In [18]:
def accuracy(output,target,batch_size):
    output=output.cpu().numpy()
    target=target.cpu().numpy()
    output=output>0
    output.astype(float)
    return np.sum(output==target)/(batch_size*2)

In [19]:
import time
torch.manual_seed(42)
EPOCH=10
loss_value_train=[]
loss_value_val=[]
eval_accuracy=[]
min_val_loss=float('inf')
for epoch in range(EPOCH):
    running_loss=0
    epoch_train_loss = []
    ### Train Loop
    custom_model.train()
    t0 = time.time()
    t1 = time.time()
    print("Training ... ")
    for i ,data in enumerate(train_Dataloader):
        image_name, img ,target = data

        optimizer.zero_grad()

        img = img.to(device)
        
        target = target.to(device)


        output = custom_model.forward(img)
        loss = criterion(output,target)
        
        running_loss += loss.item()
        epoch_train_loss.append(loss.item())
        
        loss.backward()
        optimizer.step()
        
        
        # print statistics
        if i % 100 == 99:    # print every 2000 mini-batches
            print(f"[{epoch + 1,i + 1}] loss : {round(running_loss / 100, 3)} time : {round(time.time() - t1,2)} sec")
#             print('[%d, %5d] loss: %.3f time : %' %
#                   (epoch + 1, i + 1, running_loss / 20),time.time() - t1)
            t1 = time.time()
            running_loss = 0.0

    print("Epoch -> ", epoch," Training Loss -> ",
          round(sum(epoch_train_loss)/len(train_Dataloader),3),f' time : {round(time.time() - t0,2)} sec')
    
    loss_value_train.append(sum(epoch_train_loss)/len(train_Dataloader))
    
    ### Eval Loop
    
    print("Evaluating...")
    
    
    custom_model.eval()
    
    
    with torch.no_grad():
        running_loss=0
        epoch_val_loss=[]
        running_accuracy=0
        
        
        for i ,data in enumerate(val_Dataloader):
            

            image_name, img ,target = data

            optimizer.zero_grad()

            img = img.to(device)

            target = target.to(device)


            output = custom_model.forward(img)
            loss = criterion(output,target)

            running_loss += loss.item()
            epoch_val_loss.append(loss.item())
            running_accuracy += accuracy(output,target,batch_size)
            
    total_val_loss=round(sum(epoch_val_loss)/len(val_Dataloader),3)
    print(" Evaluate Loss -> ",
          total_val_loss," Accuracy --> ",running_accuracy/len(val_Dataloader))
    
    loss_value_val.append(sum(epoch_val_loss)/len(val_Dataloader))
    
  
    if min_val_loss > total_val_loss:
        print("Writing Model at epoch ", epoch)
        torch.save(custom_model.state_dict(), 'model_state.pth')
        min_val_loss = total_val_loss
  




Training ... 
[(1, 100)] loss : 0.406 time : 17.76 sec
[(1, 200)] loss : 0.345 time : 17.66 sec
[(1, 300)] loss : 0.318 time : 17.73 sec
[(1, 400)] loss : 0.314 time : 17.88 sec
[(1, 500)] loss : 0.308 time : 17.92 sec
[(1, 600)] loss : 0.312 time : 18.0 sec
[(1, 700)] loss : 0.294 time : 18.02 sec
[(1, 800)] loss : 0.3 time : 18.01 sec
[(1, 900)] loss : 0.314 time : 17.93 sec
[(1, 1000)] loss : 0.302 time : 17.97 sec
[(1, 1100)] loss : 0.307 time : 17.99 sec
[(1, 1200)] loss : 0.327 time : 17.97 sec
[(1, 1300)] loss : 0.32 time : 17.98 sec
[(1, 1400)] loss : 0.317 time : 17.93 sec
[(1, 1500)] loss : 0.306 time : 17.96 sec
Epoch ->  0  Training Loss ->  0.318  time : 280.05 sec
Evaluating...
 Evaluate Loss ->  0.271  Accuracy -->  0.8885782747603834
Writing Model at epoch  0
Training ... 
[(2, 100)] loss : 0.288 time : 18.07 sec
[(2, 200)] loss : 0.294 time : 18.04 sec
[(2, 300)] loss : 0.309 time : 17.98 sec
[(2, 400)] loss : 0.288 time : 17.92 sec
[(2, 500)] loss : 0.287 time : 17.96

**The validation accuracy is around 88.91**

**Also the model is saved as model_state.pth**





**Testing accuracy for Test Split**

In [20]:
custom_model.eval()

from tqdm import tqdm
with torch.no_grad():
    running_loss=0
    epoch_val_loss=[]
    running_accuracy=0


    for data in tqdm(test_Dataloader):

        image_name, img ,target = data

        optimizer.zero_grad()

        img = img.to(device)

        target = target.to(device)


        output = custom_model.forward(img)

        
        running_accuracy += accuracy(output,target,1)
        
        
    
    print(" Accuracy --> ",running_accuracy/len(test_Dataloader))

100%|██████████| 5000/5000 [00:45<00:00, 110.31it/s]

 Accuracy -->  0.8887






**Test accuracy is 88.87**

