In [None]:
import os
import pandas as pd
from torchvision.io import read_image
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor, transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
%matplotlib inline

In [None]:
# Run on the first CUDA GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

# Input image reading and loading using DataLoader

In [None]:
class Imageread(Dataset):
    """Map-style dataset that pairs image files with scaled age labels.

    The annotations CSV must provide a ``filename`` column (path relative to
    ``img_dir``) and an ``age`` column.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotation table and remember where the images live.

        Args:
            annotations_file: Path of the CSV file read with ``pd.read_csv``.
            img_dir: Directory that the ``filename`` entries are joined onto.
            transform: Optional callable applied to each image tensor.
            target_transform: Optional callable applied to each label.
        """
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.df = pd.read_csv(annotations_file)

    def __len__(self):
        """Return the number of annotated samples (rows of the CSV)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at position ``idx``.

        Pixel values are divided by 255.0 and the age by 100.0 before the
        optional transforms are applied.
        """
        record = self.df.iloc[idx]
        image = read_image(os.path.join(self.img_dir, record['filename'])) / 255.0
        label = record['age'] / 100.0
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


In [None]:
# Name of the dataset folder below ../input.
# Alternative small dataset: "sample-dataset-project2"
dataset_name = "project2-data-80k-10k-10k"

# Every split lives under the same dataset folder.
input_root = f'../input/{dataset_name}'

# Annotation CSV files, one per split.
train_csv = f'{input_root}/Data_train.csv'
val_csv = f'{input_root}/Data_validation.csv'
test_csv = f'{input_root}/Data_test.csv'

# Image directories, one per split.
train_data = f'{input_root}/Training_data'
val_data = f'{input_root}/Validation_data'
test_data = f'{input_root}/Testing_data'

In [None]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

# No image transform is applied at the moment. A normalisation that was tried
# and then disabled:
#     transforms.Compose([transforms.Normalize(mean=(0.445,), std=(0.269,))])

# Both loaders share the same batching options.
loader_options = dict(batch_size=100, shuffle=True, num_workers=2)

# Training split.
training_data = Imageread(annotations_file=train_csv, img_dir=train_data)
train_dataloader = DataLoader(training_data, **loader_options)

# Validation split.
validation_data = Imageread(annotations_file=val_csv, img_dir=val_data)
validation_dataloader = DataLoader(validation_data, **loader_options)

# Model Architecture

In [None]:
# Regression CNN: two convolutional stages (Conv -> BatchNorm -> ReLU ->
# MaxPool -> Dropout) followed by a fully connected head that emits a single
# value per image.
# An earlier variant of this network used the same Conv/Linear sizes without
# the BatchNorm and Dropout layers.
layers = [
    # Stage 1: 1 input channel -> 64 feature maps.
    nn.Conv2d(1, 64, kernel_size=5),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Dropout(0.5),

    # Stage 2: 64 -> 128 feature maps.
    nn.Conv2d(64, 128, kernel_size=5),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Dropout(0.5),

    # Head. 128*13*13 has to equal the flattened size of the stage-2 output;
    # presumably the inputs are 1x64x64 images -- TODO confirm.
    nn.Flatten(),
    nn.Linear(128 * 13 * 13, 4096),
    nn.BatchNorm1d(4096),
    nn.ReLU(),
    nn.Linear(4096, 1),
]
model = nn.Sequential(*layers)

# Move the parameters to the selected device, then set up the loss and optimizer.
net = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.0005)

print(net)

# Training and validation

In [None]:
NUM_epoch = 25
# Per-epoch mean of the batch losses; entries for epochs that never ran stay 0.
validation_loss = np.zeros(NUM_epoch)
training_loss = np.zeros(NUM_epoch)

for epoch in range(NUM_epoch):
    # ---- Training pass ----
    # Switch Dropout/BatchNorm to training behaviour. This has to be done every
    # epoch because the validation pass below puts the network in eval mode.
    net.train()
    train_loss = 0.0
    train_len = 0  # number of batches processed in this epoch
    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        # Cast to float32 and reshape to (batch, 1) to match the single-unit output layer.
        labels = labels.float().view(-1,1).to(device)
        train_len = train_len + 1

        # Forward pass and loss between predictions and targets.
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # Clear old gradients, back-propagate, update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
    training_loss[epoch] = train_loss/train_len

    # ---- Validation pass ----
    # eval() disables Dropout and makes BatchNorm use its running statistics, so
    # the validation loss is deterministic and does not alter the BatchNorm buffers.
    net.eval()
    with torch.no_grad():
        val_len=0  # number of validation batches
        val_loss = 0.0
        for inputs, labels in validation_dataloader:
            inputs = inputs.to(device)
            labels = labels.float().view(-1,1).to(device)
            val_len = val_len + 1

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
        validation_loss[epoch] = val_loss/val_len
    print(f"epoch: {epoch}, train_loss: {training_loss[epoch]:.6f}, validation_loss: {validation_loss[epoch]:.6f}")
    # Early stop: from the third epoch on, quit once the validation loss moves
    # by less than 1e-5 between two consecutive epochs.
    if epoch>1 and np.abs(validation_loss[epoch]-validation_loss[epoch-1])<10**(-5):
        print('validation loss converges')
        break
    

In [None]:
# Tabulate, plot and save the losses of the epochs that actually ran
# (the training loop may have stopped early, so only the first epoch+1 entries are used).
n_epochs_run = epoch + 1
df = pd.DataFrame({
    'epoch': range(n_epochs_run),
    'training_loss': training_loss[:n_epochs_run],
    'validation_loss': validation_loss[:n_epochs_run],
})

loss_columns = ['training_loss', 'validation_loss']
ax = df.plot(
    y=loss_columns,
    figsize=(12, 6),
    xticks=df.index,
    fontsize=15,
    marker='o',
    rot=45,
)
ax.set_ylabel('loss', fontsize=15)
ax.set_xlabel('Epoch', fontsize=15)

# Save the figure before showing it, then write the table next to it.
plt.savefig('loss.jpg')
plt.show()
df.to_csv('loss.csv')


# Testing the network

In [None]:
# Test split, read in file order (no shuffling) in batches of 100.
testdata = Imageread(annotations_file=test_csv, img_dir=test_data)
test_dataloader = DataLoader(
    dataset=testdata,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

In [None]:
# Evaluate on the test split. eval() is required here: it disables Dropout and
# makes BatchNorm use its running statistics instead of per-batch statistics.
net.eval()
with torch.no_grad():
    test_len=0  # number of test batches
    test_loss=0.0
    # Collect per-batch results in lists and join them once at the end
    # (np.append inside the loop re-copies the whole array on every batch).
    output_chunks = []
    label_chunks = []
    for inputs, labels in test_dataloader:
        inputs = inputs.to(device)
        labels = labels.float().view(-1, 1).to(device)
        test_len = test_len + 1

        outputs = net(inputs)
        output_chunks.append(outputs.detach().cpu().numpy().ravel())
        label_chunks.append(labels.cpu().numpy().ravel())

        loss = criterion(outputs, labels)
        test_loss += loss.item()

    # Flat float64 arrays of all predictions / targets, in loader order.
    outputs_array = np.concatenate(output_chunks).astype(np.float64) if output_chunks else np.array([])
    labels_array = np.concatenate(label_chunks).astype(np.float64) if label_chunks else np.array([])

    # Mean of the per-batch losses.
    print(test_loss/test_len)

In [None]:
# Labels were divided by 100 when loaded, so multiply by 100 to get back to the
# age scale, rounded to one decimal.
true_age = np.round(labels_array * 100, 1)
predict_age = np.round(outputs_array * 100, 1)
df_test = pd.DataFrame({'true_age': true_age, 'predict_age': predict_age})

# Overlay true and predicted ages for every test sample.
ax = df_test.plot(
    y=['true_age', 'predict_age'],
    figsize=(12, 6),
    xticks=df_test.index,
    fontsize=15,
    marker='o',
    rot=45,
)
ax.set_ylabel('Age', fontsize=16)
ax.set_xticks([])  # hide the per-sample tick labels
plt.savefig('test.jpg')

df_test.to_csv('Test_results.csv')

In [None]:
# Plain-text dump of the test results table.
# To list every row instead of pandas' abbreviated view, enable:
#     pd.set_option('display.max_rows', None)
print(df_test)

# Visualization

In [None]:
import torchvision

In [None]:
def imshow(img):
    """Display a 3-D image tensor with matplotlib.

    Args:
        img: Tensor whose axes are reordered with ``(1, 2, 0)`` before
            plotting, i.e. the first axis becomes the last (channel-first
            input such as the output of ``torchvision.utils.make_grid``).
    """
    # detach() + cpu() let this also accept tensors that track gradients or
    # live on a GPU; for plain CPU tensors both calls are no-ops.
    npimg = img.detach().cpu().numpy()
    # matplotlib expects the channel axis last.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [None]:
# Show one random batch of 8 test images with their true and predicted ages.
# NOTE: this rebinds `testdata` / `test_dataloader` to a shuffled, batch-of-8 loader.
testdata = Imageread(test_csv, test_data, transform=None, target_transform=None)
test_dataloader = DataLoader(testdata, batch_size=8, shuffle=True)

# Inference mode: without eval(), Dropout stays active and BatchNorm would
# normalise with the statistics of this tiny batch.
net.eval()
with torch.no_grad():
    for inputs, labels in test_dataloader:
        imshow(torchvision.utils.make_grid(inputs))

        inputs = inputs.to(device)
        labels = labels.float().view(-1, 1).numpy()

        # Model computation; bring predictions back to the CPU for printing.
        outputs = net(inputs).cpu().numpy()

        # Iterate over the actual batch length so a batch shorter than 8
        # (small dataset) does not raise an IndexError.
        print('True Age   : ', end = '')
        print(' '.join(f'{labels[j][0]*100:.2f},' for j in range(len(labels))))
        print('Predict Age: ', end = '')
        print(' '.join(f'{outputs[j][0]*100:.2f},' for j in range(len(outputs))))
        break  # only the first batch is shown

    
    


In [None]:
# Persist the network's state_dict (its parameters and buffers) to disk.
weights_path = 'net_weights.pth'
torch.save(net.state_dict(), weights_path)

In [None]:
# Plot a randomly placed window of 100 consecutive test results.
window = 100
# The start is drawn from [0, 500) as before, but clamped so the window still
# fits when the results table has fewer than 600 rows (e.g. a small sample
# dataset); for larger tables the draw is unchanged.
max_start = max(len(df_test) - window, 0)
start = np.random.randint(0, min(500, max_start + 1))
ax = df_test.iloc[start:start+window].plot(y=['true_age','predict_age'],figsize= (12,6),fontsize=15,marker ='o', rot = 45)
ax.set_ylabel('Age', fontsize = 16)
ax.set_xticks([]);  # hide tick labels; ';' suppresses the cell's "[]" output