In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime

import pandas as pd
import numpy as np
import seaborn as sns

from PIL import Image

import torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
dtype = torch.float
device = torch.device("cpu")

In [2]:
df = pd.read_csv(r"male and female voices\train\train\targets.tsv", sep='\t', \
                      names=['id', 'gender'])
 
df.head()

Unnamed: 0,id,gender
0,5d1f7e43366513a1d0a6ec5640c3dc24,1
1,9a701a4536a05b6610a590a9fe702ed8,1
2,cad0b8547008d1524c1a0e5fd51f9908,1
3,4bbe607e7dc95460e2cc1a6ee5f4dfa6,0
4,30fb32cba90b34af26f3f14f5d636805,0


In [3]:
model = torchvision.models.resnet34(weights='DEFAULT')
model.fc = nn.Linear(512, 1)
model = nn.Sequential(
    model,
    nn.Sigmoid()
)

loss_fn = torch.nn.BCELoss()

In [4]:
PATH = 'mfcc\\train\\train\\'
convert_tensor = torchvision.transforms.ToTensor()

x = Image.open(PATH + df.iloc[1]['id'] + '.jpg')
x = convert_tensor(x)
x = x[None, :]
y = df.iloc[0]['gender']
y = torch.tensor([y,], dtype=torch.float)
y = y[None, :]

model_out = model(x)
loss = loss_fn(model_out, y)

In [5]:
print(y[0, 0])
print(model_out[0, 0])
print(loss)

tensor(1.)
tensor(0.6172, grad_fn=<SelectBackward0>)
tensor(0.4826, grad_fn=<BinaryCrossEntropyBackward0>)


In [7]:
from torch.utils.data import Dataset, DataLoader

class JpgDataset(Dataset):
     def __init__(self, df):
         super(Dataset, self).__init__()
         self.PATH = 'mfcc\\train\\train\\'
         self.df = df
        
     def __len__(self):
         return len(self.df)
        
     def __getitem__(self, index):
        convert_tensor = torchvision.transforms.ToTensor()
        
        x = Image.open(self.PATH + df.iloc[index]['id'] + '.jpg')
        x = convert_tensor(x)
         
        y = df.iloc[index]['gender']
        y = torch.tensor([y,], dtype=torch.float)
         
        return x, y

In [8]:
dataset = JpgDataset(df)

In [9]:
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [0.8, 0.2])
print(len(train_dataset), len(val_dataset))

11149 2787


In [22]:
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    collate_fn=None,
    pin_memory=False,
 )

validation_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    collate_fn=None,
    pin_memory=False,
 )

In [23]:
train_features, train_labels = next(iter(train_loader))
train_features.shape, train_labels.shape

(torch.Size([32, 3, 224, 224]), torch.Size([32, 1]))

In [24]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)

In [25]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(train_loader):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        if i % 20 == 0 and i > 0:
            last_loss = running_loss / 20 # loss per batch
            print('  batch {} loss: {}'.format(i, last_loss))
            tb_x = epoch_index * len(train_loader) + i
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.

    return last_loss

In [26]:
# Initializing in a separate cell so we can easily add more epochs to the same run
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 1

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer)


    running_vloss = 0.0
    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata
            voutputs = model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

    avg_vloss = running_vloss / (i + 1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)
        
    epoch_number += 1

EPOCH 1:
  batch 20 loss: 0.04495586771517992
  batch 40 loss: 0.031232540705241264
  batch 60 loss: 0.03658239771611989
  batch 80 loss: 0.04373718770220876
  batch 100 loss: 0.03901425050571561
  batch 120 loss: 0.029875687672756612
  batch 140 loss: 0.047041407856158915
  batch 160 loss: 0.029163155891001224
  batch 180 loss: 0.036206843936815856
  batch 200 loss: 0.02235777131281793
  batch 220 loss: 0.02238837619079277
  batch 240 loss: 0.03694824620615691
  batch 260 loss: 0.0263868891983293
  batch 280 loss: 0.028784834139514714
  batch 300 loss: 0.04276459641987458
  batch 320 loss: 0.02133773671230301
  batch 340 loss: 0.04827359323389828
LOSS train 0.04827359323389828 valid 0.04163802042603493


In [21]:
# saved_model = GarmentClassifier()
# saved_model.load_state_dict(torch.load(PATH))
model.load_state_dict(torch.load("model_20230620_194453_0"))

<All keys matched successfully>

In [27]:
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    with torch.no_grad():
        for x, y in loader:
            x = x
            y = y
            
            scores = model(x).tolist()
            for i, score in enumerate(scores):
                if score[0] > .5:
                    l = 1
                else:
                    l = 0
                if l == int(y[i]):
                    num_correct += 1
                num_samples += 1
        
        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}') 
    
    model.train()

check_accuracy(validation_loader, model)

Got 2743 / 2787 with accuracy 98.42


Prediction

In [28]:
from glob import glob
jpg_files = glob("mfcc\\test\\*\\*.jpg")

In [29]:
l = list()
for file in jpg_files:
    l.append(file.split('\\')[-1][:-4])

In [30]:
l2 = [0] * len(jpg_files)

In [32]:
predict_df = pd.DataFrame()
predict_df['id'] = l
predict_df['gender'] = l2

In [33]:
predict_df

Unnamed: 0,id,gender
0,00100026dbdffcd01cde6ee9b9a9d273,0
1,0014278276a6cc05fe8c522af0a677df,0
2,0026e20266ceba9cdda2c116e89d2f3b,0
3,008b37fc832d3dfad9105961c5801c02,0
4,00b1d7d0d6fdb25050041f6e2ae2871e,0
...,...,...
3408,ffbea6a332d113f0bd16cf0b4ed07dc6,0
3409,ffee25334e0247dd07d80d0eb3ce7886,0
3410,fff86627949b37d236237ff895ff4268,0
3411,fff8aa9bac38e85734f64aad2b2f3814,0


In [35]:
model.eval()
PATH = "mfcc\\test\\test\\"
convert_tensor = torchvision.transforms.ToTensor()

x = Image.open(PATH + predict_df.iloc[3]['id'] + '.jpg')
x = convert_tensor(x)
x = x[None, :]

model_out = model(x)
model_out, predict_df.iloc[4]['id']

(tensor([[0.9994]], grad_fn=<SigmoidBackward0>),
 '00b1d7d0d6fdb25050041f6e2ae2871e')

In [36]:
model.eval()
PATH = "mfcc\\test\\test\\"
convert_tensor = torchvision.transforms.ToTensor()

with torch.no_grad():
    for i in range(len(predict_df)):
        x = Image.open(PATH + predict_df.iloc[i]['id'] + '.jpg')
        x = convert_tensor(x)
        x = x[None, :]
        model_out = model(x)
        l = int(model_out.tolist()[0][0] > .5)
        predict_df.at[i, 'gender'] = l
   

In [37]:
predict_df

Unnamed: 0,id,gender
0,00100026dbdffcd01cde6ee9b9a9d273,1
1,0014278276a6cc05fe8c522af0a677df,1
2,0026e20266ceba9cdda2c116e89d2f3b,1
3,008b37fc832d3dfad9105961c5801c02,1
4,00b1d7d0d6fdb25050041f6e2ae2871e,0
...,...,...
3408,ffbea6a332d113f0bd16cf0b4ed07dc6,1
3409,ffee25334e0247dd07d80d0eb3ce7886,0
3410,fff86627949b37d236237ff895ff4268,0
3411,fff8aa9bac38e85734f64aad2b2f3814,0


In [38]:
predict_df.to_csv('answers.tsv', sep="\t", index=False, header=False, encoding='utf-8')