In [1]:
import cv2
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
from einops.layers.torch import Rearrange
from einops import rearrange
from facenet_pytorch import MTCNN
from self_attention_cv import TransformerEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import face_alignment
import requests
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!ls 'Videos7439' | wc -l

7439


In [3]:
!ls 'Spectrograms7439' | wc -l

7439


In [4]:
videos_folder = 'Videos7439'
spectrograms_folder = 'Spectrograms7439'

In [5]:
class CNN(nn.Module):
    def __init__(self, num_classes):
        super(CNN, self).__init__()
        self.features = models.vgg16(pretrained=True)
        # Modify the first layer to accept 6 channel input
        self.features.features[0] = nn.Conv2d(6, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Modify the final layer to output the desired number of classes
        self.features.classifier[6] = nn.Linear(self.features.classifier[6].in_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.softmax(x)
        return x


In [6]:
def extract_frame(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    mid_frame_index = frame_count // 2  # Index of the frame in the middle of the video
    cap.set(cv2.CAP_PROP_POS_FRAMES, mid_frame_index)
    ret, frame = cap.read()
    if ret:
        cap.release()
        return frame
    else:
        cap.release()
        return None

In [7]:
def detect_face(frame):
    mtcnn = MTCNN()
    boxes, _ = mtcnn.detect(frame)
    if boxes is not None:
        # Assuming only one face in the frame
        box = boxes[0]
        x1, y1, x2, y2 = box
        # Crop the frame to the detected face
        cropped_frame = frame[int(y1):int(y2), int(x1):int(x2)]
        return cropped_frame
    else:
        return None

In [8]:
def preprocess_image(frame):
    # Convert the frame to a PIL Image
    frame_pil = Image.fromarray(frame.astype('uint8'))

    # Convert the image to RGB by duplicating channels
    frame_pil = frame_pil.convert('RGB')

    # Resize and normalize the frame
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for RGB
    ])
    img_tensor = transform(frame_pil)
    return img_tensor

In [9]:
def preprocess_spectrogram(image_path):
    img = Image.open(image_path).convert('RGB')  # Convert to RGB by duplicating channels
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize to match VGG input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard normalization for RGB
    ])
    img_tensor = transform(img)
    return img_tensor

In [10]:
def load_spectrogram_dataset(spectrograms_folder, skipped_files):
    X = []
    y = []
    # List all files in the input folder
    files = sorted(os.listdir(spectrograms_folder))
    # Iterate over files in the folder
    for filename in tqdm(files):
        if filename.endswith(".png") and filename[:-3] not in skipped_files:  # Assuming mel spectrograms are stored as PNG files
            input_path = os.path.join(spectrograms_folder, filename)
            img_tensor = preprocess_spectrogram(input_path)
            X.append(img_tensor)
            # Extract label from filename (assuming filename is in format "abc_IEO_label_xyz.png")
            label = filename.split("_")[2]
            if label == "HAP":
                y.append(0)
            elif label == "SAD":
                y.append(1)
            elif label == "ANG":
                y.append(2)
            elif label == "DIS":
                y.append(3)
            elif label == "FEA":
                y.append(4)
            elif label == "NEU":
                y.append(5)
    return X, y

In [11]:
def load_dataset(videos_folder):
    X = []
    y = []
    skipped_files = []
    video_files = [file for file in sorted(os.listdir(videos_folder)) if file.endswith(".flv")]
    for video_file in tqdm(video_files):
        video_path = os.path.join(videos_folder, video_file)
        frame = extract_frame(video_path)
        if frame is not None:
            cropped_face = detect_face(frame)
            if cropped_face is not None:
                preprocessed_face = preprocess_image(cropped_face)
                X.append(preprocessed_face)
                label = video_file.split("_")[2].split(".")[0]  # Adjusted to handle different file extensions
                if label == "HAP":
                    y.append(0)
                elif label == "SAD":
                    y.append(1)
                elif label == "ANG":
                    y.append(2)
                elif label == "DIS":
                    y.append(3)
                elif label == "FEA":
                    y.append(4)
                elif label == "NEU":
                    y.append(5)
            else:
                print(f"No face detected in {video_file}. Skipping.")
                skipped_files.append(video_file[:-3])
        else:
            print(f"Failed to extract frame from {video_file}. Skipping.")
            skipped_files.append(video_file[:-3])
    return X, y, skipped_files

In [12]:
# Define the ConcatDataset class to concatenate video frame and spectrogram tensors
class ConcatDataset(torch.utils.data.Dataset):
    def __init__(self, X1, X2, y, modality='multimodal', fullscale=False):
        self.X1 = X1
        self.X2 = X2
        self.y = y
        self.modality = modality
        self.fullscale = fullscale
    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        if not self.fullscale:
            img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
            img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
            label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor
        else:
            img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
            img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
            label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor

        concatenated_img = torch.cat((img1, img2), dim=0)  # Concatenate along 0 dimension
        if self.modality == 'visual':
            return img1, label
        if self.modality == 'audio':
            return img2, label
        return concatenated_img, label # concatenate modalities

In [13]:
# # Define the ConcatDataset class to concatenate video frame and spectrogram tensors
# class ConcatDataset(torch.utils.data.Dataset):
#     def __init__(self, X1, X2, y, modality='multimodal', fullscale=False):
#         self.X1 = X1
#         self.X2 = X2
#         self.y = y
#         self.modality = modality
#         self.fullscale = fullscale
#     def __len__(self):
#         return len(self.y)

#     def __getitem__(self, idx):
#         if not self.fullscale:
#             img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
#             img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
#             label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor
#         else:
#             img1 = torch.from_numpy(self.X1[idx]).float()  # Convert numpy array to torch tensor
#             img2 = torch.from_numpy(self.X2[idx]).float()  # Convert numpy array to torch tensor
#             label = torch.tensor(self.y[idx])  # Convert numpy array to torch tensor

#         concatenated_img = torch.cat((img1, img2), dim=0)  # Concatenate along 0 dimension
#         if self.modality == 'visual':
#             return img1, label
#         if self.modality == 'audio':
#             return img2, label
#         return concatenated_img, label # concatenate modalities

In [14]:
def train_model(model, criterion, optimizer, train_loader, device):
    model.train()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    for inputs, labels in tqdm(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [15]:
def test_model(model, criterion, test_loader, device):
    model.eval()
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)
    epoch_loss = running_loss / len(test_loader.dataset)
    accuracy = correct_preds / total_preds
    return epoch_loss, accuracy

In [16]:

  # Load numpy arrays with memory-mapping
X = np.load('X.npy', mmap_mode='r')
y = np.load('y.npy', mmap_mode='r')
X_spec = np.load('X_spec.npy', mmap_mode='r')
y_spec = np.load('y_spec.npy', mmap_mode='r')

# Split the data into train and test sets
print(f"Total number of samples: {len(X)}")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
print(f"Number of train samples (video): {len(X_train)}", f"Number of test samples: {len(X_test)}")
X_train_spec, X_test_spec, y_train_spec, y_test_spec = train_test_split(X_spec, y_spec, test_size=0.3, random_state=42)
print(f"Number of train samples (audio): {len(X_train_spec)}", f"Number of test samples: {len(X_test_spec)}")

Total number of samples: 7429
Number of train samples (video): 5200 Number of test samples: 2229
Number of train samples (audio): 5200 Number of test samples: 2229


In [17]:
# Initialize the model
model = CNN(num_classes=6)  # 3 classes for HAPPY, SAD, ANGRY
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define loss function and optimizer
_lr = 0.00001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=_lr)

# Concatenate datasets
train_dataset = ConcatDataset(X_train, X_train_spec, y_train)
test_dataset = ConcatDataset(X_test, X_test_spec, y_test)

# Create data loaders
_bs = 32
# train_loader = torch.utils.data.DataLoader(list(zip(X_train, y_train)), batch_size=_bs, shuffle=True)
# test_loader = torch.utils.data.DataLoader(list(zip(X_test, y_test)), batch_size=_bs)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=_bs, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=_bs)


print(f"Batch size: {_bs}", f"lr: {_lr}")

Batch size: 32 lr: 1e-05


In [18]:
# Training loop
num_epochs = 50
for epoch in range(num_epochs):
    print("Epoch " + str(epoch))
    train_loss, train_accuracy = train_model(model, criterion, optimizer, train_loader, device)
    test_loss, test_accuracy = test_model(model, criterion, test_loader, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 0


100%|██████████| 163/163 [00:19<00:00,  8.54it/s]
100%|██████████| 70/70 [00:03<00:00, 20.81it/s]


Epoch 1/50, Train Loss: 1.7155, Train Accuracy: 0.3012, Test Loss: 1.6418, Test Accuracy: 0.3809
Epoch 1


100%|██████████| 163/163 [00:18<00:00,  8.99it/s]
100%|██████████| 70/70 [00:03<00:00, 22.70it/s]


Epoch 2/50, Train Loss: 1.6222, Train Accuracy: 0.4017, Test Loss: 1.6228, Test Accuracy: 0.4074
Epoch 2


100%|██████████| 163/163 [00:18<00:00,  9.00it/s]
100%|██████████| 70/70 [00:03<00:00, 22.65it/s]


Epoch 3/50, Train Loss: 1.5605, Train Accuracy: 0.4744, Test Loss: 1.5595, Test Accuracy: 0.4805
Epoch 3


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.83it/s]


Epoch 4/50, Train Loss: 1.5101, Train Accuracy: 0.5256, Test Loss: 1.5110, Test Accuracy: 0.5280
Epoch 4


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.73it/s]


Epoch 5/50, Train Loss: 1.4680, Train Accuracy: 0.5679, Test Loss: 1.4935, Test Accuracy: 0.5437
Epoch 5


100%|██████████| 163/163 [00:18<00:00,  9.00it/s]
100%|██████████| 70/70 [00:03<00:00, 22.85it/s]


Epoch 6/50, Train Loss: 1.4376, Train Accuracy: 0.6035, Test Loss: 1.5492, Test Accuracy: 0.4805
Epoch 6


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.80it/s]


Epoch 7/50, Train Loss: 1.4076, Train Accuracy: 0.6317, Test Loss: 1.4751, Test Accuracy: 0.5621
Epoch 7


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.76it/s]


Epoch 8/50, Train Loss: 1.3652, Train Accuracy: 0.6804, Test Loss: 1.4364, Test Accuracy: 0.6074
Epoch 8


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.82it/s]


Epoch 9/50, Train Loss: 1.3456, Train Accuracy: 0.6965, Test Loss: 1.4457, Test Accuracy: 0.5891
Epoch 9


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.74it/s]


Epoch 10/50, Train Loss: 1.3068, Train Accuracy: 0.7377, Test Loss: 1.4173, Test Accuracy: 0.6209
Epoch 10


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.83it/s]


Epoch 11/50, Train Loss: 1.2933, Train Accuracy: 0.7523, Test Loss: 1.4443, Test Accuracy: 0.5935
Epoch 11


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.93it/s]


Epoch 12/50, Train Loss: 1.2666, Train Accuracy: 0.7790, Test Loss: 1.4086, Test Accuracy: 0.6335
Epoch 12


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.87it/s]


Epoch 13/50, Train Loss: 1.2500, Train Accuracy: 0.7958, Test Loss: 1.4134, Test Accuracy: 0.6240
Epoch 13


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.74it/s]


Epoch 14/50, Train Loss: 1.2338, Train Accuracy: 0.8121, Test Loss: 1.4032, Test Accuracy: 0.6330
Epoch 14


100%|██████████| 163/163 [00:18<00:00,  9.01it/s]
100%|██████████| 70/70 [00:03<00:00, 22.77it/s]


Epoch 15/50, Train Loss: 1.2174, Train Accuracy: 0.8292, Test Loss: 1.4154, Test Accuracy: 0.6214
Epoch 15


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.86it/s]


Epoch 16/50, Train Loss: 1.2096, Train Accuracy: 0.8352, Test Loss: 1.3956, Test Accuracy: 0.6447
Epoch 16


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.92it/s]


Epoch 17/50, Train Loss: 1.1944, Train Accuracy: 0.8512, Test Loss: 1.4008, Test Accuracy: 0.6380
Epoch 17


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.87it/s]


Epoch 18/50, Train Loss: 1.1878, Train Accuracy: 0.8569, Test Loss: 1.4038, Test Accuracy: 0.6312
Epoch 18


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.89it/s]


Epoch 19/50, Train Loss: 1.1777, Train Accuracy: 0.8671, Test Loss: 1.3858, Test Accuracy: 0.6528
Epoch 19


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.87it/s]


Epoch 20/50, Train Loss: 1.1657, Train Accuracy: 0.8794, Test Loss: 1.4018, Test Accuracy: 0.6339
Epoch 20


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.90it/s]


Epoch 21/50, Train Loss: 1.1542, Train Accuracy: 0.8915, Test Loss: 1.4035, Test Accuracy: 0.6344
Epoch 21


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.89it/s]


Epoch 22/50, Train Loss: 1.1560, Train Accuracy: 0.8904, Test Loss: 1.3935, Test Accuracy: 0.6451
Epoch 22


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.78it/s]


Epoch 23/50, Train Loss: 1.1507, Train Accuracy: 0.8944, Test Loss: 1.4109, Test Accuracy: 0.6200
Epoch 23


100%|██████████| 163/163 [00:18<00:00,  9.02it/s]
100%|██████████| 70/70 [00:03<00:00, 22.89it/s]


Epoch 24/50, Train Loss: 1.1418, Train Accuracy: 0.9013, Test Loss: 1.3934, Test Accuracy: 0.6474
Epoch 24


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.96it/s]


Epoch 25/50, Train Loss: 1.1335, Train Accuracy: 0.9115, Test Loss: 1.3725, Test Accuracy: 0.6689
Epoch 25


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.94it/s]


Epoch 26/50, Train Loss: 1.1332, Train Accuracy: 0.9113, Test Loss: 1.4149, Test Accuracy: 0.6187
Epoch 26


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.94it/s]


Epoch 27/50, Train Loss: 1.1344, Train Accuracy: 0.9098, Test Loss: 1.3894, Test Accuracy: 0.6478
Epoch 27


100%|██████████| 163/163 [00:18<00:00,  9.00it/s]
100%|██████████| 70/70 [00:03<00:00, 22.93it/s]


Epoch 28/50, Train Loss: 1.1290, Train Accuracy: 0.9150, Test Loss: 1.3882, Test Accuracy: 0.6523
Epoch 28


100%|██████████| 163/163 [00:18<00:00,  9.03it/s]
100%|██████████| 70/70 [00:03<00:00, 22.85it/s]


Epoch 29/50, Train Loss: 1.1223, Train Accuracy: 0.9231, Test Loss: 1.3818, Test Accuracy: 0.6581
Epoch 29


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.93it/s]


Epoch 30/50, Train Loss: 1.1237, Train Accuracy: 0.9212, Test Loss: 1.3868, Test Accuracy: 0.6519
Epoch 30


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.94it/s]


Epoch 31/50, Train Loss: 1.1185, Train Accuracy: 0.9262, Test Loss: 1.3784, Test Accuracy: 0.6595
Epoch 31


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.91it/s]


Epoch 32/50, Train Loss: 1.1157, Train Accuracy: 0.9285, Test Loss: 1.3827, Test Accuracy: 0.6563
Epoch 32


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.93it/s]


Epoch 33/50, Train Loss: 1.1226, Train Accuracy: 0.9215, Test Loss: 1.3897, Test Accuracy: 0.6474
Epoch 33


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.99it/s]


Epoch 34/50, Train Loss: 1.1222, Train Accuracy: 0.9221, Test Loss: 1.3710, Test Accuracy: 0.6680
Epoch 34


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.94it/s]


Epoch 35/50, Train Loss: 1.1169, Train Accuracy: 0.9271, Test Loss: 1.3757, Test Accuracy: 0.6662
Epoch 35


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.90it/s]


Epoch 36/50, Train Loss: 1.1178, Train Accuracy: 0.9269, Test Loss: 1.3898, Test Accuracy: 0.6514
Epoch 36


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.96it/s]


Epoch 37/50, Train Loss: 1.1115, Train Accuracy: 0.9327, Test Loss: 1.3993, Test Accuracy: 0.6406
Epoch 37


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.95it/s]


Epoch 38/50, Train Loss: 1.1196, Train Accuracy: 0.9252, Test Loss: 1.3697, Test Accuracy: 0.6676
Epoch 38


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.97it/s]


Epoch 39/50, Train Loss: 1.1199, Train Accuracy: 0.9242, Test Loss: 1.3894, Test Accuracy: 0.6456
Epoch 39


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.96it/s]


Epoch 40/50, Train Loss: 1.1122, Train Accuracy: 0.9323, Test Loss: 1.4021, Test Accuracy: 0.6357
Epoch 40


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.95it/s]


Epoch 41/50, Train Loss: 1.1106, Train Accuracy: 0.9340, Test Loss: 1.4133, Test Accuracy: 0.6281
Epoch 41


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.95it/s]


Epoch 42/50, Train Loss: 1.1170, Train Accuracy: 0.9277, Test Loss: 1.4023, Test Accuracy: 0.6344
Epoch 42


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.95it/s]


Epoch 43/50, Train Loss: 1.1073, Train Accuracy: 0.9377, Test Loss: 1.3712, Test Accuracy: 0.6662
Epoch 43


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.96it/s]


Epoch 44/50, Train Loss: 1.1071, Train Accuracy: 0.9375, Test Loss: 1.3702, Test Accuracy: 0.6712
Epoch 44


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.93it/s]


Epoch 45/50, Train Loss: 1.1040, Train Accuracy: 0.9398, Test Loss: 1.3809, Test Accuracy: 0.6577
Epoch 45


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.92it/s]


Epoch 46/50, Train Loss: 1.1045, Train Accuracy: 0.9388, Test Loss: 1.3668, Test Accuracy: 0.6729
Epoch 46


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.96it/s]


Epoch 47/50, Train Loss: 1.1010, Train Accuracy: 0.9431, Test Loss: 1.3630, Test Accuracy: 0.6770
Epoch 47


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.92it/s]


Epoch 48/50, Train Loss: 1.0990, Train Accuracy: 0.9448, Test Loss: 1.3763, Test Accuracy: 0.6640
Epoch 48


100%|██████████| 163/163 [00:18<00:00,  9.05it/s]
100%|██████████| 70/70 [00:03<00:00, 22.91it/s]


Epoch 49/50, Train Loss: 1.1055, Train Accuracy: 0.9385, Test Loss: 1.3664, Test Accuracy: 0.6703
Epoch 49


100%|██████████| 163/163 [00:18<00:00,  9.04it/s]
100%|██████████| 70/70 [00:03<00:00, 22.87it/s]

Epoch 50/50, Train Loss: 1.1081, Train Accuracy: 0.9356, Test Loss: 1.3785, Test Accuracy: 0.6608





In [19]:
torch.save(model.state_dict(), 'vgg16_audio_video_'+str(num_epochs)+'_'+str(_bs)+'_'+str(_lr))