In [1]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
%pip install tensorboard

Looking in indexes: https://download.pytorch.org/whl/cu118
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torchvision.models as models




MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [35]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch.utils.data import random_split

folder_path = '../../mfcc'
class MyDataset(Dataset):
    def __init__(self, folder_path,transform=None):
        self.data = []
        self.labels = []
        self.max_length = 0
        self.feature_count = 0
        self.transform = transform
        label_map = {}  # 用于映射字符标签到整数标签的字典
        label_index = 0
        for file_name in os.listdir(folder_path):
            if file_name.endswith('embedding_txt'):
                self.feature_count += 1
                file_path = os.path.join(folder_path, file_name)
                with open(file_path, 'r') as f:
                    for line in f:
                        mfcc_features = [float(x) for x in line.split()]
                        if len(mfcc_features) > self.max_length:
                           self.max_length = len(mfcc_features)
        dataset_size = self.feature_count
        print("Feature amounts: ",self.feature_count)
        print("Dataset size: ",dataset_size)
                    
        for file_name in os.listdir(folder_path):
            if file_name.endswith('embedding_txt'):
                file_path = os.path.join(folder_path, file_name)
                with open(file_path, 'r') as f:
                    for line in f:
                        mfcc_features = [float(x) for x in line.split()]
                        # 填充特征向量到相同长度
                        if len(mfcc_features)< self.max_length:
                            mfcc_features = self.paddingByMaxLength(mfcc_features)
                        mfcc_features_tensor = torch.tensor(mfcc_features, dtype=torch.float32)
                        self.data.append(mfcc_features_tensor)
                        speaker_id = os.path.basename(file_path)[:3]
                        if speaker_id not in label_map:
                            label_map[speaker_id] = label_index
                            label_index += 1
                        self.labels.append(label_map[speaker_id])
                        
    def paddingByMaxLength(self, features):
        if len(features) < self.max_length:
            padded_features = features + [0.0] * (self.max_length - len(features))
            return padded_features
        return features

    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        features_tensor = self.data[idx]
        label_tensor = torch.tensor(int(self.labels[idx]), dtype=torch.long)
        return {'features': features_tensor, 'label': label_tensor}

class ToTensor(object):
    def __call__(self, sample):
        features, label = sample['features'], sample['label']
        return {'features': torch.tensor(features, dtype=torch.float32),
                'label': torch.tensor(label, dtype=torch.long)}

dataset_size = 0    
dataset = MyDataset(folder_path)       
batch_size = 64
train_size = int(0.8*len(dataset))
val_size = int(0.1*len(dataset))
test_size = int(0.1*len(dataset))
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size]) #For MFCC
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size]) #For MFCC Specturm   
def train_dataloader(train_dataset):
        return DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

def val_dataloader(val_dataset):
        return DataLoader(dataset=val_dataset, batch_size=batch_size)

def test_dataloader(test_dataset):
        return DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

#loading dataloader
trainloader = train_dataloader(train_dataset)
validationloader = val_dataloader(val_dataset)
testloader = test_dataloader(test_dataset)


Feature amounts:  760
Dataset size:  760


In [5]:

for data in trainloader:
    print("Features shape:", data['features'].shape)

Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([64, 5096])
Features shape: torch.Size([32, 5096])


In [7]:
resNetModel = models.resnet50(pretrained = True)
# 获取第一个卷积层的权重
conv1_weight = resNetModel.conv1.weight

# 将通道数修改为1
modified_conv1_weight = conv1_weight[:, :1, :, :]

# 修改模型的第一个卷积层的权重
resNetModel.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
resNetModel.conv1.weight.data = modified_conv1_weight

# 检查修改后的模型结构
print(resNetModel)

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [23]:
from torch import nn
from torch import optim
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resNetModel.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resNetModel.parameters(), lr=0.001)

log_dir = "./logs/resNet50"
writer = SummaryWriter(log_dir)

best_accuracy = 0.0  # 初始化最佳准确率为0
best_model_path = "./best_restNetmodel.pth"  # 模型保存路径

print("Number of mini-batches in one epoch:", len(trainloader))
for epoch in range(100):
    running_loss = 0.0  
    for i, data in enumerate(trainloader, 0):
        inputs_origin = data['features']
        inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
        inputs, labels = inputs_new.to(device), data['label'].to(device)  
        optimizer.zero_grad()

        outputs = resNetModel(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % len(trainloader) == len(trainloader) - 1:
            average_loss = running_loss / 100
            print(f"Epoch {epoch+1}, Batch {i+1}, Loss: {average_loss:.6f}")

            global_step = epoch * len(trainloader) + i
            writer.add_scalar("Loss", average_loss, global_step)

            running_loss = 0.0

    # 在每个epoch结束后执行验证集评估
    correct = 0
    total = 0
    with torch.no_grad():
        for data in validationloader:
            inputs_origin = data['features']
            inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
            inputs, labels = inputs_new.to(device), data['label'].to(device)
            outputs = resNetModel(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * (correct / total)
    print(f"Epoch {epoch+1}, Validation Accuracy: {accuracy:.6f}%")

    # 如果当前模型在验证集上表现优于之前的最佳表现，则保存当前模型参数
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(resNetModel.state_dict(), best_model_path)
        print("Best model saved with validation accuracy:", best_accuracy)

print("resNetModel Training finished")

Number of mini-batches in one epoch: 10
Epoch 1, Batch 10, Loss: 0.000494
Epoch 1, Validation Accuracy: 72.368421%
Best model saved with validation accuracy: 72.36842105263158
Epoch 2, Batch 10, Loss: 0.001070
Epoch 2, Validation Accuracy: 64.473684%
Epoch 3, Batch 10, Loss: 0.001933
Epoch 3, Validation Accuracy: 59.210526%
Epoch 4, Batch 10, Loss: 0.002360
Epoch 4, Validation Accuracy: 60.526316%
Epoch 5, Batch 10, Loss: 0.003223
Epoch 5, Validation Accuracy: 68.421053%
Epoch 6, Batch 10, Loss: 0.004056
Epoch 6, Validation Accuracy: 67.105263%
Epoch 7, Batch 10, Loss: 0.000960
Epoch 7, Validation Accuracy: 64.473684%
Epoch 8, Batch 10, Loss: 0.002137
Epoch 8, Validation Accuracy: 59.210526%
Epoch 9, Batch 10, Loss: 0.000744
Epoch 9, Validation Accuracy: 68.421053%
Epoch 10, Batch 10, Loss: 0.002651
Epoch 10, Validation Accuracy: 71.052632%
Epoch 11, Batch 10, Loss: 0.001021
Epoch 11, Validation Accuracy: 59.210526%
Epoch 12, Batch 10, Loss: 0.002736
Epoch 12, Validation Accuracy: 64.4

In [29]:
mobileNetModel = models.mobilenet_v2(pretrained = True)
# 修改第一个卷积层的输入通道数
mobileNetModel.features[0][0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

# 打印修改后的模型结构
print(mobileNetModel)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=



In [31]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
mobileNetModel.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mobileNetModel.parameters(), lr=0.001)

log_dir = "./logs/mobileNet_v2"
writer = SummaryWriter(log_dir)

best_accuracy = 0.0  # 初始化最佳准确率为0
best_model_path = "./best_mobilemodel.pth"  # 模型保存路径

print("Number of mini-batches in one epoch:", len(trainloader))
for epoch in range(100):
    running_loss = 0.0  
    for i, data in enumerate(trainloader, 0):
        inputs_origin = data['features']
        inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
        inputs, labels = inputs_new.to(device), data['label'].to(device)  
        optimizer.zero_grad()

        outputs = mobileNetModel(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % len(trainloader) == len(trainloader) - 1:
            average_loss = running_loss / 100
            print(f"Epoch {epoch+1}, Batch {i+1}, Loss: {average_loss:.6f}")

            global_step = epoch * len(trainloader) + i
            writer.add_scalar("Loss", average_loss, global_step)

            running_loss = 0.0

    # 在每个epoch结束后执行验证集评估
    correct = 0
    total = 0
    with torch.no_grad():
        for data in validationloader:
            inputs_origin = data['features']
            inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
            inputs, labels = inputs_new.to(device), data['label'].to(device)
            outputs = mobileNetModel(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * (correct / total)
    print(f"Epoch {epoch+1}, Validation Accuracy: {accuracy:.6f}%")

    # 如果当前模型在验证集上表现优于之前的最佳表现，则保存当前模型参数
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(mobileNetModel.state_dict(), best_model_path)
        print("Best model saved with validation accuracy:", best_accuracy)

print("mobileNetModel Training finished")

Number of mini-batches in one epoch: 10
Epoch 1, Batch 10, Loss: 0.360613
Epoch 1, Validation Accuracy: 34.210526%
Best model saved with validation accuracy: 34.21052631578947
Epoch 2, Batch 10, Loss: 0.152699
Epoch 2, Validation Accuracy: 46.052632%
Best model saved with validation accuracy: 46.05263157894737
Epoch 3, Batch 10, Loss: 0.099257
Epoch 3, Validation Accuracy: 52.631579%
Best model saved with validation accuracy: 52.63157894736842
Epoch 4, Batch 10, Loss: 0.063650
Epoch 4, Validation Accuracy: 55.263158%
Best model saved with validation accuracy: 55.26315789473685
Epoch 5, Batch 10, Loss: 0.037397
Epoch 5, Validation Accuracy: 57.894737%
Best model saved with validation accuracy: 57.89473684210527
Epoch 6, Batch 10, Loss: 0.022807
Epoch 6, Validation Accuracy: 60.526316%
Best model saved with validation accuracy: 60.526315789473685
Epoch 7, Batch 10, Loss: 0.013641
Epoch 7, Validation Accuracy: 64.473684%
Best model saved with validation accuracy: 64.47368421052632
Epoch 8

In [33]:
denseNetModel = models.densenet201(pretrained = True)
# 修改第一个卷积层的输入通道数
denseNetModel.features.conv0 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

# 打印修改后的模型结构
print(denseNetModel)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

In [34]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
denseNetModel.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(denseNetModel.parameters(), lr=0.001)

log_dir = "./logs/resNet50"
writer = SummaryWriter(log_dir)

best_accuracy = 0.0  # 初始化最佳准确率为0
best_model_path = "./best_restNetmodel.pth"  # 模型保存路径

print("Number of mini-batches in one epoch:", len(trainloader))
for epoch in range(100):
    running_loss = 0.0  
    for i, data in enumerate(trainloader, 0):
        inputs_origin = data['features']
        inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
        inputs, labels = inputs_new.to(device), data['label'].to(device)  
        optimizer.zero_grad()

        outputs = denseNetModel(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % len(trainloader) == len(trainloader) - 1:
            average_loss = running_loss / 100
            print(f"Epoch {epoch+1}, Batch {i+1}, Loss: {average_loss:.6f}")

            global_step = epoch * len(trainloader) + i
            writer.add_scalar("Loss", average_loss, global_step)

            running_loss = 0.0

    # 在每个epoch结束后执行验证集评估
    correct = 0
    total = 0
    with torch.no_grad():
        for data in validationloader:
            inputs_origin = data['features']
            inputs_new = inputs_origin.unsqueeze(1).unsqueeze(2)
            inputs, labels = inputs_new.to(device), data['label'].to(device)
            outputs = denseNetModel(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * (correct / total)
    print(f"Epoch {epoch+1}, Validation Accuracy: {accuracy:.6f}%")

    # 如果当前模型在验证集上表现优于之前的最佳表现，则保存当前模型参数
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(denseNetModel.state_dict(), best_model_path)
        print("Best model saved with validation accuracy:", best_accuracy)

print("denseNetModel Training finished")

Number of mini-batches in one epoch: 10


RuntimeError: Given input size: (128x1x1274). Calculated output size: (128x0x637). Output size is too small