In [1]:
# check GPU type.
!nvidia-smi

Tue Nov  5 10:27:36 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.90                 Driver Version: 565.90         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   49C    P8             10W /   55W |       0MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

** Import necessary packages **

In [2]:
_exp_name = "sample"
# Import necessary packages.
import numpy as np
import pandas as pd
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random
import torch
from torch.utils.data import DataLoader, random_split
from torch.nn.utils.rnn import pad_sequence
from torch.utils.tensorboard import SummaryWriter


seed

In [3]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

** Define dataset and dataloader **

In [4]:
def read_df(df,df_):
    df = df.iloc[:,1:].T
    df_=df_.iloc[:,1:].T
    df_append = pd.concat([df, df_], ignore_index=True, axis=0)
    return  df_append

class MyDataset(Dataset):

    def __init__(self,path,files = None):
        super(MyDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith("电流.csv")])
        self.files_= sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith("转矩.csv")])
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self, idx):
        try:
            fname = self.files[idx]
            data = pd.read_csv(fname)
            data_ = pd.read_csv(self.files_[idx])
            
            # Process data into tensor
            data_tensor = torch.tensor(read_df(data, data_).values, dtype=torch.float32)
            # Check if data_tensor has the expected shape
            if data_tensor.ndim != 2 or data_tensor.shape[0] != 4:  # Adjust according to your needs
                print(f"Unexpected shape for {fname}: {data_tensor.shape}")
                return None, -1  # Indicate an error
    
        except Exception as e:
            print(f"Error reading file {fname}: {e}")
            return None, -1  # Handle error appropriately
    
        try:
            label = int(fname.split("\\")[-1].split("_")[0])  # Use [-1] to get last part
        except Exception as e:
            print(f"Error extracting label from {fname}: {e}")
            label = 0  # Test has no label
    
        return data_tensor, label

def MyDataLoader(data_dir, batch_size, n_workers):
    """Generate dataloader"""
    dataset = MyDataset(data_dir)
    trainlen = int(0.9 * len(dataset))
    lengths = [trainlen, len(dataset) - trainlen]
    trainset, validset = random_split(dataset, lengths)

    train_loader = DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=n_workers,
        pin_memory=True,

    )
    valid_loader = DataLoader(
        validset,
        batch_size=batch_size,
        num_workers=n_workers,
        drop_last=True,
        pin_memory=True,

    )

    return train_loader, valid_loader, 

In [5]:
import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        self.query = nn.Conv1d(in_channels, max(in_channels // 8,1), kernel_size=1)
        self.key = nn.Conv1d(in_channels, max(in_channels // 8,1), kernel_size=1)
        self.value = nn.Conv1d(in_channels, in_channels, kernel_size=1)

    def forward(self, x):
        batch_size, channels, length = x.size()
        
        # Compute query, key, value
        Q = self.query(x)  # [batch_size, channels/8, length]
        K = self.key(x)    # [batch_size, channels/8, length]
        V = self.value(x)  # [batch_size, channels, length]

        # Calculate attention scores
        attention_scores = torch.bmm(Q.permute(0, 2, 1), K)  # [batch_size, length, length]
        attention_weights = torch.softmax(attention_scores, dim=-1)

        # Apply attention weights to values
        out = torch.bmm(V.view(batch_size, channels, -1), attention_weights)  # [batch_size, channels, length]

        return out + x  # Residual connection

class ResidualSelfAttentionBlock(nn.Module):
    def __init__(self, in_channels):
        super(ResidualSelfAttentionBlock, self).__init__()
        self.attention = SelfAttention(in_channels)
        self.conv = nn.Conv1d(in_channels, in_channels, kernel_size=3, padding=1)
        self.bn = nn.BatchNorm1d(in_channels)

    def forward(self, x):
        attn_out = self.attention(x)  # Apply attention
        conv_out = self.bn(self.conv(attn_out))  # Convolution after attention
        return conv_out + x  # Residual connection

class ResidualSelfAttentionNetwork(nn.Module):
    def __init__(self):
        super(ResidualSelfAttentionNetwork, self).__init__()
        
        self.layer1 = ResidualSelfAttentionBlock(4)  # 输入通道为4
        self.layer2 = ResidualSelfAttentionBlock(4)  # 保持相同的通道数
        
        self.fc = nn.Linear(4 * 1001, 11)  # 假设输出11个类别
        
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        
        x = x.view(x.size(0), -1)  # Flatten
        return self.fc(x)


In [6]:
class OneDCNN(nn.Module):
    def __init__(self, input_size, num_classes):
        super(OneDCNN, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=4, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.fc1 = nn.Linear(32 * ((input_size - 2) // 2 - 2), 128)  # Adjust based on input size
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten the output
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x
            

In [7]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = OneDCNN(input_size=1001, num_classes=10).to(device)

# The number of batch size.
batch_size = 10

# The number of training epochs.
n_epochs = 1000
# If no improvement in 'patience' epochs, early stop.
patience = 10000

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=1e-5)

writer = SummaryWriter('runs/demo_lr0.003_wd1e-5_bs128_ep100_pat300_5')

In [8]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = MyDataset("定子匝间短路")
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = MyDataset("定子匝间短路")
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
for batch in tqdm(train_loader):
    datas, labels = batch
    print(datas.shape, labels.shape)

  0%|          | 0/2 [00:00<?, ?it/s]

torch.Size([10, 4, 1001]) torch.Size([10])


RuntimeError: stack expects each tensor to be equal size, but got [4, 1001] at entry 0 and [4, 2002] at entry 4

In [None]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        datas, labels = batch
        
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(datas.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    writer.add_scalar('Loss/train', train_loss, epoch) 
    writer.add_scalar('Accuracy/train', train_acc, epoch)

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        datas, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(datas.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    writer.add_scalar('Loss/valid', train_loss, epoch) 
    writer.add_scalar('Accuracy/valid', train_acc, epoch)


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

  0%|          | 0/1 [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (10x7936 and 15904x128)