In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import torchvision
import torchvision.transforms as T
import os
from torch.utils.data import Dataset,DataLoader,random_split

# Set the random seeds so that runs are reproducible
def set_seed(seed):
  """Seed every random number generator this notebook relies on.

  Seeds Python's `random`, NumPy, and PyTorch (CPU, plus all CUDA devices
  when CUDA is available), and switches cuDNN to deterministic mode.

  Args:
    seed: integer seed applied to all generators.
  """
  # Local import keeps this cell self-contained; the file's import cell
  # does not import `random`.
  import random

  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
  # These are plain backend flags, so they are set regardless of whether a
  # GPU is present. The attribute must be spelled `deterministic`: assigning
  # to a misspelled name just creates an unused attribute and changes nothing.
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

set_seed(42)

In [None]:
"""
Use the MNIST dataset from torchvision
"""
# MNIST training split (downloaded into './' when missing); every image is
# converted by T.ToTensor().
trainval_dataset = torchvision.datasets.MNIST(
    root='./',
    train=True,
    transform=T.ToTensor(),
    download=True,
)

# 80/20 train/validation split; the validation part takes the remainder so
# the two lengths always add up to the full dataset.
num_train = int(0.8*len(trainval_dataset))
num_val = len(trainval_dataset) - num_train
train_dataset, val_dataset = random_split(trainval_dataset, [num_train, num_val])

# Only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# """
# Use a custom dataset
# """
# class Mydataset(Dataset):
#   def __init__(self,path):
#     data = pd.read_csv(path,header=None)
#     x = data.iloc[:,1:].to_numpy()
#     y = data.iloc[:,0].to_numpy()

#     self.x = x
#     self.y = y
  
#   def __len__(self):
#     return len(self.y)
#   def __getitem__(self,idx):
#     x , y = self.x[idx] , self.y[idx]
#     x = torch.as_tensor(x,dtype=torch.float32).reshape(1,28,28)
#     y = torch.as_tensor(y,dtype=torch.int64)
#     return x,y
    
# # Load the data
# path = '/content/sample_data/mnist_train_small.csv'
# dataset = Mydataset(path)

# train_dataset,val_dataset = random_split(dataset,[int(0.8*len(dataset)) , len(dataset) - int(0.8*len(dataset))] )

# print(len(dataset) , len(train_dataset) , len(val_dataset))

# train_dataloader = DataLoader(train_dataset,batch_size = 32,shuffle=True)
# val_dataloader = DataLoader(val_dataset,batch_size = 32,shuffle=False)


In [None]:
# Define our own MLP from scratch

class MyLinear(nn.Module):
  """Minimal fully connected layer computing `x @ weight` (+ `bias`).

  Args:
    in_channel: number of input features (rows of `weight`).
    out_channel: number of output features (columns of `weight`).
    bias: when True, a learnable bias of shape (out_channel,) is added.
  """
  def __init__(self,in_channel,out_channel,bias=True):
    super().__init__()
    # Stored as (in_channel, out_channel) so forward() can right-multiply
    # without a transpose.
    # NOTE(review): values come straight from torch.randn with no fan-in
    # scaling; consider dividing by sqrt(in_channel) if initial losses are large.
    self.weight = nn.Parameter(torch.randn(in_channel,out_channel),requires_grad=True)
    if bias:
      self.bias = nn.Parameter(torch.randn(out_channel),requires_grad=True)
    else:
      # Register the missing bias explicitly so nn.Module tracks the slot.
      self.register_parameter('bias', None)

  def forward(self,x):
    """Return `x @ weight`, plus `bias` when the layer has one."""
    out = x @ self.weight
    # Identity check: `!= None` would go through tensor comparison instead.
    if self.bias is not None:
      # Out-of-place add avoids mutating the matmul result in place.
      out = out + self.bias
    return out

In [None]:
class MyModel(nn.Module):
  """MLP classifier: three MyLinear layers with BatchNorm + ReLU in between.

  Layer sizes are 784 -> 1000 -> 512 -> num_class.

  Args:
    num_class: number of output logits.
  """
  def __init__(self,num_class=10):
    super().__init__()
    # Attribute names double as state_dict keys, so they are kept as-is.
    self.Linear1 = MyLinear(784,1000)       # weight 784x1000, bias 1000
    self.bn = nn.BatchNorm1d(1000)
    self.Linear2 = MyLinear(1000,512)       # weight 1000x512, bias 512
    self.bn2 = nn.BatchNorm1d(512)
    self.Linear3 = MyLinear(512,num_class)  # weight 512xnum_class, bias num_class

  def forward(self,x):
    """Flatten a 4-D batch to (batch, features) and return the class logits."""
    # x must be 4-D, e.g. (32, 1, 28, 28) -> (32, 784).
    batch, channels, dim_a, dim_b = x.shape
    flat = x.reshape(batch, channels * dim_a * dim_b)

    hidden = self.Linear1(flat)
    hidden = F.relu(self.bn(hidden))

    hidden = self.Linear2(hidden)
    hidden = F.relu(self.bn2(hidden))

    return self.Linear3(hidden)


# Build the classifier and push one random MNIST-shaped batch through it;
# the trailing expression displays the output shape.
model = MyModel(num_class=10)
x = torch.randn((32,1,28,28))
model(x).shape

In [None]:
def sum_param(model):
  """Print each parameter's name and shape, then the total element count.

  Args:
    model: object exposing `named_parameters()` (e.g. an nn.Module).

  Returns:
    Total number of elements across all parameters, as a Python int.
  """
  total_params = 0
  for name,param in model.named_parameters():
    # numel() is an exact int for every shape; np.prod on an empty (0-dim)
    # size yields the float 1.0 and would turn the running total into a float.
    total_params += param.numel()
    print(name,param.shape)
  print(f"the parameters of the model is {total_params}")
  return total_params


In [None]:
# Print every parameter's name/shape and the total parameter count of `model`.
sum_param(model)

In [None]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
  device = "cuda:0"
else:
  device = "cpu"

# Adam over all model parameters with a fixed learning rate; the scheduler
# line below is left disabled.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
#torch.optim.lr_scheduler.LinearLR(optimizer,1e-3,1e-4,)

def train_one_epoch(model,optimizer,train_dataloader,val_dataloader,epoch):
  """Train for one epoch, then validate and checkpoint on even epochs.

  Uses the module-level `device` for the model and every batch.

  Args:
    model: network mapping a batch of inputs to class logits.
    optimizer: optimizer constructed over `model`'s parameters.
    train_dataloader: yields (inputs, integer labels) training batches.
    val_dataloader: yields (inputs, integer labels) validation batches.
    epoch: epoch index; used in log lines and in the checkpoint file name.
  """
  model = model.to(device)

  #-------------------------training--------------------------------#
  model.train()
  for batch_idx,(x,y) in enumerate(train_dataloader):
    x,y = x.to(device),y.to(device)

    logits = model(x)
    loss = F.cross_entropy(logits,y)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    # Backpropagate
    loss.backward()
    # Update the parameters
    optimizer.step()

    if batch_idx % 100 ==0:
      print(f'--epoch:{epoch}----------train_loss:{loss.item():7f}-------------')

  #-------------------------validation------------------------------#
  # Switch to eval mode so BatchNorm normalises with its running statistics
  # and does not update them from validation batches.
  model.eval()
  num_correct = 0
  val_loss = []
  num = len(val_dataloader.dataset)
  with torch.no_grad():
    for x,y in val_dataloader:
      x,y = x.to(device),y.to(device)

      logits = model(x)
      loss = F.cross_entropy(logits,y)
      val_loss.append(loss.item())

      # argmax of the logits equals argmax of their softmax
      num_correct += (logits.argmax(-1) == y).sum().item()
  # Plain Python float; an empty validation set reports 0 instead of dividing by zero.
  correct = num_correct / num if num else 0.

  print(f"--epoch:{epoch}----val_acc:{correct:7f}-----val_loss:{np.mean(val_loss):7f}")

  #-------------------------checkpoint------------------------------#
  if epoch % 2 ==0:
    torch.save({
        "model":model.state_dict(),
        "optim":optimizer.state_dict(),
        "epoch":epoch
        },f'./val_acc_{correct:3f}_epoch_{epoch}.ckpt')
    print("a new model saved!")



In [None]:
# Run five epochs; each call trains, validates and may write a checkpoint.
epoch = 5
for i in range(epoch):
  train_one_epoch(
      model=model,
      optimizer=optimizer,
      train_dataloader=train_dataloader,
      val_dataloader=val_dataloader,
      epoch=i,
  )

In [None]:
# Load a saved model
# model = MyModel(10)
# checkpoint = torch.load('/content/val_acc_0.974750_epoch_3.ckpt')

# model.load_state_dict(checkpoint['model'])

In [None]:
# Evaluate on the MNIST test split from torchvision, using the same
# T.ToTensor() transform as the training data. The CSV-based `Mydataset`
# class only exists in commented-out code, so referencing it raised NameError.
test_dataset = torchvision.datasets.MNIST('./',False,transform = T.ToTensor(),download=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=False)
@torch.no_grad()
def predict(model,test_dataloader):
  """Print the accuracy of `model` over every sample in `test_dataloader`.

  Puts the model in eval mode and moves it, and each batch, to the
  module-level `device`. Gradients are disabled by the decorator.

  Args:
    model: network mapping a batch of inputs to class logits.
    test_dataloader: yields (inputs, integer labels) batches.
  """
  model.eval()
  model = model.to(device)

  correct = 0.
  for inputs, targets in test_dataloader:
    inputs = inputs.to(device)
    targets = targets.to(device)

    predicted = model(inputs).softmax(-1).argmax(-1)
    correct += (predicted == targets).sum()

  # Convert the running hit count into a fraction of the whole dataset.
  correct /=len(test_dataloader.dataset)
  print(f'---------test_acc:{correct}-------------')

# Print the accuracy of the current `model` on the test loader.
predict(model,test_dataloader)

In [None]:
import matplotlib.pyplot as plt
# Visual spot check: draw the first 36 training samples in a 6x6 grid, each
# titled with the model's prediction and the true label.
sample_iter = iter(train_dataset)
plt.figure(figsize=(25,25))
model.eval()
with torch.no_grad():
  for cell in range(1, 37):
    image, label = next(sample_iter)
    image = image.to(device)
    label = torch.as_tensor(label).to(device)

    # The model expects a batch dimension: (1, 28, 28) -> (1, 1, 28, 28).
    batch = image.reshape(1,1,28,28)
    pre = model(batch).softmax(-1).argmax(-1).item()

    plt.subplot(6,6,cell)
    plt.imshow(image[0].cpu())
    plt.title(f'predict:{pre},label:{label.item()}')
plt.show()