In [208]:
import pandas as pd
import numpy as np
import torch.nn as nn
# import torch.optim as optim
import torch
from torch.utils.data import Dataset, DataLoader, random_split

In [160]:
class MyDataset(Dataset):
    def __init__(self, csv_file):
        self.csv_file = csv_file
        data = pd.read_csv(csv_file, header=None)
        data.dropna(inplace=True)
        data = data.replace("a", 1.0)
        data = data.replace("b", 2.0)
        data = data.replace("c", 3.0)
        data = data.replace("d", 4.0)
        data = data.replace("e", 5.0)
        data = data.replace("f", 6.0)
        data = data.replace("g", 7.0)
        data = data.replace("h", 8.0)
        data.loc[data[6] != "draw", 6] = 0.0
        data.loc[data[6] == "draw", 6] = 1.0
        for i in range(6):
            data[i] = (data[i]-data[i].mean()) / data[i].std()
        data.reset_index(drop=True, inplace=True)
        self.data = data.iloc[:, :-1]
        self.label = data.iloc[:, -1]
        self.data = self.data.astype("float")
        self.label = self.label.astype("float")
        self.data = torch.tensor(np.array(self.data.values), dtype=torch.float32)
        # self.label = torch.tensor(np.array([[1, 0] if self.label[i] == 1.0 else [0, 1] for i in range(len(self.label))]), dtype=torch.float32)
        self.label = torch.tensor(np.array(self.label), dtype=torch.long)

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
        sample = {"data": self.data[index], "label": self.label[index]}
        return sample

    def __len__(self):
        return len(self.data)

In [210]:
dataset = MyDataset(r"C:\Users\weitao\Desktop\Untitled Folder\ml\krkopt.data")
dataset_length = len(dataset)
trainset_length = int(0.7 * dataset_length)
validset_length = int(0.1 * dataset_length)
testset_length = dataset_length - trainset_length - validset_length
trainset, validset, testset = random_split(dataset, [trainset_length, validset_length, testset_length])
dataset[0]

{'data': tensor([-2.2511, -0.9218, -1.1078, -0.6625, -0.8771, -1.0905]),
 'label': tensor(1)}

In [219]:
# dataloader = DataLoader(dataset, 100, shuffle=True)
# for i_batch, smaple_batched in enumerate(dataloader):
#     print(i_batch, smaple_batched["label"].size())
#     break
trainloader = DataLoader(trainset, batch_size=100, shuffle=True)
validloader = DataLoader(validset, batch_size=100, shuffle=False)
testloader = DataLoader(testset, batch_size=100, shuffle=False)

In [246]:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(6, 6), 
            nn.BatchNorm1d(6), 
            nn.ReLU(), 
            nn.Linear(6, 10),
            nn.BatchNorm1d(10), 
            nn.ReLU(),  
            nn.Linear(10, 6), 
            nn.BatchNorm1d(6), 
            nn.Linear(6, 2)
            # 之所以最后不加softmax因为，计算crossentropyloss的时候会计算
        )
    
    def forward(self, x):
        return self.net(x)

In [247]:
model = MyModel()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0)

In [249]:
for epoch in range(10):
    runing_loss = 0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data["data"], data["label"]
        optimizer.zero_grad()
        pred = model(inputs)
        # 如果有多个样本，则已经做了平均
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
        runing_loss += loss.item()
    print("loss: %.3f"%(runing_loss / i))
    with torch.no_grad():
        correct = 0
        total = 0
        for i, data in enumerate(validloader, 0):
            inputs, labels = data["data"], data["label"]
            out = model(inputs)
            _, pre = torch.max(out.data, 1)
            total += labels.size(0)
            correct += (labels == pre).sum().item()
        # print(correct, total)
        print("accuary: %.3f" %(correct / total))

loss: 0.163
accuary: 0.944
loss: 0.153
accuary: 0.947
loss: 0.141
accuary: 0.953
loss: 0.133
accuary: 0.955
loss: 0.117
accuary: 0.961
loss: 0.106
accuary: 0.963
loss: 0.096
accuary: 0.968
loss: 0.088
accuary: 0.971
loss: 0.089
accuary: 0.969
loss: 0.089
accuary: 0.975


In [250]:
with torch.no_grad():
    correct = 0
    total = 0
    for i, data in enumerate(testloader, 0):
        inputs, labels = data["data"], data["label"]
        out = model(inputs)
        _, pre = torch.max(out.data, 1)
        total += labels.size(0)
        correct += (labels == pre).sum().item()
    print(correct, total)
    print("accuary: %.3f" %(correct / total))

5415 5612
accuary: 0.965
