In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn 
import torch.utils.data as Data
from torch.autograd import Variable
from standardlize import standardlize
from equalize import equalize
from utils import splitAfterStd

In [3]:
df = pd.read_csv('../data/sampleMatrixGene.txt', sep='\t', index_col=0)
print(df.shape)

In [41]:
# 特征选择，数据标准化，划分训练测试集
selectGenes = np.loadtxt('./selectGenes.txt', dtype=str)
selectCol = np.append(selectGenes, ['type','label'])
df = df.loc[:,selectCol]

data_std = standardlize(df.iloc[:,:-2])
train_data , test_data , train_tag , test_tag = splitAfterStd(data_std, df.iloc[:,-1].values, ratio=0.8)
train_data_resample , train_tag_resample = equalize(train_data, train_tag)

In [42]:
class trainDataSet(torch.utils.data.Dataset):
    def __init__(self):
        self.x = np.array(train_data_resample)
        self.y = np.array(train_tag_resample)
        self.x = torch.tensor(self.x, dtype=torch.float32)
        self.y = torch.tensor(self.y, dtype=torch.int64)
    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]
    def __len__(self):
        return len(self.x)
class testDataSet(torch.utils.data.Dataset):
    def __init__(self):
        self.x = np.array(test_data)
        self.y = np.array(test_tag)
        self.x = torch.tensor(self.x, dtype=torch.float32)
        self.y = torch.tensor(self.y, dtype=torch.int64)
    def __getitem__(self,idx):
        return self.x[idx],self.y[idx]
    def __len__(self):
        return len(self.x)

In [43]:
class LinearNet(nn.Module):
    def __init__(self,num_classes=2):
        super(LinearNet,self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(1000,512), nn.ReLU(True))
        self.layer2 = nn.Sequential(nn.Linear(512,256), nn.ReLU(True))
        self.layer3 = nn.Sequential(nn.Linear(256,128), nn.ReLU(True))
        self.layer4 = nn.Sequential(nn.Linear(128,64), nn.ReLU(True))
        self.layer5 = nn.Sequential(nn.Linear(64,32), nn.ReLU(True))
        self.layer6 = nn.Sequential(nn.Linear(32,num_classes), nn.ReLU(True))

    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        return out
        
model = LinearNet()
model.cuda()

LinearNet(
  (layer1): Sequential(
    (0): Linear(in_features=1000, out_features=512, bias=True)
    (1): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Linear(in_features=512, out_features=256, bias=True)
    (1): ReLU(inplace=True)
  )
  (layer3): Sequential(
    (0): Linear(in_features=256, out_features=128, bias=True)
    (1): ReLU(inplace=True)
  )
  (layer4): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU(inplace=True)
  )
  (layer5): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU(inplace=True)
  )
  (layer6): Sequential(
    (0): Linear(in_features=32, out_features=2, bias=True)
    (1): ReLU(inplace=True)
  )
)

In [44]:
BATCH_SIZE = 50
train_dataset = trainDataSet()
test_dataset  = testDataSet()
train_loader  = Data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader   = Data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [45]:
NUM_EPOCHS = 50
LR = 1e-3
loss = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=0.9, weight_decay=1e-6, nesterov=True)
tr_acc = []
te_acc = []
val_loss_arr = []
for echo in range(NUM_EPOCHS):
    train_loss = 0   
    train_acc = 0   
    model.train()    
    for i,(X,label) in enumerate(train_loader):    
        X = Variable(X).cuda()
        # print(X)       
        label = Variable(label).cuda()
        # print(label)
        out = model(X)
        # print(out)     
        lossvalue = loss(out,label)         
        optimizer.zero_grad()       
        lossvalue.backward()    
        optimizer.step()          
         
        train_loss += float(lossvalue)   
        _,pred = out.max(1)
        num_correct = (pred == label).sum()
        acc = int(num_correct) / X.shape[0]
        train_acc += acc
    print("echo:"+' ' + str(echo))
    print("loss:" + ' ' + str(train_loss / len(train_loader)))
    print("TrainACC:" + ' '+str(train_acc / len(train_loader)))
    tr_acc.append(train_acc / len(train_loader))

    eval_acc = 0
    val_loss = 0
    model.eval()
    for i,(X,label) in enumerate(test_loader):
        X = Variable(X).cuda()
        label = Variable(label).cuda()
        testout = model(X)
        v_loss = float(loss(testout,label))

        _, pred = testout.max(1)
        num_correct = (pred == label).sum()
        acc = int(num_correct) / X.shape[0]
        eval_acc += acc
        val_loss += v_loss
    print("ValLoss:" + ' ' + str(val_loss / len(test_loader)))
    print('ValACC:' + ' ' + str(eval_acc / len(test_loader)))
    cur_eval_acc = eval_acc/len(test_loader)
    te_acc.append(eval_acc/len(test_loader))
    val_loss_arr.append(val_loss/len(test_loader))

echo: 0
loss: 0.691578826858002
TrainACC: 0.49883495145631057
ValLoss: 0.7008529510991327
ValACC: 0.07977011494252877
echo: 1
loss: 0.6854325035243358
TrainACC: 0.5046601941747567
ValLoss: 0.6997905846299797
ValACC: 0.11770114942528741
echo: 2
loss: 0.6593773434463056
TrainACC: 0.7818446601941749
ValLoss: 0.657918749184444
ValACC: 0.8648275862068964
echo: 3
loss: 0.522258539367648
TrainACC: 0.9480582524271833
ValLoss: 0.44637319548376675
ValACC: 0.944137931034483
echo: 4
loss: 0.19228952475687833
TrainACC: 0.9772815533980571
ValLoss: 0.13142168264964532
ValACC: 0.9737931034482761
echo: 5
loss: 0.06330715228342315
TrainACC: 0.986893203883494
ValLoss: 0.06935893647886555
ValACC: 0.9806896551724139
echo: 6
loss: 0.036380845870262066
TrainACC: 0.991165048543689
ValLoss: 0.05611365795906248
ValACC: 0.9841379310344829
echo: 7
loss: 0.024699154965471006
TrainACC: 0.9944660194174753
ValLoss: 0.054059336491828335
ValACC: 0.9841379310344829
echo: 8
loss: 0.018951747003599133
TrainACC: 0.99572815