In [295]:
import torch
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim

In [296]:
f = pd.read_csv( "./data/air_pollution_data.csv" )
f

Unnamed: 0,Country,AQI Value,CO AQI Value,Ozone AQI Value,NO2 AQI Value,PM2.5 AQI Value
0,Russian Federation,51,1,36,0,51
1,Brazil,41,1,5,1,41
2,Italy,66,1,39,2,66
3,Poland,34,1,34,0,20
4,France,22,0,22,0,6
...,...,...,...,...,...,...
23458,India,184,3,154,2,184
23459,France,50,1,20,5,50
23460,India,50,1,22,1,50
23461,United States of America,71,1,44,2,71


读取数据集

In [297]:
class TDataSet(Dataset):
    def __init__( self, fpath ):
        self.data = pd.read_csv( fpath )
        dlist = self.data[ 'Country' ].tolist()
        el = list( set ( dlist ) )
        eln = { element: idx for idx, element in enumerate( el ) }
        num = [ int( eln[e] ) for e in dlist ]
        self.inputs = self.data.iloc[ :, 1:].values
        print( num )    
        self.labels = np.array( num ).reshape( -1, 1 )
        # n = self.data.shape[0]
        # self.labels = np.arange( 0, n ).reshape( n, 1 )
        
        # Normalize
        self.mean = np.mean( self.inputs, axis = 0 )    # 每一列
        self.std = np.std( self.inputs, axis = 0 )      # 标准差
        self.inputs = ( self.inputs - self.mean ) / self.std
        
        self.label_mean = np.mean( self.labels, axis = 0 )
        self.label_std = np.std( self.labels, axis = 0 )
        self.labels = ( self.labels - self.label_mean ) / self.label_std

        
    def __len__( self ):
        return len( self.data )
    
    def __getitem__( self, idx ):
        inputs = torch.tensor( self.inputs[idx], dtype=torch.float32 )
        labels = torch.tensor( self.labels[idx], dtype=torch.float32 )
        return inputs, labels
    

神经网络
- 没有卷积池化，全是全连接层
- 反向传播算法
- 通用性强，可用于多种任务

In [298]:
class BPModel( nn.Module ):
    def __init__( self, in_size, hidden_size, out_size ):
        super( BPModel, self ).__init__()
        self.method = nn.Sequential(
            nn.Linear( in_size, hidden_size ),
            nn.ReLU(),
            nn.Linear( hidden_size, out_size )
        )
        
        
    def forward(self, x):
        return self.method( x )

In [299]:
dataset = TDataSet( "./data/air_pollution_data.csv" )
dataloader = DataLoader( dataset, batch_size = 64, shuffle = True )

in_size = dataset.inputs.shape[1]
hidden_size = 64
out_size = 1
lr = 0.01


model = BPModel( in_size, hidden_size, out_size )
loss_f = nn.MSELoss()
optimizer = optim.Adam( model.parameters(), lr )

[28, 141, 57, 83, 39, 123, 160, 78, 28, 91, 156, 45, 129, 119, 64, 39, 129, 57, 123, 129, 83, 129, 141, 156, 141, 129, 175, 110, 28, 39, 123, 29, 141, 46, 92, 123, 149, 123, 123, 141, 156, 28, 148, 129, 123, 78, 91, 7, 123, 149, 123, 119, 129, 119, 24, 99, 102, 123, 45, 123, 123, 83, 175, 57, 115, 149, 99, 28, 123, 123, 160, 160, 149, 123, 141, 129, 160, 156, 160, 123, 129, 160, 39, 123, 153, 135, 38, 95, 57, 129, 141, 132, 132, 57, 38, 156, 29, 110, 135, 99, 18, 132, 141, 141, 141, 132, 139, 148, 28, 129, 141, 129, 67, 83, 99, 167, 46, 141, 99, 141, 167, 132, 141, 141, 141, 23, 25, 123, 123, 74, 97, 38, 123, 123, 28, 129, 129, 123, 123, 123, 123, 156, 156, 156, 156, 49, 156, 32, 156, 156, 156, 123, 160, 123, 78, 161, 74, 41, 160, 123, 123, 141, 40, 123, 160, 18, 156, 149, 160, 160, 52, 24, 45, 145, 160, 175, 28, 149, 47, 28, 129, 138, 123, 133, 123, 160, 156, 134, 167, 57, 160, 18, 51, 29, 28, 18, 83, 130, 130, 135, 129, 95, 99, 18, 18, 18, 129, 99, 28, 158, 123, 168, 18, 110, 99, 29,

训练

In [300]:
for epoch in range( 15 ):
    model.train()
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model( inputs )
        # labels = labels.view( -1, 1 )
        loss = loss_f( outputs, labels )
        loss.backward()
        optimizer.step()
        
    print( f'Epoch [ {epoch+1}/{15} ] Loss: {loss.item():.4f}' )

Epoch [ 1/15 ] Loss: 0.9057
Epoch [ 2/15 ] Loss: 0.7342
Epoch [ 3/15 ] Loss: 1.1627
Epoch [ 4/15 ] Loss: 1.0243
Epoch [ 5/15 ] Loss: 0.8564
Epoch [ 6/15 ] Loss: 0.9527
Epoch [ 7/15 ] Loss: 1.1222
Epoch [ 8/15 ] Loss: 0.7956
Epoch [ 9/15 ] Loss: 0.9033
Epoch [ 10/15 ] Loss: 0.6417
Epoch [ 11/15 ] Loss: 0.7754
Epoch [ 12/15 ] Loss: 0.7314
Epoch [ 13/15 ] Loss: 0.8700
Epoch [ 14/15 ] Loss: 0.7954
Epoch [ 15/15 ] Loss: 0.7415


In [301]:
torch.save( model.state_dict(), './path/BP.pth' )
torch.save( model, './path/BP.pt' )

预测

In [303]:
test = torch.load( './path/BP.pt' )

tst = [51,1,36,0,51]
tst = ( tst - dataset.mean ) / dataset.std
tst_t = torch.tensor( tst, dtype = torch.float32 ).unsqueeze( 0 )

model.eval()
with torch.no_grad():
    predict = test( tst_t )

predict = predict * dataset.label_std + dataset.label_mean

predict

tensor([[89.9406]], dtype=torch.float64)