In [8]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.datasets import fetch_california_housing

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD
import torch.utils.data as Data
import matplotlib.pyplot as plt
import seaborn as sns


In [9]:
housedata_df = pd.read_csv("insurance.csv")

In [13]:
X = pd.get_dummies(housedata_df.drop(['charges'], axis=1) , drop_first=True, dtype=int).values
y = housedata_df.charges.values

# 切分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [14]:
# 数据标准化处理
scale = MinMaxScaler()
x_train_std = scale.fit_transform(X_train)
x_test_std = scale.transform(X_test)


In [15]:
# 将数据集转为张量
X_train_t = torch.from_numpy(x_train_std.astype(np.float32))
y_train_t = torch.from_numpy(y_train.astype(np.float32))
X_test_t = torch.from_numpy(x_test_std.astype(np.float32))
y_test_t = torch.from_numpy(y_test.astype(np.float32))

# 将训练数据处理为数据加载器
train_data = Data.TensorDataset(X_train_t, y_train_t)
test_data = Data.TensorDataset(X_test_t, y_test_t)
train_loader = Data.DataLoader(dataset=train_data, batch_size=64, shuffle=True, num_workers=1)


In [16]:
# 搭建全连接神经网络回归
class FNN_Regression(nn.Module):
    def __init__(self):
        super(FNN_Regression, self).__init__()
        # 第一个隐含层
        self.hidden1 = nn.Linear(in_features=8, out_features=100, bias=True)
        # 第二个隐含层
        self.hidden2 = nn.Linear(100, 100)
        # 第三个隐含层
        self.hidden3 = nn.Linear(100, 50)
        # 回归预测层
        self.predict = nn.Linear(50, 1)

    # 定义网络前向传播路径
    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        output = self.predict(x)
        # 输出一个一维向量
        return output[:, 0]


In [18]:
testnet = FNN_Regression()
# 定义优化器
optimizer = torch.optim.SGD(testnet.parameters(), lr=0.01)
loss_func = nn.MSELoss()  # 均方根误差损失函数
train_loss_all = []

# 对模型迭代训练，总共epoch轮
for epoch in range(30):
    train_loss = 0
    train_num = 0
    # 对训练数据的加载器进行迭代计算
    for step, (b_x, b_y) in enumerate(train_loader):
        output = testnet(b_x)  # MLP在训练batch上的输出
        loss = loss_func(output, b_y)  # 均方根损失函数
        optimizer.zero_grad()  # 每次迭代梯度初始化0
        loss.backward()  # 反向传播，计算梯度
        optimizer.step()  # 使用梯度进行优化
        train_loss += loss.item() * b_x.size(0)
        train_num += b_x.size(0)
    train_loss_all.append(train_loss / train_num)


In [20]:
def predict_single(input, target, model):
    inputs = input.unsqueeze(0)
    predictions = model(inputs)               
    prediction = predictions[0].detach()
    print("Input:", input)
    print("Target:", target)
    print("Prediction:", prediction)

In [22]:
input, target = X_test_t[0], y_test_t[0]
predict_single(input, target, testnet)

Input: tensor([0.5870, 0.2479, 0.4000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
Target: tensor(9095.0684)
Prediction: tensor(nan)
