In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, random_split
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from datetime import datetime

In [2]:
# 加载和准备数据
data = pd.read_csv('earthquake.csv')
data['Date'] = pd.to_datetime(data['Date'])
data['Time'] = pd.to_datetime(data['Time'], format='%H:%M:%S').dt.time

In [3]:
data['Timestamp'] = data.apply(lambda row: pd.Timestamp(f"{row['Date']} {row['Time']}"), axis=1)

In [4]:
data['Timestamp']

0       1965-01-02 13:44:18
1       1965-01-04 11:29:49
2       1965-01-05 18:05:58
3       1965-01-08 18:49:43
4       1965-01-09 13:32:50
                ...        
23407   2016-12-28 08:22:12
23408   2016-12-28 09:13:47
23409   2016-12-28 12:38:51
23410   2016-12-29 22:30:19
23411   2016-12-30 20:08:28
Name: Timestamp, Length: 23412, dtype: datetime64[ns]

In [6]:
data['Timestamp'] = data['Timestamp'].view('int64') // 10**9 #Unix时间戳
features = data[['Latitude', 'Longitude', 'Timestamp']]
targets = data[['Depth', 'Magnitude']]

In [7]:
features.values

array([[ 1.92460000e+01,  1.45616000e+02, -1.57630542e+08],
       [ 1.86300000e+00,  1.27352000e+02, -1.57465811e+08],
       [-2.05790000e+01, -1.73972000e+02, -1.57355642e+08],
       ...,
       [ 3.69179000e+01,  1.40426200e+02,  1.48292873e+09],
       [-9.02830000e+00,  1.18663900e+02,  1.48305062e+09],
       [ 3.73973000e+01,  1.41410300e+02,  1.48312851e+09]])

In [6]:
X = torch.tensor(features.values, dtype=torch.float32)
Y = torch.tensor(targets.values, dtype=torch.float32)

In [7]:
dataset = TensorDataset(X, Y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [8]:
# 计算分割尺寸
test_size = int(len(dataset) * 0.2)  # 20%作为测试集
train_size = len(dataset) - test_size  # 剩余作为训练集

# 随机分割数据集
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# 你可以将这些数据集进一步封装成DataLoader，便于批处理和迭代
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# 从test_dataset中提取X_test和Y_test
# 注意：random_split返回的子集类型是Subset，我们需要通过.dataset属性访问原始数据
X_test = torch.stack([data[0] for data in test_dataset])
Y_test = torch.stack([data[1] for data in test_dataset])

print("X_test shape:", X_test.shape)  # 应该输出torch.Size([4682, 3])
print("Y_test shape:", Y_test.shape)  # 应该输出torch.Size([4682, 2])

X_test shape: torch.Size([4682, 3])
Y_test shape: torch.Size([4682, 2])


In [9]:
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # 如果有多个GPU，可以更改索引从 0 到 1, 2, ...
    print("Training on GPU...")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Training on CPU...")

Training on GPU...


In [12]:
# 定义神经网络结构
class ComplexNet(nn.Module):
    def __init__(self):
        super(ComplexNet, self).__init__()
        self.fc1 = nn.Linear(3, 64)  # 输入层到第一个隐藏层
        self.fc2 = nn.Linear(64, 128)  # 第一个隐藏层到第二个隐藏层
        self.fc3 = nn.Linear(128, 64)  # 第二个隐藏层到第三个隐藏层
        self.fc4 = nn.Linear(64, 2)  # 第三个隐藏层到输出层

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# 初始化网络
model = ComplexNet()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # 使用Adam优化器

In [13]:
# 训练网络
min_loss=10000
epochs = 200
for epoch in range(epochs):
    for batch_X, batch_Y in train_loader:
        batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)  # 移动到GPU

        optimizer.zero_grad()
        predictions = model(batch_X)
        loss = criterion(predictions, batch_Y)
        loss.backward()
        optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')
    if min_loss>loss.item():
        torch.save(model.state_dict(), 'minloss_BP_model.pth')
        min_loss=loss.item()
predictions = predictions.cpu()
print(f'Epoch {epoch+1}, Loss: {loss.item()}')
# 保存模型
torch.save(model.state_dict(), 'lastloss_BP_model.pth')

Epoch 1, Loss: 3115589.5
Epoch 11, Loss: 4404.81982421875
Epoch 21, Loss: 20.691078186035156
Epoch 31, Loss: 20.409597396850586
Epoch 41, Loss: 19.96933937072754
Epoch 51, Loss: 19.895641326904297
Epoch 61, Loss: 20.59037971496582
Epoch 71, Loss: 17.9504337310791
Epoch 81, Loss: 22.87740135192871
Epoch 91, Loss: 17.872581481933594
Epoch 101, Loss: 20.245941162109375
Epoch 111, Loss: 19.472984313964844
Epoch 121, Loss: 19.593257904052734
Epoch 131, Loss: 19.17864227294922
Epoch 141, Loss: 20.30057716369629
Epoch 151, Loss: 23.06133460998535
Epoch 161, Loss: 23.019851684570312
Epoch 171, Loss: 23.12517738342285
Epoch 181, Loss: 21.654699325561523
Epoch 191, Loss: 21.454879760742188
Epoch 200, Loss: 21.433610916137695


In [17]:
print(f'BestLoss: {min_loss}')

BestLoss: 17.647693634033203


In [15]:
# 切换到评估模式
model.eval()

# 不计算梯度
with torch.no_grad():
    actuals = []
    predictions = []
    
    for batch_X, batch_Y in test_loader:
        batch_X, batch_Y = batch_X.to(device), batch_Y.to(device)
        
        # 生成预测并将其收集起来
        output = model(batch_X)
        predictions.extend(output.cpu().numpy())
        actuals.extend(batch_Y.cpu().numpy())

In [19]:
# 计算RMSE
rmse = np.sqrt(mean_squared_error(actuals, predictions))
print(f'Root Mean Squared Error (RMSE): {rmse}')

# 计算MAE
mae = mean_absolute_error(actuals, predictions)
print(f'Mean Absolute Error (MAE): {mae}')

Root Mean Squared Error (RMSE): 101.29426574707031
Mean Absolute Error (MAE): 38.674434661865234
