In [3]:
import pandas as pd
import numpy as np

PATH = "./data/"
train = pd.read_csv(PATH+'train.csv')
test = pd.read_csv(PATH+'test.csv')
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
print(train.shape)
train.describe()

(891, 12)


Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [5]:
print(test.shape)
test.describe()

(418, 11)


Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
count,418.0,418.0,332.0,418.0,418.0,417.0
mean,1100.5,2.26555,30.27259,0.447368,0.392344,35.627188
std,120.810458,0.841838,14.181209,0.89676,0.981429,55.907576
min,892.0,1.0,0.17,0.0,0.0,0.0
25%,996.25,1.0,21.0,0.0,0.0,7.8958
50%,1100.5,3.0,27.0,0.0,0.0,14.4542
75%,1204.75,3.0,39.0,1.0,0.0,31.5
max,1309.0,3.0,76.0,8.0,9.0,512.3292


In [6]:
from sklearn.preprocessing import Normalizer
# 数据预处理
def preprocess(data):
    # Data Cleaning
    data = data.drop(columns=['PassengerId'], axis=1)
    data['UknAge'] = data['Age'].map(lambda x: x>0).astype(int)
    data['Age'] = data['Age'].fillna(0)
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())
    data['Embarked'] = data['Embarked'].fillna('S')
    #### One-hot Encoding
    data['Pclass'] -= 1
    data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
    data['Embarked'] = data['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})
    dummy_features = ['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked']
    data = pd.get_dummies(data, columns=dummy_features, dtype=float)
    
    data = data.fillna(0)
    #### Normalization
    num_features = ['Age', 'Fare']
    data[num_features] = Normalizer().fit_transform(data[num_features])
    #### To List
    data.drop(columns=data.columns[data.dtypes=='object'], axis=1, inplace=True)
    return data

In [7]:
traindf = preprocess(train)
testdf = preprocess(test)

In [8]:
print(traindf.shape)
print(testdf.shape)

(891, 26)
(418, 26)


In [9]:
print(set(traindf.columns)-set(testdf.columns))
set(testdf.columns)-set(traindf.columns)

{'Survived'}


{'Parch_9'}

In [10]:
testdf.drop(columns={'Parch_9'}, axis=1, inplace=True)

In [11]:
import torch
from torch.utils.data import Dataset

class TitanicDataset(Dataset):
    def __init__(self, df):
        self.len = df.shape[0]
        self.x_data = torch.from_numpy(df.drop(columns=['Survived'], axis=1).values).to(torch.float64)
        self.y_data = torch.from_numpy(df['Survived'].values)
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [12]:
from torch.utils.data import DataLoader

dataset = TitanicDataset(traindf)
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=0) #num_workers 多线程

In [13]:
import torch.nn.functional as F
import torch.nn as nn

class TitanicModel(torch.nn.Module):
    def __init__(self):
        super(TitanicModel, self).__init__()
        self.linear1 = nn.Linear(25, 13)
        self.linear2 = nn.Linear(13, 6)
        self.linear3 = nn.Linear(6, 4)
        self.linear4 = nn.Linear(4,1)
        self.sigmoid = nn.Sigmoid()


    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.linear4(x))
        x = self.sigmoid(x)
        return x

In [14]:
model = TitanicModel()

# construct loss and optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.56)

losses = []
for epoch in range(1000):
    train_loss = 0
    num_right = 0
    total = 0
    for i, data in enumerate(train_loader, 0): # train_loader 是先shuffle后mini_batch
        inputs, labels = data
        inputs, labels = inputs.float(), labels.float().reshape(-1, 1)
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item() * len(inputs)
        total += len(inputs)
        result = [1 if out >= 0.5 else 0 for out in y_pred]
        num_right += np.sum(np.array(result) == labels.numpy().reshape(-1))
    
    train_loss = train_loss #/ total
    accuracy = num_right #/ total

    if epoch % 50 == 0:
        print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, accuracy, epoch))
            
    #losses.append(loss.detach().numpy())

Loss: 624.8147486448288 Accuracy: 342 Epoch:0
Loss: 580.3384671211243 Accuracy: 342 Epoch:50
Loss: 542.3344947099686 Accuracy: 342 Epoch:100
Loss: 531.5226611495018 Accuracy: 342 Epoch:150
Loss: 527.6788865923882 Accuracy: 342 Epoch:200
Loss: 525.2824735045433 Accuracy: 342 Epoch:250
Loss: 523.1089599728584 Accuracy: 342 Epoch:300
Loss: 520.1314758658409 Accuracy: 342 Epoch:350
Loss: 518.6859980225563 Accuracy: 342 Epoch:400
Loss: 517.3291271328926 Accuracy: 342 Epoch:450
Loss: 515.696218252182 Accuracy: 342 Epoch:500
Loss: 514.5949493646622 Accuracy: 342 Epoch:550
Loss: 513.9855654239655 Accuracy: 342 Epoch:600
Loss: 513.3786295354366 Accuracy: 342 Epoch:650
Loss: 512.6854085326195 Accuracy: 342 Epoch:700
Loss: 511.6219474673271 Accuracy: 342 Epoch:750
Loss: 510.99775755405426 Accuracy: 342 Epoch:800
Loss: 510.55481868982315 Accuracy: 342 Epoch:850
Loss: 510.5670741200447 Accuracy: 342 Epoch:900
Loss: 510.72464096546173 Accuracy: 342 Epoch:950


In [15]:
result

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [16]:
labels

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]])

In [None]:
import matplotlib.pyplot as plt
plt.plot(losses)

In [None]:
tensor_test_x = torch.FloatTensor(testdf.values)
with torch.no_grad():
    test_output = model(tensor_test_x)
    result = np.array([1 if out >= 0.5 else 0 for out in test_output])
    submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': result})

In [None]:
submission[submission['Survived']==0]

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

class SimpleNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(5, 128)
        self.layer2 = nn.Linear(128, 256)
        self.layer3 = nn.Linear(256, 128)
        self.layer4 = nn.Linear(128, 1)

    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = self.layer3(x)
        x = F.relu(x)
        x = self.layer4(x)
        x = torch.sigmoid(x)
        return x

class SimpleDataset(Dataset):
    def __init__(self, x, y):
        super().__init__()
        self.x = x
        self.y = y
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [20]:
data_set

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
0,3,22.000000,1,0,7.2500
1,1,38.000000,1,0,71.2833
2,3,26.000000,0,0,7.9250
3,1,35.000000,1,0,53.1000
4,3,35.000000,0,0,8.0500
...,...,...,...,...,...
413,3,29.881138,0,0,8.0500
414,1,39.000000,0,0,108.9000
415,3,38.500000,0,0,7.2500
416,3,29.881138,0,0,8.0500


In [18]:
data_set = pd.concat((train.drop(['Survived'], axis = 1), test), axis = 0)

data_set = data_set.drop(['PassengerId', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis = 1)
data_set = data_set.fillna(data_set.mean())

n_train = train.shape[0]
train_x, test_x = data_set[:n_train], data_set[n_train:]
train_y = train['Survived']

train_x = train_x[train_x.keys()].values
test_x = test_x[test_x.keys()].values
train_y = train_y.values

import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, SubsetRandomSampler
from sklearn.model_selection import KFold

k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)

simple_dataset = SimpleDataset(train_x, train_y)

loss_list = []
acc_list = []

for fold, (train_ids, valid_ids) in enumerate(kfold.split(simple_dataset)):
    print(f'FOLD {fold}')
    print('--------------------------------')
    train_subsampler = SubsetRandomSampler(train_ids)
    valid_subsampler = SubsetRandomSampler(valid_ids)
    
    train_loader = DataLoader(simple_dataset, batch_size=99, sampler=train_subsampler)
    valid_loader = DataLoader(simple_dataset, batch_size=99, sampler=valid_subsampler)
    
    simple_nn = SimpleNN()
    optimizer = optim.Adam(simple_nn.parameters(), lr=0.01)
    error = nn.BCELoss()
    
    for epoch in range(300):
        with torch.no_grad():
            valid_loss = 0
            num_right = 0
            for tensor_x, tensor_y in valid_loader:
                tensor_x = tensor_x.float()
                tensor_y = tensor_y.float().reshape(-1, 1)
                output = simple_nn(tensor_x)
                loss = error(output, tensor_y)
                valid_loss += loss.item() * len(tensor_x)
                result = [1 if out >= 0.5 else 0 for out in output]
                num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
            
            valid_loss = valid_loss / len(valid_loader.sampler.indices)
            valid_accuracy = num_right / len(valid_loader.sampler.indices)
        
            if epoch % 50 == 0:
                print('Valid Loss: {} Accuracy: {} Epoch:{}'.format(valid_loss, valid_accuracy, epoch))

        train_loss = 0
        num_right = 0
        for tensor_x, tensor_y in train_loader:
            tensor_x = tensor_x.float()
            tensor_y = tensor_y.float().reshape(-1, 1)
            optimizer.zero_grad()
            output = simple_nn(tensor_x)
            loss = error(output, tensor_y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * len(tensor_x)
            result = [1 if out >= 0.5 else 0 for out in output]
            num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
        
        train_loss = train_loss / len(train_loader.sampler.indices)
        accuracy = num_right / len(train_loader.sampler.indices)
    
        if epoch % 50 == 0:
            print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, accuracy, epoch))
            
    loss_list.append(valid_loss)
    acc_list.append(valid_accuracy)

print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))

FOLD 0
--------------------------------
Valid Loss: 0.904531915760573 Accuracy: 0.40782122905027934 Epoch:0
Loss: 1.8107023038221208 Accuracy: 0.49297752808988765 Epoch:0
Valid Loss: 0.5929895436297582 Accuracy: 0.7039106145251397 Epoch:50
Loss: 0.5527062832555744 Accuracy: 0.7275280898876404 Epoch:50
Valid Loss: 0.6424723970157474 Accuracy: 0.7039106145251397 Epoch:100
Loss: 0.5355912347355586 Accuracy: 0.7331460674157303 Epoch:100
Valid Loss: 0.8669290382768855 Accuracy: 0.7150837988826816 Epoch:150
Loss: 0.5165073457644896 Accuracy: 0.75 Epoch:150
Valid Loss: 0.9368195880058757 Accuracy: 0.7150837988826816 Epoch:200
Loss: 0.5093271379892745 Accuracy: 0.7570224719101124 Epoch:200
Valid Loss: 0.7968859605949018 Accuracy: 0.7150837988826816 Epoch:250
Loss: 0.5064727894626977 Accuracy: 0.7584269662921348 Epoch:250
FOLD 1
--------------------------------
Valid Loss: 0.8285672152310275 Accuracy: 0.4438202247191011 Epoch:0
Loss: 2.632898360616194 Accuracy: 0.541374474053296 Epoch:0
Valid L

In [None]:
tensor_test_x = torch.FloatTensor(test_x)
with torch.no_grad():
    test_output = simple_nn(tensor_test_x)
    result = np.array([1 if out >= 0.5 else 0 for out in test_output])
    submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': result})
submission

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

# 生成示例数据
np.random.seed(0)
T = 100  # 时间序列长度
time = np.arange(0, T, 1)
temperature = 20 + 10 * np.sin(2 * np.pi * time / 365) + np.random.normal(0, 2, T)


In [None]:
plt.plot(time, temperature)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import math

# 准备数据
def prepare_data(data, seq_length):
    input_data, target = [], []
    for i in range(len(data) - seq_length):
        input_data.append(data[i:i+seq_length])
        target.append(data[i+seq_length])
    return input_data, target

# 定义序列长度和训练集大小
seq_length = 10
train_size = int(0.8 * len(temperature))

# 准备训练集和测试集数据
train_data, train_target = prepare_data(temperature[:train_size], seq_length)
test_data, test_target = prepare_data(temperature[train_size:], seq_length)

# 转换数据为PyTorch张量
train_data = torch.tensor(train_data).float()
train_target = torch.tensor(train_target).float()
test_data = torch.tensor(test_data).float()
test_target = torch.tensor(test_target).float()

# 定义位置编码器
class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len):
        super(PositionalEncoder, self).__init__()
        self.d_model = d_model
        
        # 计算位置编码矩阵
        pe = torch.zeros(max_seq_len, d_model)
        position = torch.arange(0, max_seq_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        x = x * math.sqrt(self.d_model)
        seq_len = x.size(1)
        x = x + self.pe[:, :seq_len]
        return x

# 定义Transformer模型
class TransformerModel(nn.Module):
    def __init__(self, d_model, max_seq_len, num_heads, num_layers):
        super(TransformerModel, self).__init__()
        self.position_encoder = PositionalEncoder(d_model, max_seq_len)
        
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers
        )
        
        self.fc = nn.Linear(d_model, 1)
    
    def forward(self, x):
        x = self.position_encoder(x)
        x = self.transformer(x, x)
        x = x[:, -1, :]  # 只使用最后一个时间步的输出
        x = self.fc(x)
        return x

# 定义模型参数
d_model = 32
max_seq_len = seq_length
num_heads = 4
num_layers = 2

# 创建模
model = TransformerModel(d_model, max_seq_len, num_heads, num_layers)

In [None]:
# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
batch_size = 32 #150
# 迭代训练数据
num_epochs = 10
for epoch in range(num_epochs):
    # 在每个epoch之前初始化损失累计
    total_loss = 0.0
    
    for i in range(0, len(train_data), batch_size):
        # 获取一个batch的训练数据
        batch_inputs = train_data[i:i+batch_size]
        batch_labels = train_target[i:i+batch_size]
        
        # 清零梯度
        optimizer.zero_grad()
        
        # 前向传播
        print(batch_inputs.shape)
        outputs = model(batch_inputs)
        
        # 计算损失
        loss = criterion(outputs, batch_labels)
        
        # 反向传播和优化
        loss.backward()
        optimizer.step()
        
        # 累计损失
        total_loss += loss.item()
    
    # 打印每个epoch的平均损失
    avg_loss = total_loss / (len(train_data) // batch_size)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

In [None]:
train_size