In [1]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch import nn
import numpy as np
import pandas as pd

# test torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
df = pd.read_csv('train.csv')
print(df.head())

# selecting attributs
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
X = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
y = df['Survived']

X['Age'].fillna(X['Age'].mean(), inplace=True)
X['Fare'].fillna(X['Fare'].mean(), inplace=True)

print(X.isnull().sum())

# train : test (60:20):20
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalization 
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)


cuda:0
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN       

In [2]:
class ANN(nn.Module):
    def __init__(self):
        super(ANN, self).__init__()
        # input layer
        self.fc1 = nn.Linear(6, 64)  
        # hidden layer
        self.fc2 = nn.Linear(64, 64) 
        self.fc3 = nn.Linear(64, 64) 
        # output layer 
        self.fc4 = nn.Linear(64, 1)  

    def forward(self, x):
        x = torch.relu(self.fc1(x)) 
        x = torch.relu(self.fc2(x)) 
        x = torch.relu(self.fc3(x)) 
        x = torch.sigmoid(self.fc4(x)) 
        return x

model = ANN()


In [3]:
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# loop
epochs = 100
for epoch in range(epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # forward propagation 
        scores = model(data)
        loss = criterion(scores, targets.unsqueeze(1))

        # backward ...
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}')

with torch.no_grad():
    y_pred = model(X_test)
    y_pred_class = y_pred.round()
    accuracy = (y_pred_class.eq(y_test.unsqueeze(1))).sum() / float(y_test.shape[0])
    print(f'Accuracy: {accuracy:.4f}')

Epoch 1/100, Loss: 0.6952
Epoch 2/100, Loss: 0.6958
Epoch 3/100, Loss: 0.6953
Epoch 4/100, Loss: 0.6862
Epoch 5/100, Loss: 0.6840
Epoch 6/100, Loss: 0.6802
Epoch 7/100, Loss: 0.6776
Epoch 8/100, Loss: 0.6722
Epoch 9/100, Loss: 0.6675
Epoch 10/100, Loss: 0.6675
Epoch 11/100, Loss: 0.6623
Epoch 12/100, Loss: 0.6514
Epoch 13/100, Loss: 0.6506
Epoch 14/100, Loss: 0.6409
Epoch 15/100, Loss: 0.6217
Epoch 16/100, Loss: 0.6084
Epoch 17/100, Loss: 0.6131
Epoch 18/100, Loss: 0.6045
Epoch 19/100, Loss: 0.6085
Epoch 20/100, Loss: 0.5743
Epoch 21/100, Loss: 0.5772
Epoch 22/100, Loss: 0.5834
Epoch 23/100, Loss: 0.5819
Epoch 24/100, Loss: 0.5507
Epoch 25/100, Loss: 0.5507
Epoch 26/100, Loss: 0.5432
Epoch 27/100, Loss: 0.5765
Epoch 28/100, Loss: 0.4725
Epoch 29/100, Loss: 0.5399
Epoch 30/100, Loss: 0.5749
Epoch 31/100, Loss: 0.4962
Epoch 32/100, Loss: 0.5321
Epoch 33/100, Loss: 0.4252
Epoch 34/100, Loss: 0.4764
Epoch 35/100, Loss: 0.4909
Epoch 36/100, Loss: 0.4727
Epoch 37/100, Loss: 0.5543
Epoch 38/1

In [4]:
test_df = pd.read_csv('test.csv')

test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1})
test_df['Age'].fillna(test_df['Age'].mean(), inplace=True)
test_df['Fare'].fillna(test_df['Fare'].mean(), inplace=True)

# Attributes
X_test = test_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']]

# 数据标准化（使用与训练数据相同的标准化参数）
X_test = scaler.transform(X_test)  # scaler 是之前用于训练数据的 StandardScaler 实例

# 转换为 PyTorch 张量
X_test = torch.tensor(X_test, dtype=torch.float32)

# 确保模型处于评估模式
model.eval()

# 进行预测
with torch.no_grad():
    predictions = model(X_test)
    

# 将预测结果转换为类别（例如，0 或 1）
predicted_classes = predictions.round().numpy()
predicted_classes = predicted_classes.flatten()

results_df = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': predicted_classes.astype(int)
})

# 将结果保存到 CSV 文件
results_df.to_csv('prediction_results.csv', index=False)
