In [3]:
import pandas as pd
import numpy as np
import torch, torchvision
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader,Dataset
import tqdm

In [4]:
train_filepath = '../data/train.csv'
test_filepath = '../data/test.csv'
traindata = pd.read_csv(train_filepath)
testdata = pd.read_csv(test_filepath)

In [5]:
def process_df(df):
    df = df.drop(['PassengerId','Name','Ticket','Cabin'],axis=1)
    df['Age'] = df['Age'].fillna(df['Age'].median())
    df['Embarked'] = df['Embarked'].fillna('S')
    df = df.replace('male',0)
    df = df.replace('female',1)
    df['Embarked'] = df['Embarked'].replace('S',0)
    df['Embarked'] = df['Embarked'].replace('C',1)
    df['Embarked'] = df['Embarked'].replace('Q',2)
    
    return df

In [6]:
traindata = process_df(traindata)
testdata = process_df(testdata)

In [7]:
class TitanicDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.X = self.df.drop(['Survived'], axis=1)
        self.Y = self.df['Survived']
        
    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self,idx):
#        print(type(self.X.iloc[idx,:]))
#        print(type(self.Y.iloc[idx]))
        return self.X.iloc[idx,:].values, self.Y.iloc[idx]

In [8]:
train_dataset = TitanicDataset(traindata)
len(train_dataset)

891

In [11]:
class Net(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(Net, self).__init__()
        self.f1 = nn.Linear(in_features, hidden_features)
        self.f2 = nn.Linear(hidden_features, hidden_features)
        self.f3 = nn.Linear(hidden_features,out_features)
        
    def forward(self, X):
        X = F.relu(self.f1(X))
        X = F.relu(self.f2(X))
        X = F.softmax(self.f3(X))
        
        return X

input_sz = 7
hidden_sz = 16
out_sz = 2
net = Net(input_sz, hidden_sz, out_sz)
print(net)

Net(
  (f1): Linear(in_features=7, out_features=16, bias=True)
  (f2): Linear(in_features=16, out_features=16, bias=True)
  (f3): Linear(in_features=16, out_features=2, bias=True)
)


In [12]:
learning_rate = 0.01
loss_func = nn.MSELoss(reduction="sum")
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
epoch = 32
def train():
    for e in range(epoch):
        for X, labels in train_dataloader:
            labels = F.one_hot(labels)
            y = net(X.float())
            print(y)
            loss = loss_func(y, labels.float())
            loss.backward()
            optimizer.step()
            
def convert_label_to_onehot(labels):
    onehot = np.zeros((len(labels), max(labels)+1))
    idx = [(i, t.item()) for i, t in enumerate(labels)]
    for i in idx:
        onehot[i] = 1
    return onehot

train()

  # This is added back by InteractiveShellApp.init_path()


tensor([[0.6315, 0.3685],
        [0.6492, 0.3508],
        [0.7186, 0.2814],
        [0.6414, 0.3586],
        [0.9078, 0.0922],
        [0.6347, 0.3653],
        [0.9599, 0.0401],
        [0.8439, 0.1561],
        [0.6320, 0.3680],
        [0.6642, 0.3358],
        [0.8589, 0.1411],
        [0.6287, 0.3713],
        [0.6364, 0.3636],
        [0.6584, 0.3416],
        [0.6840, 0.3160],
        [0.6499, 0.3501],
        [0.6131, 0.3869],
        [0.9949, 0.0051],
        [0.6824, 0.3176],
        [0.9587, 0.0413],
        [0.6230, 0.3770],
        [0.9315, 0.0685],
        [0.7654, 0.2346],
        [0.7089, 0.2911],
        [0.9253, 0.0747],
        [0.6493, 0.3507],
        [0.6485, 0.3515],
        [0.9922, 0.0078],
        [0.6933, 0.3067],
        [0.7997, 0.2003],
        [0.9969, 0.0031],
        [0.8063, 0.1937]], grad_fn=<SoftmaxBackward>)
tensor([[0.8432, 0.1568],
        [0.6955, 0.3045],
        [0.6151, 0.3849],
        [0.7739, 0.2261],
        [0.7663, 0.2337],
        [0

In [13]:
# torch.max()の簡単な説明
prob = torch.tensor([[0.1, 0.9],
                    [0.2, 0.8],
                    [0.6, 0.4]])
max, argmax = torch.max(prob, dim=1)
print("max\t",max)
print("argmax\t",argmax)   

max	 tensor([0.9000, 0.8000, 0.6000])
argmax	 tensor([1, 1, 0])


In [14]:
def test():
    test_X = torch.tensor(testdata.iloc[:,:].values)
    test_Y = net(test_X.float())
    survived = torch.max(test_Y, dim=1)[1]
    test_paID = pd.read_csv('../data/gender_submission.csv')['PassengerId']
    sub_df = pd.DataFrame({"PassengerId":test_paID.values, "Survived":survived})
    print(sub_df)
    return sub_df
    
sub_df = test()
sub_df.to_csv("./submissions.csv", index=False)
    

     PassengerId  Survived
0            892         0
1            893         0
2            894         0
3            895         0
4            896         0
..           ...       ...
413         1305         0
414         1306         0
415         1307         0
416         1308         0
417         1309         0

[418 rows x 2 columns]


  # This is added back by InteractiveShellApp.init_path()
