***Titanic prediction with logistic regression (leaky ReLU and dropout) in PyTorch***

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [None]:
# Load the Titanic training set; it contains the `Survived` column used as the target below.
data = pd.read_csv("/kaggle/input/titanic/train.csv")

In [None]:
# Print a per-column summary of the training frame.
data.info()

In [None]:
# Show three randomly chosen rows as a quick look at the raw values.
data.sample(3)

In [None]:
# Separate the target column from the feature columns.
y = data['Survived']
X = data.drop(columns=['Survived'])

In [None]:
# Group the feature columns by dtype: int64/float64 columns are treated as numeric,
# object columns as categorical. Columns of any other dtype end up in neither list.
feature_dtypes = data.dtypes[X.columns]
num_cols = [name for name, kind in feature_dtypes.items() if kind in ('int64', 'float64')]
cat_cols = [name for name, kind in feature_dtypes.items() if kind == 'object']

In [None]:
from sklearn.preprocessing import MaxAbsScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

**Предобработка данных**

In [None]:
# Numeric columns: impute with SimpleImputer's 'constant' strategy, then max-abs scale.
num_steps = [
    ('impute', SimpleImputer(strategy='constant')),
    ('scale', MaxAbsScaler()),
]
num_transform = Pipeline(num_steps)

# Categorical columns: impute with the most frequent value, one-hot encode
# (categories unseen during fit are ignored at transform time), then max-abs scale.
cat_steps = [
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ('scale', MaxAbsScaler()),
]
cat_transform = Pipeline(cat_steps)

In [None]:
# Route each column group through its own pipeline; categorical outputs come first
# in the resulting feature matrix, numeric outputs second.
column_routes = [
    ('cat', cat_transform, cat_cols),
    ('num', num_transform, num_cols),
]
preprocess = ColumnTransformer(column_routes)

In [None]:
# Fit the preprocessing pipeline and encode the features.
# The input frame is rebuilt from `data` instead of being read from `X`, so this cell is
# idempotent: re-running it never feeds an already-encoded matrix back into the transformer.
# NOTE(review): the transformer is fitted on all rows before the train/test split below,
# so the held-out split is not fully unseen by the preprocessing step.
X = preprocess.fit_transform(data.drop(columns=['Survived']))

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for evaluation. A fixed random_state makes the split (and
# therefore every metric computed from it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [None]:
# Sanity check: shapes of the training features and training labels.
X_train.shape, y_train.shape

In [None]:
def _as_dense(matrix):
    """Return `matrix` as a dense NumPy array.

    Objects exposing `.toarray()` (e.g. SciPy sparse matrices) are densified through it;
    anything else is passed through `np.asarray`. This keeps the cell safe to re-run,
    because an array that is already dense is simply returned as an ndarray.
    """
    return matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)


# Convert the features to dense arrays and the labels to plain NumPy arrays
# so they can be turned into torch tensors later.
X_train = _as_dense(X_train)
X_test = _as_dense(X_test)
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)

**Построение модели**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.utils import shuffle
from torch.autograd import Variable

class LinearRegression(nn.Module):
    """Small feed-forward classifier: Linear -> leaky ReLU -> dropout -> Linear -> sigmoid.

    Despite the name, the network has one hidden layer of 2000 units.

    Args:
        input_size: number of input features per sample.
        output_size: number of output units (one score per class).
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        # Size the layers from the constructor arguments. The previous version read the
        # notebook globals `input_dim` / `output_dim`, silently ignoring the arguments and
        # failing with NameError whenever those globals were not defined yet.
        self.f1 = nn.Linear(input_size, 2000)
        self.f2 = nn.Linear(2000, output_size)

    def forward(self, x):
        """Compute per-class scores in (0, 1) for a batch `x` of shape (batch, input_size)."""
        x = self.f1(x)
        x = F.leaky_relu(x)
        # Tie dropout to the module's mode: active after model.train(), disabled after
        # model.eval(). Without `training=...`, F.dropout is always active, which makes
        # inference noisy and non-deterministic.
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.f2(x)
        # NOTE(review): this notebook trains with nn.CrossEntropyLoss, which expects raw
        # logits; the sigmoid is kept so the returned values stay in (0, 1) as before.
        # Consider returning `x` directly.
        return torch.sigmoid(x)

In [None]:
# Number of samples per mini-batch.
batch_size = 100
# Number of full mini-batches in the training set (integer division discards any remainder).
batch_no = len(X_train) // batch_size

In [None]:
# Shape of the training feature matrix: (number of samples, number of features).
X_train.shape

In [None]:
def generate_batches(X, y, batch_size, seed=42, drop_last=True):
    """Yield shuffled ``(X_batch, y_batch)`` mini-batches.

    Args:
        X: array-like of samples, indexed along the first axis.
        y: array-like of targets with the same length as ``X``.
        batch_size: number of samples per batch; must be positive.
        seed: seed for the shuffle. The default (42) reproduces the same permutation on
            every call, as before. Pass ``None`` to get a fresh shuffle on each call.
        drop_last: when True (default, previous behaviour) a trailing batch smaller than
            ``batch_size`` is discarded; when False it is yielded as well.

    Yields:
        Tuples ``(X[idx], y[idx])`` of NumPy arrays.

    Raises:
        AssertionError: if ``X`` and ``y`` differ in length.
        ValueError: if ``batch_size`` is not positive.
    """
    assert len(X) == len(y), "X and y must have the same number of samples"
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    # A private RandomState gives the same permutation as np.random.seed(seed) followed by
    # np.random.permutation, but leaves the global NumPy RNG state untouched.
    perm = np.random.RandomState(seed).permutation(n_samples)

    # Step through the permutation by sample offset. The previous guard compared the batch
    # *index* with the sample count, which dropped the only batch when
    # len(X) == batch_size and the last sample when batch_size == 1.
    for start in range(0, n_samples, batch_size):
        ind = perm[start:start + batch_size]
        if drop_last and len(ind) < batch_size:
            break
        yield (X[ind], y[ind])

**Код обучения**

In [None]:
# --- Hyper-parameters ---------------------------------------------------------------
# Take the input width from the data instead of hard-coding it, so the model keeps
# matching the feature matrix if the preprocessing produces a different number of columns.
input_dim = X_train.shape[1]
output_dim = 2            # two classes: did not survive / survived
learning_rate = 1
iteration_number = 300    # number of passes over the training data

# Seed torch so weight initialisation and dropout masks are reproducible.
torch.manual_seed(42)

model = LinearRegression(input_dim, output_dim)
error = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.5)

loss_list = []   # mean training loss per pass (Python floats)
acc_list = []    # training accuracy per pass (Python floats)

for iteration in range(iteration_number):
    model.train()
    epoch_loss = 0.0
    n_correct = 0
    n_seen = 0
    n_batches = 0

    # The loop variables are deliberately not called `x` / `y`: using `y` here would
    # overwrite the notebook-level target `y` and break a re-run of the split cell.
    for x_batch, y_batch in generate_batches(X_train, y_train, batch_size):
        inputs = torch.from_numpy(x_batch).float()
        labels = torch.from_numpy(y_batch).long()

        optimizer.zero_grad()
        results = model(inputs)
        loss = error(results, labels)
        loss.backward()
        optimizer.step()

        # Accumulate plain Python numbers so no tensors are kept alive across iterations.
        epoch_loss += loss.item()
        pred = results.detach().argmax(dim=1)
        n_correct += (pred == labels).sum().item()
        n_seen += len(pred)
        n_batches += 1

    # Average over the batches that actually ran (guarding against an empty pass).
    mean_loss = epoch_loss / max(n_batches, 1)
    accuracy = n_correct / max(n_seen, 1)
    loss_list.append(mean_loss)
    acc_list.append(accuracy)

    if iteration % 50 == 0:
        print('epoch {}: loss {}, accuracy {}'.format(iteration, mean_loss, accuracy))

plt.plot(range(iteration_number), loss_list)
plt.xlabel("Number of Iterations")
plt.ylabel("Loss")
plt.show()
plt.plot(range(iteration_number), acc_list)
plt.xlabel("Number of Iterations")
plt.ylabel("Accuracy")
plt.show()

In [None]:
# Evaluate on the held-out split.
# Switch the module to inference mode; this affects layers that honour `self.training`.
model.eval()
# Inputs need no gradient for inference, so a plain float tensor is enough.
X_test_var = torch.as_tensor(X_test, dtype=torch.float32)
with torch.no_grad():
    test_result = model(X_test_var)
# The predicted class is the index of the highest score in each row.
values, labels = torch.max(test_result, 1)
survived = labels.numpy()
# Fraction of held-out passengers whose predicted class matches the true label.
print((survived == y_test).sum()/len(survived))

**Предсказания**

In [None]:
# Read the Kaggle test set once. The raw frame stays available as `X_test_1` (the
# submission cell reads PassengerId from it); the encoded copy goes into `X_test_origin`.
X_test_1 = pd.read_csv("/kaggle/input/titanic/test.csv")
# Example submission file shipped with the dataset; not used for the predictions below.
submission = pd.read_csv("/kaggle/input/titanic/gender_submission.csv")
# Apply the already-fitted preprocessing and densify for torch.
X_test_origin = preprocess.transform(X_test_1).toarray()

# Switch the module to inference mode; this affects layers that honour `self.training`.
model.eval()
X_test_var = torch.as_tensor(X_test_origin, dtype=torch.float32)
with torch.no_grad():
    test_result = model(X_test_var)
# The predicted class is the index of the highest score in each row.
values, labels = torch.max(test_result, 1)
survived = labels.numpy()

In [None]:
import csv

# Header row followed by one [PassengerId, predicted Survived] row per test passenger.
passenger_ids = X_test_1.PassengerId
submission = [['PassengerId', 'Survived']] + [
    [passenger_ids.loc[row], survived[row]] for row in range(len(survived))
]

In [None]:
# Write the submission rows to disk. newline='' is required by the csv module so the
# writer controls line endings itself; without it, extra blank rows appear on platforms
# that translate '\n' (e.g. Windows).
with open('submission.csv', 'w', newline='') as submissionFile:
    writer = csv.writer(submissionFile)
    writer.writerows(submission)

print('Writing Complete!')