# パッケージの読み込み

In [1]:
import numpy as np
import pandas as pd

# データの読み込み

In [2]:
# Read the three competition CSVs in a fixed order: training set, test set,
# and the sample submission file.
train, test, gender_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/titanic/train.csv',
        '../input/titanic/test.csv',
        '../input/titanic/gender_submission.csv',
    )
)

In [3]:
# Stack train and test so the feature engineering below is applied to both.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# the test rows reuse index labels that already exist in the train rows, which
# makes any label-based selection or alignment on `data` ambiguous.
# Rows stay in train-then-test order, so positional slicing still separates them.
data = pd.concat([train, test], sort=False, ignore_index=True)

# 特徴量エンジニアリング

## Sex

In [4]:
# Encode Sex as an integer: male -> 0, female -> 1.
# Assign the result back to the column instead of calling an inplace method on
# data['Sex']: that chained form acts on a temporary object and does not update
# `data` when pandas Copy-on-Write is active.
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})

## Embarked

In [5]:
# Fill missing ports with 'S', then encode S/C/Q as 0/1/2.
# Done as one chain assigned back to the column: an inplace fillna on
# data['Embarked'] acts on a temporary object under pandas Copy-on-Write,
# which would leave NaN in place and make astype(int) fail.
data['Embarked'] = (
    data['Embarked']
    .fillna('S')
    .map({'S': 0, 'C': 1, 'Q': 2})
    .astype(int)
)

## Fare

In [6]:
# Replace missing fares with the mean fare of the combined train+test frame.
# Assigned back explicitly; an inplace fillna on data['Fare'] would not update
# `data` when pandas Copy-on-Write is active.
data['Fare'] = data['Fare'].fillna(data['Fare'].mean())

## Age

In [7]:
age_avg = data['Age'].mean()
age_std = data['Age'].std()

# Impute missing ages with random integers from [mean - std, mean + std).
# The previous version called randint without `size`, so a single number was
# drawn and written into every missing row; it was also unseeded, so each run
# produced a different dataset.
# A seeded generator keeps the notebook reproducible on Restart & Run All.
age_rng = np.random.default_rng(0)
# Draw one candidate per row; only rows whose Age is missing actually use it.
random_ages = age_rng.integers(
    int(age_avg - age_std),
    int(age_avg + age_std),
    size=len(data),
)
# Assign back to the column rather than using a chained inplace fillna, which
# does not update `data` under pandas Copy-on-Write.
data['Age'] = data['Age'].where(data['Age'].notna(), random_ages)

In [8]:
# Columns that are not used as model features.
delete_columns = ['Name', 'PassengerId', 'SibSp', 'Parch', 'Ticket', 'Cabin']
data = data.drop(columns=delete_columns)

In [9]:
# Split the combined frame back into its train and test parts by position:
# the first len(train) rows came from train, the remainder from test.
n_train = len(train)
train, test = data.iloc[:n_train], data.iloc[n_train:]

In [10]:
# Target vector for training.
y_train = train['Survived']
# Feature matrices: every remaining column except the target.
X_train, X_test = (
    frame.drop(columns='Survived')
    for frame in (train, test)
)

# 機械学習アルゴリズムの学習

In [11]:
from sklearn.linear_model import LogisticRegression

# L2-penalised logistic regression fitted with the SAG solver; random_state is
# fixed so the fit is repeatable.
# NOTE(review): scikit-learn documents that 'sag' converges quickly only when
# features are on a similar scale; Age and Fare are not scaled here - confirm
# whether a scaling step is wanted.
model = LogisticRegression(
    penalty='l2',
    solver='sag',
    random_state=0,
)
model.fit(X_train, y_train)



LogisticRegression(random_state=0, solver='sag')

In [12]:
import pickle

# Persist the fitted model to disk.
filename = 'model_titanic.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling fails; a bare open() passed to pickle.dump is never closed.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# 機械学習アルゴリズムの予測

In [13]:
import pickle

# Reload the model that was written to disk earlier in this notebook.
filename = 'model_titanic.sav'
# SECURITY: pickle.load can execute arbitrary code; only load files that this
# notebook (or another trusted source) produced.
# The context manager closes the file handle, which the previous bare open()
# call left open.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [14]:
# Predict a label for every row of the test feature matrix using the model
# that was reloaded from disk.
y_pred = loaded_model.predict(X_test)

# 提出

In [15]:
# Start from the sample submission file and overwrite its Survived column
# with the predictions cast to plain integers, then write the result as CSV.
sub = pd.read_csv('../input/titanic/gender_submission.csv').assign(
    Survived=[int(label) for label in y_pred]
)
sub.to_csv('submission.csv', index=False)

In [16]:
!pip install kaggle

Collecting kaggle
  Using cached kaggle-1.5.12-py3-none-any.whl
Installing collected packages: kaggle
Successfully installed kaggle-1.5.12


In [17]:
from kaggle.api.kaggle_api_extended import KaggleApi

# Create a Kaggle API client and authenticate it.
# NOTE(review): presumably credentials come from the local Kaggle
# configuration (not from this notebook) - confirm before sharing.
api = KaggleApi()
api.authenticate()

# Upload the generated submission file to the 'titanic' competition.
api.competition_submit(
    file_name='submission.csv',
    message='update',
    competition='titanic',
)

100%|██████████| 2.77k/2.77k [00:01<00:00, 1.52kB/s]


Successfully submitted to Titanic - Machine Learning from Disaster