In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Read the training set from the CSV under ../input/ into a DataFrame.
train = pd.read_csv(filepath_or_buffer='../input/train.csv')

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) to the default output.
train.info(buf=None)

In [None]:
# Show the first five rows of the freshly loaded data.
train.head(n=5)

# Data Visualization

In [None]:
# Map the missing values: one heatmap cell per DataFrame entry, coloured by
# whether that entry is null. Row labels and the colour bar are switched off.
sns.heatmap(data=train.isna(), cmap='YlGnBu', cbar=False, yticklabels=False)

In [None]:
# Use seaborn's 'whitegrid' style for the plots that follow.
sns.set_style(style='whitegrid')

In [None]:
# Number of rows per value of Survived.
sns.countplot(data=train, x='Survived')

In [None]:
# Survived counts again, with each bar split by Sex.
sns.countplot(data=train, x='Survived', hue='Sex', palette='RdBu_r')

In [None]:
# Survived counts again, with each bar split by Pclass.
sns.countplot(data=train, x='Survived', hue='Pclass')

In [None]:
# Histogram of the known ages (missing values dropped) in 30 bins.
# histplot replaces distplot(kde=False), which seaborn has deprecated and
# scheduled for removal; histplot needs seaborn >= 0.11.
sns.histplot(train['Age'].dropna(), bins=30)

In [None]:
# The same Age histogram, drawn through the pandas plotting interface.
train['Age'].plot(kind='hist', bins=30)

In [None]:
# Number of rows per value of SibSp.
sns.countplot(data=train, x='SibSp')

In [None]:
# Fare histogram with 40 bins on a wide 10x4 figure.
train['Fare'].hist(figsize=(10, 4), bins=40)

# Cleaning Data

In [None]:
import cufflinks as cf

In [None]:
# NOTE(review): presumably switches cufflinks/plotly to offline rendering so
# that .iplot() can draw inside the notebook — confirm against the cufflinks docs.
cf.go_offline()

In [None]:
# Fare histogram again, this time through the .iplot() accessor with 50 bins.
train['Fare'].iplot(bins=50, kind='hist')

In [None]:
# Age distribution within each passenger class.
sns.boxplot(data=train, x='Pclass', y='Age')

In [None]:
def impute_age(cols, class_ages=None, default_age=24):
    """Return the age, or a fixed per-class estimate when the age is missing.

    Intended for ``DataFrame.apply(impute_age, axis=1)`` on a two-column
    selection ordered ``[Age, Pclass]``.

    Parameters
    ----------
    cols : pandas.Series or sequence
        Two values read by position: the age first, the passenger class second.
    class_ages : dict, optional
        Maps a passenger class to the age used when the age is null.
        Defaults to ``{1: 37, 2: 29}``.
        NOTE(review): these figures were presumably read off the Age-by-Pclass
        boxplot — confirm.
    default_age : number, optional
        Age used when the class is not a key of ``class_ages``. Defaults to 24.

    Returns
    -------
    The original age when it is not null, otherwise the estimate for the class.
    """
    if class_ages is None:
        class_ages = {1: 37, 2: 29}

    # Read the two values by position explicitly. Plain ``cols[0]`` on a
    # label-indexed Series depends on pandas' deprecated integer-key fallback.
    # Lists, tuples and arrays have no ``.iloc`` and are indexed directly.
    values = cols.iloc if hasattr(cols, 'iloc') else cols
    age, pclass = values[0], values[1]

    if pd.isnull(age):
        return class_ages.get(pclass, default_age)
    return age
            

In [None]:
# Run impute_age over each row's (Age, Pclass) pair and store the result as Age.
train['Age'] = train.loc[:, ['Age', 'Pclass']].apply(impute_age, axis='columns')

In [None]:
# Redraw the null map of the frame after the Age column has been reassigned.
sns.heatmap(data=train.isna(), cmap='BuPu', cbar=False, yticklabels=False)

In [None]:
# Remove the Cabin column. Rebinding instead of inplace=True, together with
# errors='ignore', lets this cell be re-run without a KeyError once the column
# is already gone.
train = train.drop(columns='Cabin', errors='ignore')

In [None]:
# First five rows after the Cabin column was dropped.
train.head(n=5)

In [None]:
# Null map once more, to see what is still missing before rows are dropped.
sns.heatmap(data=train.isna(), cmap='Greens', cbar=False, yticklabels=False)

In [None]:
# Discard, in place, every row that still contains at least one null value.
train.dropna(axis='index', how='any', inplace=True)

# Categorical Data

In [None]:
# Indicator columns for Sex; drop_first leaves out the first category.
sex = pd.get_dummies(data=train['Sex'], drop_first=True)

In [None]:
# Indicator columns for Embarked; drop_first leaves out the first category.
embark = pd.get_dummies(data=train['Embarked'], drop_first=True)

In [None]:
# Append the indicator columns to the right of the existing frame.
train = pd.concat(objs=[train, sex, embark], axis='columns')

In [None]:
# First five rows, now including the appended indicator columns.
train.head(n=5)

In [None]:
# Remove the Sex, Embarked, Name and Ticket columns. Rebinding instead of
# inplace=True, together with errors='ignore', lets this cell be re-run
# without a KeyError once the columns are already gone.
train = train.drop(columns=['Sex', 'Embarked', 'Name', 'Ticket'], errors='ignore')

In [None]:
# First five rows after the Sex, Embarked, Name and Ticket columns were removed.
train.head(n=5)

In [None]:
# Last five rows of the same frame.
train.tail(n=5)

In [None]:
# Remove the PassengerId column. Rebinding instead of inplace=True, together
# with errors='ignore', lets this cell be re-run without a KeyError once the
# column is already gone.
train = train.drop(columns=['PassengerId'], errors='ignore')

In [None]:
# First five rows of the frame that goes into the model.
train.head(n=5)

# Train Model

In [None]:
# Target: the Survived column. Features: every other column.
y = train['Survived']
X = train.drop(columns='Survived')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 40% of the rows for evaluation. A fixed random_state makes the
# shuffle, and therefore the reported metrics, identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# The features are fed in unscaled, and the default cap of 100 iterations is
# often too low for the solver to converge on such data. Allow more iterations
# so the fit does not stop early with a ConvergenceWarning.
model = LogisticRegression(max_iter=1000)

In [None]:
# Fit the classifier on the training portion of the split.
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted labels for the held-out rows.
predictions = model.predict(X=X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Text report comparing the held-out labels with the predictions.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
# Confusion matrix of the held-out labels against the predictions.
print(confusion_matrix(y_true=y_test, y_pred=predictions))