In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Fetch the data - data acquisition

In [None]:
# Read titanic_train.csv into a DataFrame and preview its last five rows.
train = pd.read_csv(filepath_or_buffer="titanic_train.csv")
train.tail(n=5)

### Exploratory Data Analysis

In [None]:
# Missing data

In [None]:
# Number of missing values in each column.
train.isna().sum(axis=0)

In [None]:
# Heatmap of the boolean null mask: shows where values are missing.
sns.heatmap(data=train.isna())

In [None]:
# Bar counts of Survived, split by Gender.
sns.countplot(data=train, x='Survived', hue='Gender', palette='RdBu_r')

In [None]:
# Dimensions of the frame as (rows, columns).
train.shape

In [None]:
# DataFrame.query filters rows with a string expression; here we count the
# rows where Survived is 1 and Gender is 'male'.
train.query("Survived==1 and Gender=='male'").shape[0]

In [None]:
# Histogram of the known ages (nulls dropped), 30 bins, no density curve.
sns.histplot(data=train['Age'].dropna(), bins=30, color='red', kde=False)

In [None]:
# Age distribution split by Survived, with a KDE curve per group.
# `fill` is a boolean flag; the original passed the string 'True', which only
# worked because any non-empty string is truthy.
sns.histplot(data=train, x='Age', hue='Survived', kde=True, fill=True)

In [None]:
# How many rows there are for each SibSp value.
sns.countplot(data=train, x='SibSp')

In [None]:
# Histogram of the Fare column using 50 bins.
train['Fare'].hist(bins=50)

### Data Cleaning

In [None]:
# Age distribution per passenger class.
sns.boxplot(data=train, x='Pclass', y='Age')

In [None]:
def replace_age(c):
    """Return a passenger's age, substituting a per-class default when it is null.

    Parameters
    ----------
    c : sequence-like
        Holds ``age`` first and ``pclass`` second, e.g. one row of
        ``train[['Age', 'Pclass']]`` as passed by ``DataFrame.apply(axis=1)``.
        Lists and tuples are accepted as well; extra trailing items are ignored.

    Returns
    -------
    The original age when it is not null; otherwise 38 for class 1,
    29 for class 2, and 24 for any other class value.
    """
    # Unpack by iteration instead of c[0] / c[1]: on a label-indexed Series,
    # integer keys are a deprecated positional fallback in pandas 2.x.
    age, pclass, *_ = c

    if not pd.isnull(age):
        return age

    # Fixed fill-in ages per passenger class.
    if pclass == 1:
        return 38
    if pclass == 2:
        return 29
    return 24

In [None]:
# Fill missing ages row by row. Column order matters here: replace_age reads
# the first value as the age and the second as the passenger class.
train['Age'] = train.loc[:, ['Age', 'Pclass']].apply(replace_age, axis='columns')

In [None]:
# Preview the first rows of train to check the Age column.
train.head()

In [None]:
# Redraw the null-mask heatmap to see which columns still have missing values.
sns.heatmap(data=train.isna())

In [None]:
# Remove the Cabin column. Rebinding with errors='ignore' (instead of an
# in-place drop) keeps this cell safe to re-run: a second execution is a no-op
# rather than a KeyError.
train = train.drop(columns='Cabin', errors='ignore')

In [None]:
# Preview the first rows of train to confirm the Cabin column is gone.
train.head()

In [None]:
# Null-mask heatmap once more, now without the Cabin column.
sns.heatmap(data=train.isna())

### Feature Engineering

In [None]:
# Look at the first ten rows before encoding the categorical columns.
train.head(n=10)

In [None]:
# One-hot encode Gender; drop_first=True omits the first category's column so
# the remaining indicator columns are not redundant.
Gender = pd.get_dummies(data=train['Gender'], drop_first=True)
Gender

In [None]:
# One-hot encode Embarked, again omitting the first category's column.
embark = pd.get_dummies(data=train['Embarked'], drop_first=True)
embark

In [None]:
# Swap the raw text columns for their dummy encodings in one expression.
# Nothing is mutated in place, so if Gender or embark is not defined yet the
# statement fails before train is rebound and the source columns are still
# there for the get_dummies cells to use.
train = pd.concat(
    [train.drop(columns=['Gender', 'Embarked', 'Name', 'Ticket']), Gender, embark],
    axis=1,
)

In [None]:
# Preview the first rows of train after the encoded columns were appended.
train.head()

In [None]:
# Inspect the dtype of the 'male' column (presumably the indicator produced by
# the Gender get_dummies call — confirm against the columns shown above).
train['male'].dtype

### Training the Model

In [None]:
from sklearn.model_selection import train_test_split

In [None]:

# Hold out 10% of the rows for testing; features are every column except
# Survived, and the target is Survived. random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    train.drop(columns='Survived'),
    train['Survived'],
    test_size=0.10,
    random_state=101,
)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Build a logistic-regression classifier with the liblinear solver and fit it
# on the training split.
logmodel = LogisticRegression(solver='liblinear')
logmodel.fit(X=X_train, y=y_train)

In [None]:
# Predicted Survived labels for the held-out rows.
predictions = logmodel.predict(X=X_test)

### Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Per-class precision, recall and F1 on the test split.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
# Confusion matrix: rows are the true labels, columns the predicted labels.
print(confusion_matrix(y_true=y_test, y_pred=predictions))