### Import the Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Import the dataset

In [None]:
# Load the Titanic training CSV into a DataFrame.
train = pd.read_csv(
    filepath_or_buffer='../input/titanic-train-public-dataset/titanic_train.csv'
)

### Display the dataset

In [None]:
# Preview the first rows of the freshly loaded training data.
train.head()

### Check the Null Values

In [None]:
# Count the missing values per column; this is a compact summary instead of
# displaying a full frame of True/False flags for every cell.
train.isnull().sum()

### Check the Null Values using Heatmap

In [None]:
# Draw the null mask as a heatmap: one row per record, one column per feature.
sns.heatmap(
    train.isnull(),
    cmap='viridis',
    yticklabels=False,
    cbar=False,
)

### Exploratory Data Analysis

In [None]:
# Apply the 'whitegrid' seaborn style, then count records per Survived value.
sns.set_style('whitegrid')
sns.countplot(data=train, x='Survived')

In [None]:
# Survived counts, split by Sex.
sns.countplot(data=train, x='Survived', hue='Sex')

In [None]:
# Survived counts, split by Pclass.
sns.countplot(data=train, x='Survived', hue='Pclass')

In [None]:
# Histogram of the known ages (nulls dropped first). `sns.distplot` is
# deprecated in seaborn; `sns.histplot` is its documented replacement for a
# plain histogram and plots counts by default.
sns.histplot(train['Age'].dropna(), kde=False, bins=50)

In [None]:
# Same Age histogram, drawn through the pandas plotting interface.
train['Age'].plot(kind='hist', bins=50)

In [None]:
# Summarise the columns: dtypes and non-null counts.
train.info()

In [None]:
# Number of records for each SibSp value.
sns.countplot(data=train, x='SibSp')

In [None]:
# Distribution of Fare on a wide (10x4) figure.
train['Fare'].hist(figsize=(10, 4), bins=50)

In [None]:
# cufflinks is needed for the `.iplot` call on the Fare column in the following cell.
import cufflinks as cf
# NOTE(review): presumably switches cufflinks/plotly to offline rendering — confirm.
cf.go_offline()

In [None]:
# Fare histogram again, this time through the `.iplot` accessor.
train['Fare'].iplot(bins=50, kind='hist')

In [None]:
# Box plot of Age for each Pclass on a 10x7 figure.
plt.figure(figsize=(10, 7))
sns.boxplot(data=train, x='Pclass', y='Age')

### Dealing with Missing Values

In [None]:
def impute_age(cols):
    """Return the age, substituting a per-class default when it is missing.

    Parameters
    ----------
    cols : sequence or pandas.Series
        Two values in the order (Age, Pclass), e.g. one row of
        ``train[['Age', 'Pclass']]`` as passed by ``DataFrame.apply(axis=1)``.

    Returns
    -------
    The original age when it is not null; otherwise 37 for class 1,
    29 for class 2 and 24 for any other class.
    """
    # Read both fields by position without `cols[0]`: integer keys on a
    # label-indexed Series are deprecated as positional lookups in recent pandas.
    values = list(cols)
    Age = values[0]
    Pclass = values[1]

    # Known ages pass through untouched.
    if not pd.isnull(Age):
        return Age

    # NOTE(review): the fill values presumably approximate the per-class ages
    # seen in the Pclass/Age box plot — confirm against the data.
    if Pclass == 1:
        return 37
    elif Pclass == 2:
        return 29
    return 24

In [None]:
# Fill missing ages row by row; the column order (Age, Pclass) must match
# the positions impute_age reads.
train['Age'] = train.loc[:, ['Age', 'Pclass']].apply(impute_age, axis='columns')

In [None]:
# Re-draw the null heatmap after imputing Age.
sns.heatmap(
    train.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

In [None]:
# Remove the Cabin column from the frame in place.
train.drop(columns='Cabin', inplace=True)

In [None]:
# Preview the frame after the Cabin column has been dropped.
train.head()

In [None]:
# Null heatmap once Cabin is gone.
sns.heatmap(
    train.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

In [None]:
# Drop, in place, every row that still contains at least one null value.
train.dropna(axis=0, how='any', inplace=True)

In [None]:
# Final null heatmap after dropping the remaining rows with nulls.
sns.heatmap(
    train.isnull(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

Here, we can see that there are no more missing values.

In [None]:
# Display the indicator (dummy) columns generated from Sex; the result is not stored.
pd.get_dummies(train['Sex'])

In [None]:
# Encode Sex as indicator columns; drop_first=True removes the first level
# so only k-1 columns are kept for k categories.
sex = pd.get_dummies(data=train['Sex'], drop_first=True)

In [None]:
# Display the encoded Sex indicator frame.
sex

In [None]:
# Display the indicator (dummy) columns generated from Embarked; the result is not stored.
pd.get_dummies(train['Embarked'])

In [None]:
# Encode Embarked as indicator columns; drop_first=True removes the first
# level so only k-1 columns are kept for k categories.
embark = pd.get_dummies(data=train['Embarked'], drop_first=True)

In [None]:
# Display the encoded Embarked indicator frame.
embark

In [None]:
# Append the Sex and Embarked indicator columns to the frame, side by side.
train = pd.concat(objs=[train, sex, embark], axis='columns')

In [None]:
# Preview the frame with the indicator columns appended.
train.head()

In [None]:
# Drop Sex and Embarked (already encoded as indicator columns) together with
# Name and Ticket, in place.
train.drop(columns=['Sex', 'Embarked', 'Name', 'Ticket'], inplace=True)

In [None]:
# Preview the first rows after dropping Sex, Embarked, Name and Ticket.
train.head()

In [None]:
# Preview the last rows of the same frame.
train.tail()

In [None]:
# Remove the PassengerId column from the frame in place.
train.drop(columns='PassengerId', inplace=True)

In [None]:
# Preview the frame after PassengerId has been dropped.
train.head()

### Splitting the data into Train and Test sets

In [None]:
# Target is the Survived column; features are every other remaining column.
y = train['Survived']
X = train.drop(columns='Survived')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split. max_iter is
# raised above the default of 100 because the solver can stop before
# converging on unscaled features and emit a ConvergenceWarning.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test features with the fitted model.
predictions = logmodel.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

# Print the text classification report comparing test labels to predictions.
print(classification_report(y_true=y_test, y_pred=predictions))

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test labels against the model's predictions.
confusion_matrix(y_true=y_test, y_pred=predictions)