#### Importing required libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#### Importing datasets

In [None]:
# Read the Titanic training CSV into the working DataFrame `tdata`.
tdata=pd.read_csv('../input/titanic/train.csv')

#### Implementing Exploratory Data Analysis

In [None]:
# Preview the first rows of the training data.
tdata.head()

In [None]:
# (number of rows, number of columns) of the training data.
tdata.shape

In [None]:
# Column labels available in the training data.
tdata.columns

In [None]:
# Number of missing values in each column.
tdata.isnull().sum()

In [None]:
# Row counts for each value of Survived.
sns.countplot(x='Survived',data=tdata)

In [None]:
# Same counts, with the bars split by Sex.
sns.countplot(x='Survived',hue='Sex',data=tdata)

In [None]:
# Same counts, with the bars split by Pclass.
sns.countplot(x='Survived',hue='Pclass',data=tdata)

In [None]:
def addage(cols, reference=None):
    """Return a passenger's age, imputing a missing one from the class mean.

    Intended for ``DataFrame.apply(addage, axis=1)`` over the two columns
    ``['Age', 'Pclass']``, in that order.

    Parameters
    ----------
    cols : pandas.Series or sequence
        Holds the age at position 0 and the passenger class at position 1.
    reference : pandas.DataFrame, optional
        Frame with ``Age`` and ``Pclass`` columns from which the per-class
        mean age is computed. Defaults to the notebook-level ``tdata``.

    Returns
    -------
    The age unchanged when it is present; otherwise the mean ``Age`` of the
    rows in ``reference`` that share the passenger's class. The result is NaN
    when ``reference`` has no known age for that class.
    """
    # Access by position explicitly: cols[0] on a label-indexed Series relies
    # on the deprecated "integer key means position" fallback in pandas.
    values = cols.iloc if hasattr(cols, 'iloc') else cols
    age, pclass = values[0], values[1]

    if not pd.isnull(age):
        return age

    if reference is None:
        # Resolved at call time so the function can be defined before the
        # notebook-level frame exists.
        reference = tdata
    same_class = reference['Pclass'] == pclass
    return reference.loc[same_class, 'Age'].mean()

In [None]:
# Fill missing ages with the mean age of the passenger's class in a single
# vectorized step. The group mean ignores NaN, so the filled values follow the
# same rule as addage() without one Python-level call per row.
train_class_mean = tdata.groupby('Pclass')['Age'].transform('mean')
tdata['Age'] = tdata['Age'].fillna(train_class_mean)

#### Mapping string values

In [None]:
# Encode the categorical columns as integers. Series.map leaves missing or
# unmapped values as NaN, so missing Embarked entries need no explicit key;
# mapping a 'nan' string to the string 'NaN' would only put text into an
# otherwise numeric column.
tdata.Sex=tdata.Sex.map({'female':0, 'male':1})
tdata.Embarked=tdata.Embarked.map({'S':0, 'C':1, 'Q':2})

#### Dropping Column Cabin

In [None]:
# Remove the Cabin column from the training frame in place.
tdata.drop(columns='Cabin', inplace=True)

#### Removing rows with null values

In [None]:
# Drop, in place, every row that still has a missing value in any column.
tdata.dropna(inplace=True)

In [None]:
# Remove the unwanted columns from the training frame in place.
unwanted_columns = ['Name', 'PassengerId', 'Ticket']
tdata.drop(columns=unwanted_columns, inplace=True)

#### Feature Scaling (min-max normalization of Age and Fare)

In [None]:
# Bounds used for min-max scaling of Age and Fare. Series.min()/.max() are
# vectorized and skip NaN, whereas the builtin min()/max() iterate the Series
# element by element and give order-dependent results if a NaN is present.
min_ag = tdata.Age.min()
max_ag = tdata.Age.max()
min_fr = tdata.Fare.min()
max_fr = tdata.Fare.max()

In [None]:
# Min-max scale Age and Fare: subtract the minimum, divide by the range.
age_span = max_ag - min_ag
fare_span = max_fr - min_fr
tdata.Age = (tdata.Age - min_ag) / age_span
tdata.Fare = (tdata.Fare - min_fr) / fare_span

In [None]:
# Preview the training data after cleaning and scaling.
tdata.head()

#### Splitting data into x and y data

In [None]:
# The target is the Survived column; every remaining column is a feature.
y_data = tdata['Survived']
x_data = tdata.drop(columns='Survived')

#### Splitting data into training and test data

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows, stratified on the target, with a fixed seed so the
# split is reproducible.
split_parts = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=0,
    stratify=y_data,
)
x_training_data, x_test_data, y_training_data, y_test_data = split_parts

#### Creating Model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier created with the library's default settings.
model=LogisticRegression()

#### Training model and creating prediction

In [None]:
# fit() returns the fitted estimator itself, so training and predicting on the
# hold-out rows can be chained.
prdtn = model.fit(x_training_data, y_training_data).predict(x_test_data)

In [None]:
# Display the labels predicted for the hold-out rows.
prdtn

#### Accuracy of our model

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report comparing the true hold-out labels with the predictions.
report_text = classification_report(y_test_data, prdtn)
print(report_text)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Fraction of hold-out rows whose predicted label matches the true label.
hold_out_accuracy = accuracy_score(y_test_data, prdtn)
print('Accuracy:', hold_out_accuracy)

#### Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix
# Rows correspond to the true labels, columns to the predicted labels.
confusion_matrix(y_test_data, prdtn)

#### Confusion matrix using seaborn

In [None]:
# Keep the confusion matrix in a variable so it can be plotted.
cu_matrix = confusion_matrix(y_test_data, prdtn)

In [None]:
# fmt='d' prints the integer counts verbatim; the default annotation format
# ('.2g') would render counts of 100 or more in scientific notation.
sns.heatmap(cu_matrix, annot=True, fmt='d')

#### Now we are going to test our model using the test data

In [None]:
# Parse the test CSV once. `df` is transformed into the model input below,
# while `df1` keeps an untouched copy whose PassengerId column is needed for
# the submission file.
df=pd.read_csv('../input/titanic/test.csv')
df1=df.copy()

In [None]:
# Preview the first rows of the test data.
df.head()

In [None]:
# Number of missing values in each column of the test data.
df.isnull().sum()

#### Filling the missing values

In [None]:
# addage() takes its class means from `tdata`, but by this point tdata.Age has
# been min-max scaled, so imputed test ages would be on a 0-1 scale while the
# observed test ages are still in years. Recompute the per-class mean age from
# the raw training file and fill the gaps with those values instead.
raw_train = pd.read_csv('../input/titanic/train.csv')
raw_class_mean_age = raw_train.groupby('Pclass')['Age'].mean()
df['Age'] = df['Age'].fillna(df['Pclass'].map(raw_class_mean_age))

In [None]:
# Replace missing fares in the test data with the median test fare.
fare_median = df['Fare'].median()
df['Fare'] = df['Fare'].fillna(fare_median)

In [None]:
# Encode the categorical columns as integers. Series.map leaves missing or
# unmapped values as NaN, so missing Embarked entries need no explicit key;
# mapping a 'nan' string to the string 'NaN' would only put text into an
# otherwise numeric column.
df.Sex=df.Sex.map({'female':0, 'male':1})
df.Embarked=df.Embarked.map({'S':0, 'C':1, 'Q':2})

In [None]:
# Scale the test features with the bounds taken from the training data
# (min_ag, max_ag, min_fr, max_fr). The model was fitted on features scaled
# with those bounds, so using the test set's own min/max would map the same
# raw age or fare to a different value than the model saw during training.
min_ag1 = min_ag
max_ag1 = max_ag
min_fr1 = min_fr
max_fr1 = max_fr

In [None]:
# Min-max scale Age and Fare of the test data: subtract the lower bound,
# divide by the range.
test_age_span = max_ag1 - min_ag1
test_fare_span = max_fr1 - min_fr1
df.Age = (df.Age - min_ag1) / test_age_span
df.Fare = (df.Fare - min_fr1) / test_fare_span

In [None]:
# Remove, in place, the test-data columns that were also removed from the
# training frame.
dropped_test_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']
df.drop(columns=dropped_test_columns, inplace=True)

In [None]:
# Preview the prepared test features.
df.head()

#### Predicting the result

In [None]:
# Predict a Survived label for every row of the prepared test frame.
pdtn=model.predict(df)

In [None]:
# Display the predicted labels.
pdtn

In [None]:
# Pair each PassengerId from the untouched test copy with its predicted label
# and write the result without the index column.
submission = pd.DataFrame(
    {
        'PassengerId': df1['PassengerId'],
        'Survived': pdtn,
    }
)
submission.to_csv('submission.csv', index=False)

In [None]:
# Read the submission file that was just written back into a DataFrame.
prdt_df=pd.read_csv('submission.csv')

In [None]:
# Row counts for each predicted value of Survived.
sns.countplot(x='Survived',data=prdt_df)