### Step1: Import all Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Step2: Import the Dataset

In [None]:
# Load the Titanic training CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='../input/titanic/train.csv')

### Step3: Data Overview

In [None]:
# Preview the first five rows.
df.head(n=5)

In [None]:
# Print a summary of the frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# (number of rows, number of columns)
df.shape

### Step4: Exploratory Data Analysis

In [None]:
# Number of passengers for each value of Survived.
sns.countplot(x='Survived', data=df)

In [None]:
# Survived counts, split by Sex.
sns.countplot(x='Survived', hue='Sex', data=df)

In [None]:
# Survived counts, split by Pclass.
sns.countplot(x='Survived', hue='Pclass', data=df)

In [None]:
# Number of passengers for each value of SibSp.
sns.countplot(x='SibSp', data=df)

### Step5: Data Preparation
**A- How much data is missing?**

In [None]:
# Percentage of missing values in every column.
df.isnull().sum().div(len(df)).mul(100)

In [None]:
def missing_percent(df):
    """Return the percentage of missing values per column.

    Only columns that contain at least one missing value are kept, and the
    result is sorted in ascending order of missing percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.Series
        Missing percentage (0-100) indexed by column name.
    """
    null_counts = df.isnull().sum()
    share = 100 * (null_counts / len(df))
    has_missing = share > 0
    return share[has_missing].sort_values()

In [None]:
# Missing-value percentages for the columns of df that have any.
nan_percent = missing_percent(df=df)

In [None]:
# Display the missing percentages computed by missing_percent(df).
nan_percent

In [None]:
# One bar per column with missing data; bar height is the missing percentage.
sns.barplot(y=nan_percent, x=nan_percent.index)

In [None]:
# Dealing with Cabin: its missing percentage is very high, so the column is dropped.
df = df.drop(columns='Cabin')

In [None]:
# Recompute the missing percentages on the current df and plot them again.
nan_percent = missing_percent(df=df)
sns.barplot(y=nan_percent, x=nan_percent.index)

In [None]:
# Dealing with Embarked: show the rows where it is missing.
df.loc[df['Embarked'].isna()]

In [None]:
# Only two rows are missing Embarked, so those rows are dropped.
df = df.dropna(axis='index', subset=['Embarked'])

In [None]:
# Recompute the missing percentages on the current df and plot them again.
nan_percent = missing_percent(df=df)
sns.barplot(y=nan_percent, x=nan_percent.index)

In [None]:
# Dealing with Age:
# Assumption to check: Age is related to Pclass. Compare the Age distribution per class.
sns.boxplot(x='Pclass', y='Age', data=df)

In [None]:
# The boxplot above suggests the Age distribution differs between Pclass groups,
# so missing Age values will be filled with the mean Age of the passenger's Pclass.
# Mean Age within each Pclass:
df['Age'].groupby(df['Pclass']).mean()

In [None]:
# Fill each missing Age with the mean Age of the rows sharing the same Pclass.
age_by_class = df.groupby('Pclass')['Age']
df['Age'] = age_by_class.transform(lambda ages: ages.fillna(ages.mean()))

In [None]:
# Recompute the missing percentages after filling Age.
nan_percent = missing_percent(df=df)


In [None]:
# Display the recomputed missing percentages (empty when no column has missing values).
nan_percent

**Great, now we don't have any missing data.**

### B-Dealing with Categorical Data

In [None]:
# List the column labels currently in df.
df.columns

In [None]:
# Separate the columns kept as-is from the text columns that need encoding.
numeric_cols = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
categorical_cols = ['Sex', 'Embarked']
df_num = df[numeric_cols]
df_obj = df[categorical_cols]

In [None]:
# One-hot encode Sex and Embarked; drop_first removes the first level of each.
df_obj = pd.get_dummies(data=df_obj, drop_first=True)

In [None]:
# (rows, columns) of the encoded categorical frame.
df_obj.shape

In [None]:
# Join the numeric columns and the encoded columns side by side.
df = pd.concat(objs=[df_num, df_obj], axis='columns')

In [None]:
# Preview the first five rows of the combined frame.
df.head(n=5)

**Now the Dataset is ready for any machine learning algorithm**

## Step6: Determine the Features & Target Variable

In [None]:
# Target is Survived; every other column is a feature.
y = df['Survived']
X = df.drop(columns='Survived')

In [None]:
# Preview the first rows of the feature matrix.
X.head()

## Step7: Split the Dataset to Train & Test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

## Step8: Scaling the Features

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler instance with default settings; fitted in a later cell.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training split only, then apply those training
# statistics to the test split. Calling fit_transform on X_test would re-fit
# the scaler, so the two splits would be scaled with different means and
# variances and the test set would influence the preprocessing.
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

## Step9: Train the Model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression classifier with default hyperparameters.
log_model = LogisticRegression()

In [None]:
# Train on the scaled training features and their Survived labels.
log_model.fit(X=scaled_X_train, y=y_train)

In [None]:
# Model coefficients: the weights fitted on the scaled training features.
log_model.coef_

### Step10: Predicting Test Data


In [None]:
# Predicted Survived labels for the scaled test features.
y_pred = log_model.predict(X=scaled_X_test)

In [None]:
# Actual vs. predicted labels for the first five rows of the test set.
comparison = pd.DataFrame({'Y_Test': y_test, 'Y_Pred': y_pred})
comparison.head(5)

### Step10 (continued): Evaluating the Model

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, plot_confusion_matrix

In [None]:
accuracy_score(y_test, y_pred)

In [None]:
plot_confusion_matrix(log_model, scaled_X_test, y_test)

In [None]:
# Text report of per-class precision, recall and F1 on the test set.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

### Step11: Evaluating Curves and AUC


In [None]:
from sklearn.metrics import precision_recall_curve, plot_precision_recall_curve, plot_roc_curve

In [None]:
plot_precision_recall_curve(log_model, scaled_X_test, y_test)

In [None]:
plot_roc_curve(log_model, scaled_X_test, y_test)