# Loading the Data

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

titanic = pd.read_csv("train.csv")

titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# Pre-Processing

# 1. Drop features that do not seem to add any value to our model

In [13]:
titanic.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1, inplace=True)

# 2. Create categorical dummies for the embarkment ports

In [14]:
ports = pd.get_dummies(titanic.Embarked, prefix='Embarked')
ports.head()

Unnamed: 0,Embarked_C,Embarked_Q,Embarked_S
0,0,0,1
1,1,0,0
2,0,0,1
3,0,0,1
4,0,0,1


In [15]:
titanic = titanic.join(ports)
titanic.drop(['Embarked'], axis=1, inplace=True)

In [16]:
titanic

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,0,3,male,22.0,1,0,7.2500,0,0,1
1,1,1,female,38.0,1,0,71.2833,1,0,0
2,1,3,female,26.0,0,0,7.9250,0,0,1
3,1,1,female,35.0,1,0,53.1000,0,0,1
4,0,3,male,35.0,0,0,8.0500,0,0,1
...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,0,0,1
887,1,1,female,19.0,0,0,30.0000,0,0,1
888,0,3,female,,1,2,23.4500,0,0,1
889,1,1,male,26.0,0,0,30.0000,1,0,0


# 3. Transform gender names to binaries

In [17]:
titanic.Sex = titanic.Sex.map({'male': 0, 'female': 1})

In [18]:
titanic

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,0,3,0,22.0,1,0,7.2500,0,0,1
1,1,1,1,38.0,1,0,71.2833,1,0,0
2,1,3,1,26.0,0,0,7.9250,0,0,1
3,1,1,1,35.0,1,0,53.1000,0,0,1
4,0,3,0,35.0,0,0,8.0500,0,0,1
...,...,...,...,...,...,...,...,...,...,...
886,0,2,0,27.0,0,0,13.0000,0,0,1
887,1,1,1,19.0,0,0,30.0000,0,0,1
888,0,3,1,,1,2,23.4500,0,0,1
889,1,1,0,26.0,0,0,30.0000,1,0,0


# 4. Replace missing values

In [19]:
titanic[pd.isnull(titanic).any(axis=1)]

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
5,0,3,0,,0,0,8.4583,0,1,0
17,1,2,0,,0,0,13.0000,0,0,1
19,1,3,1,,0,0,7.2250,1,0,0
26,0,3,0,,0,0,7.2250,1,0,0
28,1,3,1,,0,0,7.8792,0,1,0
...,...,...,...,...,...,...,...,...,...,...
859,0,3,0,,0,0,7.2292,1,0,0
863,0,3,1,,8,2,69.5500,0,0,1
868,0,3,0,,0,0,9.5000,0,0,1
878,0,3,0,,0,0,7.8958,0,0,1


In [20]:
titanic.isnull().sum()

Survived        0
Pclass          0
Sex             0
Age           177
SibSp           0
Parch           0
Fare            0
Embarked_C      0
Embarked_Q      0
Embarked_S      0
dtype: int64

In [22]:
!pip install fancyimpute

Collecting fancyimpute
  Downloading fancyimpute-0.5.5.tar.gz (20 kB)
Collecting knnimpute
  Downloading knnimpute-0.1.0.tar.gz (8.3 kB)
Collecting cvxpy>=1.0.6
  Downloading cvxpy-1.1.7.tar.gz (1.0 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting keras>=2.0.0
  Downloading Keras-2.4.3-py2.py3-none-any.whl (36 kB)
Collecting tensorflow
  Downloading tensorflow-2.3.1-cp38-cp38-win_amd64.whl (342.5 MB)
Collecting scs>=1.1.6
  Downloading scs-2.1.2.tar.gz (3.5 MB)
Collecting osqp>=0.4.1
  Downloading osqp-0.6.1-cp38-none-win_amd64.whl (153 kB)
Collecting ecos>=2
  Downloading ecos-2.0.7.post1.tar.gz (126 kB)

  ERROR: Command errored out with exit status 1:
   command: 'C:\Users\hp\Anaconda0312\python.exe' 'C:\Users\hp\Anaconda0312\lib\site-packages\pip\_vendor\pep517\_in_process.py' build_wheel 'C:\Users\hp\AppData\Local\Temp\tmptldhd600'
       cwd: C:\Users\hp\AppData\Local\Temp\pip-install-l5co925z\cvxpy
  Complete output (411 lines):
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.8
  creating build\lib.win-amd64-3.8\cvxpy
  copying cvxpy\error.py -> build\lib.win-amd64-3.8\cvxpy
  copying cvxpy\settings.py -> build\lib.win-amd64-3.8\cvxpy
  copying cvxpy\__init__.py -> build\lib.win-amd64-3.8\cvxpy
  creating build\lib.win-amd64-3.8\examples
  copying examples\admm_example.py -> build\lib.win-amd64-3.8\examples
  copying examples\admm_lasso.py -> build\lib.win-amd64-3.8\examples
  copying examples\admm_svm_pool.py -> build\lib.win-amd64-3.8\examples
  copying examples\admm_svm_procs.py -> build\lib.win-amd64-3.8\examples
  cop


Collecting termcolor>=1.1.0
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Collecting opt-einsum>=2.3.2
  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)
Collecting astunparse==1.6.3
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting absl-py>=0.7.0
  Downloading absl_py-0.11.0-py3-none-any.whl (127 kB)
Collecting keras-preprocessing<1.2,>=1.1.1
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
Collecting google-pasta>=0.1.8
  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting tensorboard<3,>=2.3.0
  Downloading tensorboard-2.4.0-py3-none-any.whl (10.6 MB)
Collecting gast==0.3.3
  Downloading gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Collecting protobuf>=3.9.2
  Downloading protobuf-3.14.0-py2.py3-none-any.whl (173 kB)
Collecting grpcio>=1.8.6
  Downloading grpcio-1.34.0-cp38-cp38-win_amd64.whl (2.9 MB)
Collecting tensorflow-estimator<2.4.0,>=2.3.0
  Downloading tensorflow_estimator-2.3.0-py2.py3-none-any.whl (459 kB)
Collectin


  copying cvxpy\atoms\norm_nuc.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\one_minus_pos.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\pf_eigenvalue.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\pnorm.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\prod.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\quad_form.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\quad_over_lin.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\sigma_max.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\sign.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\sum_largest.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\sum_smallest.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\sum_squares.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\suppfunc.py -> build\lib.win-amd64-3.8\cvxpy\atoms
  copying cvxpy\atoms\total_variat

  copying cvxpy\transforms\scalarize.py -> build\lib.win-amd64-3.8\cvxpy\transforms
  copying cvxpy\transforms\suppfunc.py -> build\lib.win-amd64-3.8\cvxpy\transforms
  copying cvxpy\transforms\__init__.py -> build\lib.win-amd64-3.8\cvxpy\transforms
  creating build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\canonical.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\coeff_extractor.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\cvxpy_upgrade.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\debug_tools.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\deterministic.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\grad.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\key_utils.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilities\performance_utils.py -> build\lib.win-amd64-3.8\cvxpy\utilities
  copying cvxpy\utilitie


  Building wheel for knnimpute (setup.py): finished with status 'done'
  Created wheel for knnimpute: filename=knnimpute-0.1.0-py3-none-any.whl size=11357 sha256=55c04789ad9a9db0b17cddbad6a802752fb6a54ac5e7c484b0b07daf9d2018c9
  Stored in directory: c:\users\hp\appdata\local\pip\cache\wheels\5f\e9\7a\9969b4e11eb626b45f12a46849b8c65aa718244a243583caf1
  Building wheel for cvxpy (PEP 517): started
  Building wheel for cvxpy (PEP 517): finished with status 'error'
  Building wheel for scs (setup.py): started
  Building wheel for scs (setup.py): finished with status 'error'
  Running setup.py clean for scs
  Building wheel for ecos (setup.py): started
  Building wheel for ecos (setup.py): finished with status 'error'
  Running setup.py clean for ecos
  Building wheel for termcolor (setup.py): started
  Building wheel for termcolor (setup.py): finished with status 'done'
  Created wheel for termcolor: filename=termcolor-1.1.0-py3-none-any.whl size=4835 sha256=2c1a4a7a0e02db7ac02d955f4d7c6d

In [24]:
#Impute the missing values.
from fancyimpute import MICE
trans = MICE(verbose=False)
f_complete = trans.complete(titanic)

ModuleNotFoundError: No module named 'fancyimpute'

In [10]:
titanic.Age.fillna(titanic.Age.mean(), inplace=True)

In [11]:
titanic.columns

Index(['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked_C', 'Embarked_Q', 'Embarked_S'],
      dtype='object')

In [13]:
X = titanic[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare',
       'Embarked_C', 'Embarked_Q', 'Embarked_S']]

In [14]:
y = titanic[['Survived']]

# TRAIN TEST SPLIT

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)

In [25]:
model = LogisticRegression()
model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [26]:
model.predict(X_test)

array([1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [27]:
y_pred = model.predict(X_test)

In [28]:
y_pred

array([1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [None]:
from sklearn import metrics

In [31]:
y_pred = pd.Series(model.predict(X_test))

In [32]:
y_pred

0      1
1      0
2      1
3      0
4      0
      ..
218    0
219    0
220    0
221    0
222    0
Length: 223, dtype: int64

In [33]:
y_test = y_test.reset_index(drop=True)

In [34]:
y_test

Unnamed: 0,Survived
0,1
1,0
2,0
3,0
4,0
...,...
218,1
219,0
220,0
221,1


In [39]:
z1 = pd.concat([y_test,y_pred],axis=1)

In [40]:
z1.columns = ['Actual','Predicted']

In [48]:
z1

Unnamed: 0,Actual,Predicted
0,1,1
1,0,0
2,0,1
3,0,0
4,0,0
...,...,...
218,1,0
219,0,0
220,0,0
221,1,0


In [42]:
from sklearn import metrics

In [44]:
metrics.accuracy_score(y_test,y_pred)

0.8071748878923767

In [46]:
metrics.precision_score(y_test,y_pred)

0.759493670886076

In [47]:
metrics.recall_score(y_test,y_pred)

0.7142857142857143

In [45]:
metrics.confusion_matrix(y_test,y_pred)

array([[120,  19],
       [ 24,  60]], dtype=int64)

In [None]:
    
0 [[120,  19],     
1  [ 24,  60]]

0[130,9 ]
1[10,74 ]

In [None]:
metrics.accuracy_score(y_test,y_pred)