### Import Libraries

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Load Data

In [3]:
# Path is relative, so it is resolved against the kernel's working directory.
TITANIC_CSV = 'titanic.csv'
# Load every passenger record into a single DataFrame.
passengers = pd.read_csv(TITANIC_CSV)

### Explore Data

In [4]:
# List the column labels of the loaded frame to see which fields are available.
passengers.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [5]:
# Preview the first rows (head() shows 5 by default) to check how the values look.
passengers.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Convert Label Values to Numeric Values

In [6]:
# Learn the distinct 'Sex' labels first, then replace the column with their
# integer codes (for this data: female -> 0, male -> 1).
encoder = LabelEncoder().fit(passengers['Sex'])
passengers['Sex'] = encoder.transform(passengers['Sex'])

In [7]:
# Display the encoded column to confirm it now holds integer codes instead of strings.
passengers['Sex']

0      1
1      0
2      0
3      0
4      1
      ..
886    1
887    0
888    0
889    1
890    1
Name: Sex, Length: 891, dtype: int32

In [8]:
# Compute one boolean per column (True when at least one value is missing),
# then report each column on its own line.
null_flags = passengers.isnull().any()
for column, has_null in null_flags.items():
    print(f"{column} has null values:{has_null}")

PassengerId has null values:False
Survived has null values:False
Pclass has null values:False
Name has null values:False
Sex has null values:False
Age has null values:True
SibSp has null values:False
Parch has null values:False
Ticket has null values:False
Fare has null values:False
Cabin has null values:True
Embarked has null values:True


### Imputation of NaN values (mean Age)

In [14]:
# Average of the known ages; nanmean skips the NaN entries.
age_values = passengers['Age'].to_numpy()
mean_age = np.nanmean(age_values)

In [15]:
# Replace missing ages with the mean of the known ages.
# The result is assigned back rather than using fillna(inplace=True) on the
# selected column: that in-place call acts on an intermediate object, which
# pandas warns about and which leaves `passengers` unchanged under Copy-on-Write.
passengers['Age'] = passengers['Age'].fillna(value=mean_age)

### Add new columns

In [16]:
# 0/1 indicator columns for first and second class. Any other Pclass value
# gets 0 in both columns, so it acts as the implicit baseline.
# A vectorized comparison replaces the per-row Python lambda; the cast keeps
# the same integer dtype (int64) that the lambda version produced.
passengers['Class1'] = passengers['Pclass'].eq(1).astype('int64')
passengers['Class2'] = passengers['Pclass'].eq(2).astype('int64')

### Split Features and Target Column

In [20]:
# Model inputs: encoded sex, imputed age, the two class indicators and the fare.
feature_columns = ['Sex', 'Age', 'Class1', 'Class2', 'Fare']
features = passengers[feature_columns]
# 0/1 survival label the classifier is trained to predict.
target = passengers['Survived']

### Create training and test set

In [25]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    train_size=0.8,
    test_size=0.2,
    random_state=100,
)
# Learn the scaling statistics from the training rows only, then apply that
# same scaling to both splits so the test rows never influence the fit.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [26]:
# Inspect the standardized training matrix (a NumPy array after scaling).
x_train

array([[ 0.7243102 , -0.69550063, -0.58383755, -0.5078883 , -0.48578107],
       [ 0.7243102 , -1.61570893, -0.58383755, -0.5078883 ,  0.25965275],
       [ 0.7243102 , -0.08202843, -0.58383755, -0.5078883 , -0.48625739],
       ...,
       [-1.38062393,  0.68481182,  1.7128052 , -0.5078883 ,  0.95046842],
       [-1.38062393, -0.02841728, -0.58383755, -0.5078883 ,  0.69119463],
       [-1.38062393, -0.00534441,  1.7128052 , -0.5078883 ,  1.14750492]])

### Training of the model

In [27]:
# Fit a logistic regression with default settings on the scaled training data.
model = LogisticRegression().fit(x_train, y_train)
# Mean accuracy on the rows the model was trained on ...
train_accuracy = model.score(x_train, y_train)
# ... and on the held-out rows it has not seen.
test_accuracy = model.score(x_test, y_test)
print(train_accuracy)
print(test_accuracy)

0.7935393258426966
0.7932960893854749


### Coefficients' Analysis

In [28]:
# One row of weights, in the same order as the feature columns:
# Sex, Age, Class1, Class2, Fare. The model was fitted on standardized
# inputs, so each weight is expressed per standard deviation of its feature.
coeff = model.coef_
coeff

array([[-1.22840254, -0.42565236,  0.95863525,  0.53200217,  0.08043013]])

### Survival Predictions 

In [30]:
# A hypothetical passenger, in the training feature order
# (Sex, Age, Class1, Class2, Fare): male, 38 years old, second class, fare 44.9.
just_a_random_guy = np.array([1.0, 38.0, 0.0, 1.0, 44.900])
# The scaler was fitted on a DataFrame, so give it a one-row frame with the
# same column labels. This ties each value to its feature by name and avoids
# scikit-learn's "X does not have valid feature names" warning for bare arrays.
random_passengers = pd.DataFrame([just_a_random_guy], columns=features.columns)
# Apply the training-set scaling before predicting; the model only ever saw scaled inputs.
random_passengers = scaler.transform(random_passengers)
model.predict(random_passengers)



array([0], dtype=int64)