In [1]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
# Load the Titanic dataset from a CSV located relative to the working directory.
data_path = 'updated_titanic_data.csv'
df = pd.read_csv(data_path)
# Bare last expression so the notebook renders the frame as a rich table.
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,FamilySize
0,1,0,3,"Braund, Mr. Owen Harris",1,22.000000,1,0,A/5 21171,7.2500,,S,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.000000,1,0,PC 17599,71.2833,C85,C,1
2,3,1,3,"Heikkinen, Miss. Laina",0,26.000000,0,0,STON/O2. 3101282,7.9250,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.000000,1,0,113803,53.1000,C123,S,1
4,5,0,3,"Allen, Mr. William Henry",1,35.000000,0,0,373450,8.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
884,887,0,2,"Montvila, Rev. Juozas",1,27.000000,0,0,211536,13.0000,,S,0
885,888,1,1,"Graham, Miss. Margaret Edith",0,19.000000,0,0,112053,30.0000,B42,S,0
886,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",0,21.773973,1,2,W./C. 6607,23.4500,,S,3
887,890,1,1,"Behr, Mr. Karl Howell",1,26.000000,0,0,111369,30.0000,C148,C,0


In [3]:
# Predictor columns used by the models below; the target is the Survived column.
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']
y = df.loc[:, "Survived"]
x = df.loc[:, features]

In [4]:
# Unfitted building blocks for the preprocessing step.
# handle_unknown='ignore': categories not seen during fit are encoded as
# all zeros at transform time instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
# Standardises numeric columns (default settings: centre and scale to unit variance).
numerical_transformer = StandardScaler()

In [5]:
# Route each group of columns to its own transformer. Columns not listed in
# either group are dropped (ColumnTransformer's default remainder='drop').
numeric_columns = ['Age', 'SibSp', 'Parch']
categorical_columns = ['Pclass', 'Sex', 'Embarked']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numeric_columns),
        ('cat', categorical_transformer, categorical_columns),
    ]
)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Classification pipeline: preprocessing followed by logistic regression.
lr_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42)),
]
lr_pipeline = Pipeline(steps=lr_steps)

# fit() returns the fitted pipeline, so the notebook shows it as the cell output.
lr_pipeline.fit(x_train, y_train)

In [8]:
# Regression pipeline: the same preprocessing recipe followed by ordinary
# least squares.
#
# Pipeline.fit fits its step objects in place (it does not clone them), so
# passing the very same `preprocessor` instance that lr_pipeline already holds
# would refit that shared object whenever this pipeline is fitted. clone()
# returns an unfitted copy with identical parameters, so this pipeline owns its
# preprocessing state independently.
linear_reg_pipeline = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                      ('regressor', LinearRegression())
                                    ])

# NOTE(review): the target appears to be a 0/1 label (see the data preview), so
# predictions here are unbounded continuous scores rather than classes or
# probabilities — confirm this is intended before comparing with lr_pipeline.
linear_reg_pipeline.fit(x_train, y_train)