## Initializing

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read by path.
drive.mount('/content/drive')

In [None]:
# Location of the Titanic CSV inside the mounted Google Drive.
file_string = "/content/drive/My Drive/titanic_data.csv"
# Read the CSV into a DataFrame; the following cells all work from `data`.
data = pd.read_csv(file_string)

## Exploration

In [None]:
# Show the loaded DataFrame as this cell's output.
data

`pclass`: passenger class (1-3)

`sibsp`: number of siblings or spouses aboard

`parch`: number of parents or children aboard

`fare`: passenger fare

`embarked`: port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)




In [None]:
# Summary statistics; include='all' covers non-numeric columns as well as numeric ones.
data.describe(include='all')

In [None]:
# Per-column dtype and non-null count, useful for spotting missing values.
data.info()

## Preprocessing

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Discard every row (sample) that contains at least one NaN.
# Rows are dropna's default axis, so no axis argument is needed.
data = data.dropna()
print(f"The dataset's shape without NaN values: {data.shape}")

In [None]:
# One-hot encode the categorical columns, then discard `sex_male`:
# everything it says is already carried by `sex_female`, so keeping both is redundant.
data = pd.get_dummies(data).drop(columns=['sex_male'])
data.head()

In [None]:
# Separate the label (y) from the features (X).
y = data["survived"]
X = data.drop(columns="survived")
# learner view:
# X = data.drop(label_here, axis=1)
# y = data.loc[:, label_here]

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
print(f"X train length: {len(X_train)}, Y train length: {len(y_train)}")
print(f"X test length: {len(X_test)}, Y test length {len(y_test)}")

## Training

In [None]:
# Fit a logistic-regression classifier on the training split.
# max_iter=500 raises the solver's iteration cap above scikit-learn's default of 100.
log_reg_model = LogisticRegression(max_iter=500)
log_reg_model.fit(X_train, y_train)
# learner view:
# log_reg_model.fit("what goes here?")

## Testing

In [None]:
# For a classifier, .score() returns the mean accuracy on the given features and labels.
log_reg_model.score(X_test, y_test)
# what's a one-liner we can use to compute log_reg_model's accuracy?

## Deployment

In [None]:
# Hand-built passenger records for prediction. They must contain exactly the
# features the model was fitted on, so `sex_male` is left out: that column was
# dropped from `data` before training, and passing it to predict() would be
# rejected as a feature mismatch.
leonardo = pd.DataFrame({"pclass": [3.0], "age": [23.0000], "sibsp": [0.0],
                         "parch": [0.0], "sex_female": [0.0],
                         "embarked_C": [1], "embarked_Q": [0], "embarked_S": [0]})
kate = pd.DataFrame({"pclass": [1.0], "age": [22.0000], "sibsp": [1.0],
                     "parch": [1.0], "sex_female": [1.0],
                     "embarked_C": [0], "embarked_Q": [1], "embarked_S": [0]})

# Select the training columns in training order. A KeyError here means a feature
# used during fit is missing from the records above and must be added to them.
leonardo = leonardo[X_train.columns]
kate = kate[X_train.columns]

leos_fate = log_reg_model.predict(leonardo)
kates_fate = log_reg_model.predict(kate)
# learner's view:
# leos_fate = "what goes here?"
# kates_fate = "what goes here?"

In [None]:
# Predicted `survived` label for the `leonardo` record.
print(leos_fate)

In [None]:
# Predicted `survived` label for the `kate` record.
print(kates_fate)