In [628]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_log_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
import lightgbm as lgbm
import optuna
from catboost import CatBoostClassifier

train_data = pd.read_csv("train.csv")
train_data.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [629]:
X = train_data.drop(columns=["Transported", "Name"])
X.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0


In [630]:
y = train_data["Transported"]
y = y.map({False: 0, True: 1})
y.head()

0    0
1    1
2    0
3    0
4    1
Name: Transported, dtype: int64

In [631]:
X["CryoSleep"] = X["CryoSleep"].map({False: 0, True: 1})
X["VIP"] = X["VIP"].map({False: 0, True: 1})
X.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck
0,0001_01,Europa,0.0,B/0/P,TRAPPIST-1e,39.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0002_01,Earth,0.0,F/0/S,TRAPPIST-1e,24.0,0.0,109.0,9.0,25.0,549.0,44.0
2,0003_01,Europa,0.0,A/0/S,TRAPPIST-1e,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0
3,0003_02,Europa,0.0,A/0/S,TRAPPIST-1e,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0
4,0004_01,Earth,0.0,F/1/S,TRAPPIST-1e,16.0,0.0,303.0,70.0,151.0,565.0,2.0


In [632]:
def count_nans(data):
  for col in data.columns:
    nans = data[col].isna().sum()
    if nans > 0:
      print(col + "    " + str(nans))

count_nans(X)

HomePlanet    201
CryoSleep    217
Cabin    199
Destination    182
Age    179
VIP    203
RoomService    181
FoodCourt    183
ShoppingMall    208
Spa    183
VRDeck    188


In [633]:
group_data = X["PassengerId"].str.split("_")
group_data = list(group_data)
group_ids = []
for i in range(len(group_data)):
  group_ids.append(group_data[i][0])

group_ids[:5]

['0001', '0002', '0003', '0003', '0004']

In [634]:
group_sizes = [1]
current = group_ids[0]
for i in range(1, len(group_ids)):
  if group_ids[i] == current:
    group_sizes[-1] += 1
  else:
    group_sizes.append(1)
    current = group_ids[i]

group_sizes[:5]

[1, 1, 2, 1, 1]

In [635]:
group_sizes_column = []
for number in group_sizes:
  for i in range(0, number):
    group_sizes_column.append(number)
  
group_sizes_column[:10]

[1, 1, 2, 2, 1, 1, 2, 2, 1, 3]

In [636]:
X["GroupSize"] = group_sizes_column
X.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,GroupSize
0,0001_01,Europa,0.0,B/0/P,TRAPPIST-1e,39.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,0002_01,Earth,0.0,F/0/S,TRAPPIST-1e,24.0,0.0,109.0,9.0,25.0,549.0,44.0,1
2,0003_01,Europa,0.0,A/0/S,TRAPPIST-1e,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,2
3,0003_02,Europa,0.0,A/0/S,TRAPPIST-1e,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,2
4,0004_01,Earth,0.0,F/1/S,TRAPPIST-1e,16.0,0.0,303.0,70.0,151.0,565.0,2.0,1


In [637]:
X.drop(columns="PassengerId", inplace=True)

X.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,GroupSize
0,Europa,0.0,B/0/P,TRAPPIST-1e,39.0,0.0,0.0,0.0,0.0,0.0,0.0,1
1,Earth,0.0,F/0/S,TRAPPIST-1e,24.0,0.0,109.0,9.0,25.0,549.0,44.0,1
2,Europa,0.0,A/0/S,TRAPPIST-1e,58.0,1.0,43.0,3576.0,0.0,6715.0,49.0,2
3,Europa,0.0,A/0/S,TRAPPIST-1e,33.0,0.0,0.0,1283.0,371.0,3329.0,193.0,2
4,Earth,0.0,F/1/S,TRAPPIST-1e,16.0,0.0,303.0,70.0,151.0,565.0,2.0,1


In [638]:
numerical_cols = ["GroupSize", "Age", "RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]
categorical_cols = ["HomePlanet", "CryoSleep", "Cabin", "Destination", "VIP"]

numerical_transformer = Pipeline(
  steps=[
    ("imputer", SimpleImputer()),
  ]
)

categorical_transformer = Pipeline(
  steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
  ]
)

preprocessor = ColumnTransformer(
  transformers=[
    ("num", numerical_transformer, numerical_cols),
    ("cat", categorical_transformer, categorical_cols),
  ]
)

In [639]:
X = pd.DataFrame(preprocessor.fit_transform(X))
X.columns = numerical_cols + categorical_cols

X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Cabin,Destination,VIP
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,Europa,0.0,B/0/P,TRAPPIST-1e,0.0
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,Earth,0.0,F/0/S,TRAPPIST-1e,0.0
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,Europa,0.0,A/0/S,TRAPPIST-1e,1.0
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,Europa,0.0,A/0/S,TRAPPIST-1e,0.0
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,Earth,0.0,F/1/S,TRAPPIST-1e,0.0


In [640]:
X["CabinDeck"] = X["Cabin"].str[0]
X["CabinSide"] = X["Cabin"].str[-1]

X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Cabin,Destination,VIP,CabinDeck,CabinSide
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,Europa,0.0,B/0/P,TRAPPIST-1e,0.0,B,P
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,Earth,0.0,F/0/S,TRAPPIST-1e,0.0,F,S
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,Europa,0.0,A/0/S,TRAPPIST-1e,1.0,A,S
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,Europa,0.0,A/0/S,TRAPPIST-1e,0.0,A,S
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,Earth,0.0,F/1/S,TRAPPIST-1e,0.0,F,S


In [641]:
X.drop(columns="Cabin", inplace=True)

X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,VIP,CabinDeck,CabinSide
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,Europa,0.0,TRAPPIST-1e,0.0,B,P
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,Earth,0.0,TRAPPIST-1e,0.0,F,S
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,Europa,0.0,TRAPPIST-1e,1.0,A,S
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,Europa,0.0,TRAPPIST-1e,0.0,A,S
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,Earth,0.0,TRAPPIST-1e,0.0,F,S


In [642]:
for col in X.columns:
  print(X[col].dtype)

object
object
object
object
object
object
object
object
object
object
object
object
object


In [643]:
cols_to_change = numerical_cols + ["CryoSleep", "VIP"]

X[cols_to_change] = X[cols_to_change].apply(pd.to_numeric)
for col in X.columns:
  print(X[col].dtype)

float64
float64
float64
float64
float64
float64
float64
object
float64
object
float64
object
object


In [644]:
X["CabinSide"] = X["CabinSide"].map({"P": 0, "S": 1})
X["HomePlanet"] = X["HomePlanet"].map({"Earth": 0, "Mars": 1, "Europa": 2})
X["Destination"] = X["Destination"].map({"TRAPPIST-1e": 0, "PSO J318.5-22": 1, "55 Cancri e": 2})

X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,VIP,CabinDeck,CabinSide
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0,0.0,B,0
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,0,0.0,0,0.0,F,1
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,2,0.0,0,1.0,A,1
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,2,0.0,0,0.0,A,1
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,0,0.0,0,0.0,F,1


In [645]:
cols_to_change = ["CabinDeck"]

OH_encoder = OneHotEncoder()
OH_cols = pd.DataFrame(OH_encoder.fit_transform(X[cols_to_change]).toarray())
num_cols = X.drop(columns=cols_to_change)
X = pd.concat([num_cols, OH_cols], axis=1)

X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,VIP,CabinSide,0,1,2,3,4,5,6,7
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0,0.0,0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,0,0.0,0,0.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,2,0.0,0,1.0,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,2,0.0,0,0.0,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,0,0.0,0,0.0,1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [646]:
services = ["RoomService", "FoodCourt", "ShoppingMall", "Spa", "VRDeck"]

for col in services:
  X[col + "Used"] = X[col] > 0

for col in services:
  X[col + "Used"] = X[col + "Used"].map({False: 0, True: 1})


X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,...,3,4,5,6,7,RoomServiceUsed,FoodCourtUsed,ShoppingMallUsed,SpaUsed,VRDeckUsed
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,0,0.0,0,...,0.0,0.0,1.0,0.0,0.0,1,1,1,1,1
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0.0,1,1,0,1,1
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0,1,1,1,1
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,0,0.0,0,...,0.0,0.0,1.0,0.0,0.0,1,1,1,1,1


In [647]:
X["TotalSpent"] = X["RoomService"] + X["FoodCourt"] + X["ShoppingMall"] + X["Spa"] + X["VRDeck"]
X.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,...,4,5,6,7,RoomServiceUsed,FoodCourtUsed,ShoppingMallUsed,SpaUsed,VRDeckUsed,TotalSpent
0,1.0,39.0,0.0,0.0,0.0,0.0,0.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0
1,1.0,24.0,109.0,9.0,25.0,549.0,44.0,0,0.0,0,...,0.0,1.0,0.0,0.0,1,1,1,1,1,736.0
2,2.0,58.0,43.0,3576.0,0.0,6715.0,49.0,2,0.0,0,...,0.0,0.0,0.0,0.0,1,1,0,1,1,10383.0
3,2.0,33.0,0.0,1283.0,371.0,3329.0,193.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0,1,1,1,1,5176.0
4,1.0,16.0,303.0,70.0,151.0,565.0,2.0,0,0.0,0,...,0.0,1.0,0.0,0.0,1,1,1,1,1,1091.0


In [648]:
X["ServicesUsed"] = X["RoomServiceUsed"] + X["FoodCourtUsed"] + X["ShoppingMallUsed"] + X["SpaUsed"] + X["VRDeckUsed"]

In [649]:
X = X.rename(columns=lambda x: str(x))

In [650]:
#model = XGBClassifier(n_estimators=40, learning_rate=0.08, max_depth=9)
#model = LogisticRegression(random_state=0, max_iter=1000, solver="liblinear")
#model = RandomForestClassifier(n_estimators=100, max_depth=10)
model = CatBoostClassifier(iterations=1500, eval_metric='Accuracy', verbose=0)

scores = cross_val_score(model, X, y, cv=3, scoring="accuracy")
print(scores)
print(scores.mean())

[0.79192547 0.80779848 0.82050397]
0.8067426390579278


In [476]:
model.fit(X, y)

<catboost.core.CatBoostClassifier at 0x2525c1dba60>

In [477]:
test_data = pd.read_csv("test.csv")

test_data.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name
0,0013_01,Earth,True,G/3/S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning
1,0018_01,Earth,False,F/4/S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers
2,0019_01,Europa,True,C/0/S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus
3,0021_01,Europa,False,C/1/S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter
4,0023_01,Earth,False,F/5/S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez


In [478]:
test_data_passenger_id = test_data["PassengerId"]
test_data_passenger_id

0       0013_01
1       0018_01
2       0019_01
3       0021_01
4       0023_01
         ...   
4272    9266_02
4273    9269_01
4274    9271_01
4275    9273_01
4276    9277_01
Name: PassengerId, Length: 4277, dtype: object

In [479]:
test_data = test_data.drop(columns=["Name", "PassengerId"])

test_data["CryoSleep"] = test_data["CryoSleep"].map({False: 0, True: 1})
test_data["VIP"] = test_data["VIP"].map({False: 0, True: 1})

group_data = test_data_passenger_id.str.split("_")
group_data = list(group_data)
group_ids = []
for i in range(len(group_data)):
  group_ids.append(group_data[i][0])

group_sizes = [1]
current = group_ids[0]
for i in range(1, len(group_ids)):
  if group_ids[i] == current:
    group_sizes[-1] += 1
  else:
    group_sizes.append(1)
    current = group_ids[i]

group_sizes_column = []
for number in group_sizes:
  for i in range(0, number):
    group_sizes_column.append(number)

test_data["GroupSize"] = group_sizes_column

test_data = pd.DataFrame(preprocessor.transform(test_data))

test_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.0,27.0,0.0,0.0,0.0,0.0,0.0,Earth,1.0,G/3/S,TRAPPIST-1e,0.0
1,1.0,19.0,0.0,9.0,0.0,2823.0,0.0,Earth,0.0,F/4/S,TRAPPIST-1e,0.0
2,1.0,31.0,0.0,0.0,0.0,0.0,0.0,Europa,1.0,C/0/S,55 Cancri e,0.0
3,1.0,38.0,0.0,6652.0,0.0,181.0,585.0,Europa,0.0,C/1/S,TRAPPIST-1e,0.0
4,1.0,20.0,10.0,0.0,635.0,0.0,0.0,Earth,0.0,F/5/S,TRAPPIST-1e,0.0


In [480]:
test_data.columns = numerical_cols + categorical_cols

test_data.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Cabin,Destination,VIP
0,1.0,27.0,0.0,0.0,0.0,0.0,0.0,Earth,1.0,G/3/S,TRAPPIST-1e,0.0
1,1.0,19.0,0.0,9.0,0.0,2823.0,0.0,Earth,0.0,F/4/S,TRAPPIST-1e,0.0
2,1.0,31.0,0.0,0.0,0.0,0.0,0.0,Europa,1.0,C/0/S,55 Cancri e,0.0
3,1.0,38.0,0.0,6652.0,0.0,181.0,585.0,Europa,0.0,C/1/S,TRAPPIST-1e,0.0
4,1.0,20.0,10.0,0.0,635.0,0.0,0.0,Earth,0.0,F/5/S,TRAPPIST-1e,0.0


In [481]:
test_data["CabinDeck"] = test_data["Cabin"].str[0]
test_data["CabinSide"] = test_data["Cabin"].str[-1]
test_data.drop(columns="Cabin", inplace=True)

cols_to_change = numerical_cols + ["CryoSleep", "VIP"]
test_data[cols_to_change] = test_data[cols_to_change].apply(pd.to_numeric)

test_data["CabinSide"] = test_data["CabinSide"].map({"P": 0, "S": 1})
test_data["HomePlanet"] = test_data["HomePlanet"].map({"Earth": 0, "Mars": 1, "Europa": 2})
test_data["Destination"] = test_data["Destination"].map({"TRAPPIST-1e": 0, "PSO J318.5-22": 1, "55 Cancri e": 2})

cols_to_change = ["CabinDeck"]
OH_encoder = OneHotEncoder()
OH_cols = pd.DataFrame(OH_encoder.fit_transform(test_data[cols_to_change]).toarray())
num_cols = test_data.drop(columns=cols_to_change)
test_data = pd.concat([num_cols, OH_cols], axis=1)

for col in services:
  test_data[col + "Used"] = test_data[col] > 0
for col in services:
  test_data[col + "Used"] = test_data[col + "Used"].map({False: 0, True: 1})

#test_data[services] = pd.DataFrame(StandardScaler().fit_transform(test_data[services]))
test_data["TotalSpent"] = test_data["RoomService"] + test_data["FoodCourt"] + test_data["ShoppingMall"] + test_data["Spa"] + test_data["VRDeck"]

test_data = test_data.rename(columns=lambda test_data: str(test_data))

test_data.head()

Unnamed: 0,GroupSize,Age,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet,CryoSleep,Destination,...,4,5,6,7,RoomServiceUsed,FoodCourtUsed,ShoppingMallUsed,SpaUsed,VRDeckUsed,TotalSpent
0,1.0,27.0,0.0,0.0,0.0,0.0,0.0,0,1.0,0,...,0.0,0.0,1.0,0.0,0,0,0,0,0,0.0
1,1.0,19.0,0.0,9.0,0.0,2823.0,0.0,0,0.0,0,...,0.0,1.0,0.0,0.0,0,1,0,1,0,2832.0
2,1.0,31.0,0.0,0.0,0.0,0.0,0.0,2,1.0,2,...,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0
3,1.0,38.0,0.0,6652.0,0.0,181.0,585.0,2,0.0,0,...,0.0,0.0,0.0,0.0,0,1,0,1,1,7418.0
4,1.0,20.0,10.0,0.0,635.0,0.0,0.0,0,0.0,0,...,0.0,1.0,0.0,0.0,1,0,1,0,0,645.0


In [482]:
predictions = model.predict(test_data)

predictions = map(bool, predictions)
predictions = list(predictions)

predictions

[True,
 False,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 True,
 True,
 True,
 False,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 False,
 True,
 True,


In [483]:
def make_submission(predictions):
  predictions_df = pd.DataFrame(data={"PassengerId": test_data_passenger_id, "Transported": predictions})
  predictions_df.to_csv("submission.csv", index=False)

make_submission(predictions)