**Importing all libraries**

In [23]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier

**Loading the data**

In [24]:
# Directory that holds train.csv and test.csv. Edit this single constant when
# the notebook runs somewhere the data lives under a different path.
DATA_DIR = "/content/sample_data"

# Both files use their PassengerId column as the row index, so the id stays
# out of the feature columns and remains reachable through `.index`.
train_data = pd.read_csv(f"{DATA_DIR}/train.csv", index_col="PassengerId")
test_data = pd.read_csv(f"{DATA_DIR}/test.csv", index_col="PassengerId")

**Feature selection**

In [25]:
# Columns used as model inputs; get_dummies one-hot encodes the
# non-numeric ones and passes numeric columns through unchanged.
features = ["Pclass", "Sex", "SibSp", "Parch"]
X = pd.get_dummies(train_data[features])

# Encode the test split independently, then force it onto the training
# column layout: a category seen in only one split would otherwise give the
# two matrices different columns and break (or silently misalign) the scaler
# and models fitted on X. Columns absent from the test split are filled with
# 0; columns unknown to the training split are dropped.
X_test = pd.get_dummies(test_data[features]).reindex(columns=X.columns, fill_value=0)

# Target labels, taken from the training file only.
y = train_data["Survived"]

**Feature scaling (standardization)**

In [26]:
# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to both splits so the test data never influences
# the fit.
ss = StandardScaler()
ss.fit(X)
X_scaled = ss.transform(X)
X_test_scaled = ss.transform(X_test)

# Show both scaled matrices, training first, one after the other.
print(X_scaled, X_test_scaled, sep="\n")

[[ 0.82737724  0.43279337 -0.47367361 -0.73769513  0.73769513]
 [-1.56610693  0.43279337 -0.47367361  1.35557354 -1.35557354]
 [ 0.82737724 -0.4745452  -0.47367361  1.35557354 -1.35557354]
 ...
 [ 0.82737724  0.43279337  2.00893337  1.35557354 -1.35557354]
 [-1.56610693 -0.4745452  -0.47367361 -0.73769513  0.73769513]
 [ 0.82737724 -0.4745452  -0.47367361 -0.73769513  0.73769513]]
[[ 0.82737724 -0.4745452  -0.47367361 -0.73769513  0.73769513]
 [ 0.82737724  0.43279337 -0.47367361  1.35557354 -1.35557354]
 [-0.36936484 -0.4745452  -0.47367361 -0.73769513  0.73769513]
 ...
 [ 0.82737724 -0.4745452  -0.47367361 -0.73769513  0.73769513]
 [ 0.82737724 -0.4745452  -0.47367361 -0.73769513  0.73769513]
 [ 0.82737724  0.43279337  0.76762988 -0.73769513  0.73769513]]


**Modeling**

In [27]:
# Three base classifiers, each trained on the scaled training features.
# `.fit` returns the estimator itself, so construction and training are chained.
rfc = RandomForestClassifier(
    max_depth=5,
    n_estimators=100,
    random_state=1,
).fit(X_scaled, y)

lg = LogisticRegression(
    C=20,
    solver='lbfgs',
    max_iter=1000,
    random_state=10,
).fit(X_scaled, y)

# LightGBM is left at its library defaults. The fit call is kept as the
# cell's final expression so the fitted estimator is displayed.
lgb = LGBMClassifier()
lgb.fit(X_scaled, y)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

**Ensembling**

In [28]:
# Hard-voting ensemble: each base classifier casts one vote per sample and
# the majority class label wins. The ensemble is fitted on the same scaled
# training data as the individual models.
ensemble_model = VotingClassifier(
    voting="hard",
    estimators=[("logit", lg), ("rf", rfc), ("lgb", lgb)],
).fit(X_scaled, y)

# Class predictions for the scaled test features.
preds = ensemble_model.predict(X_test_scaled)

**Output**

In [29]:
# Pair every test-set PassengerId (the index of test_data) with its
# predicted label.
submission_columns = {
    'PassengerId': test_data.index,
    'Survived': preds,
}
output = pd.DataFrame(submission_columns)

# index=False keeps the frame's own row index out of the file, so the CSV
# contains only the two columns above.
output.to_csv('submission.csv', index=False)