In [32]:

import pandas as pd
from sklearn import svm
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression


from sklearn.model_selection import KFold, cross_val_score, GridSearchCV

In [24]:
# Locations of the preprocessed train/test sets and the sample submission file.
train_path = '../../preprocess_train_dataset/pre05_train.csv'
test_path = '../../preprocess_test_dataset/pre05_test.csv'
pid_path = '../../spaceship-titanic_rawData/sample_submission.csv'

# Read the three CSVs in the same order as the paths are listed.
train, test, pid = (
    pd.read_csv(csv_path) for csv_path in (train_path, test_path, pid_path)
)

# Target column, taken from the full training frame.
train_y = train['Transported']

# Feature matrices: 'Age' is excluded from both sets, and the target is
# additionally removed from the training features.
train_x = train.drop(columns=['Transported', 'Age'])
test = test.drop(columns=['Age'])

# Passenger ids used later to label the submission rows.
sub_pid = pid['PassengerId']

In [21]:
# Display the target Series as a quick sanity check of its values and dtype.
train_y

0       False
1        True
2       False
3       False
4        True
        ...  
8688    False
8689    False
8690     True
8691    False
8692     True
Name: Transported, Length: 8693, dtype: bool

In [33]:
# Fixed seed so the stochastic estimators (and therefore the CV scores and the
# final predictions) are repeatable across kernel restarts.
SEED = 42

# Base estimators. Only log_clf, rf_clf and ada_clf are used by the ensemble
# below; svc_clf and knn_clf are defined here but not part of it.
rf_clf = RandomForestClassifier(random_state=SEED)
log_clf = LogisticRegression(
    solver='liblinear',
    penalty='l2',
    C=350,
    class_weight='balanced',
    max_iter=300,
    random_state=SEED,
)
svc_clf = svm.SVC(probability=True, random_state=SEED)
knn_clf = KNeighborsClassifier()
ada_clf = AdaBoostClassifier(random_state=SEED)

# Soft-voting ensemble over the logistic regression, random forest and
# AdaBoost models (voting='soft' combines their predicted probabilities).
voting_clf = VotingClassifier(
    estimators=[
        ('log', log_clf),
        ('rf', rf_clf),
        ('ada', ada_clf)
    ], voting='soft'
)

In [34]:

# Cross-validate the voting ensemble on the training data with 8 folds;
# every other KFold option is left at its default.
k_folds = KFold(n_splits=8)

scores = cross_val_score(
    estimator=voting_clf,
    X=train_x,
    y=train_y,
    cv=k_folds,
)

# Report the per-fold scores followed by their mean.
mean_score = scores.mean()
print("Cross Validation Scores: ", scores)
print("\nAverage CV Score: ", mean_score)



Cross Validation Scores:  [0.74517019 0.72125115 0.76724931 0.73137075 0.7350506  0.74493554
 0.76427256 0.74677716]

Average CV Score:  0.7445096579193922


In [35]:
# Fit the ensemble on the full training set before predicting on the test set.
voting_clf.fit(train_x, train_y)

In [38]:
# Predict the target for the test features with the fitted ensemble.
pred = voting_clf.predict(test)

# Pair each passenger id from the sample submission with its prediction.
submission_columns = {
    'PassengerId': sub_pid,
    'Transported': pred,
}
clf_submission = pd.DataFrame(submission_columns)

# Count missing values per column as a sanity check on the submission frame.
clf_submission.isna().sum()

PassengerId    0
Transported    0
dtype: int64

In [39]:
# Write the submission file. The DataFrame index is not written (index=False);
# 'PassengerId' is already a regular column, so no index_label is needed
# (index_label has no effect when index=False).
clf_submission.to_csv('../../output_prediction/voting01_submission.csv', index=False)