-
Notifications
You must be signed in to change notification settings - Fork 389
/
binary_classifier_Titanic.py
47 lines (36 loc) · 1.19 KB
/
binary_classifier_Titanic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import numpy as np
from supervised.automl import AutoML
import os
from sklearn.metrics import accuracy_score
# NOTE: the triple-quoted string below is a bare expression statement, so the
# code inside it never runs. It holds a disabled variant of this example that
# reads the Titanic CSV files from the local "tests/data/Titanic" directory,
# builds AutoML with mode="Explain", and prints both train and test accuracy.
"""
df = pd.read_csv("tests/data/Titanic/train.csv")
X = df[df.columns[2:]]
y = df["Survived"]
automl = AutoML(mode="Explain")
automl.fit(X, y)
pred = automl.predict(X)
print("Train accuracy", accuracy_score(y, pred))
test = pd.read_csv("tests/data/Titanic/test_with_Survived.csv")
pred = automl.predict(test)
print("Test accuracy", accuracy_score(test["Survived"], pred))
"""
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from supervised import AutoML
# End-to-end example: fit AutoML on the Titanic training CSV, predict on the
# labelled test CSV, and print the resulting accuracy as a percentage.
TRAIN_URL = "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/train.csv"
TEST_URL = "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/test_with_Survived.csv"

train = pd.read_csv(TRAIN_URL)
print(train.head())

# Every column from the third one onward is used as a feature; the
# "Survived" column is the target.
# NOTE(review): the two skipped leading columns are presumably an id and the
# label itself - confirm against the CSV header.
feature_columns = train.columns[2:]
X = train[feature_columns]
y = train["Survived"]

# AutoML(mode="Compete") is the alternative the original author noted here;
# without an explicit mode the default is Explain (per the original comment).
automl = AutoML(total_time_limit=120)
automl.fit(X, y)

# The whole test frame (including its "Survived" column) is handed to
# predict(), exactly as in the original script.
test = pd.read_csv(TEST_URL)
predictions = automl.predict(test)
print(predictions)

accuracy = accuracy_score(test["Survived"], predictions)
print(f"Accuracy: {accuracy*100.0:.2f}%")