Let AutoML.predict() use a specific fitted model selected by name.

AutoML.predict() gains an optional `model` argument that is forwarded to
BaseAutoML._predict(); two example scripts exercise training and loading.

diff --git a/binary_classifier_Titanic.py b/binary_classifier_Titanic.py
new file mode 100644
--- /dev/null
+++ b/binary_classifier_Titanic.py
@@ -0,0 +1,26 @@
+"""Train AutoML on the Titanic data and report accuracy on a labelled test set."""
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+from supervised import AutoML
+
+train = pd.read_csv(
+    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/train.csv"
+)
+print(train.head())
+
+X = train[train.columns[2:]]
+y = train["Survived"]
+
+# No mode is passed, so AutoML runs in its default mode (Explain).
+automl = AutoML(total_time_limit=120)
+
+automl.fit(X, y)
+
+test = pd.read_csv(
+    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/test_with_Survived.csv"
+)
+predictions = automl.predict(test)
+print(predictions)
+print(f"Accuracy: {accuracy_score(test['Survived'], predictions)*100.0:.2f}%")
diff --git a/loader.py b/loader.py
new file mode 100644
--- /dev/null
+++ b/loader.py
@@ -0,0 +1,15 @@
+"""Load a finished AutoML run and score one named model on the Titanic test set."""
+
+import pandas as pd
+from sklearn.metrics import accuracy_score
+
+from supervised import AutoML
+
+test = pd.read_csv(
+    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/test_with_Survived.csv"
+)
+
+# Nothing is fitted here: predict() loads the models found in results_path.
+automl = AutoML(results_path="AutoML_1")
+predictions = automl.predict(test, model="Ensemble")
+print(f"Accuracy: {accuracy_score(test['Survived'], predictions)*100.0:.2f}%")
diff --git a/supervised/automl.py b/supervised/automl.py
--- a/supervised/automl.py
+++ b/supervised/automl.py
@@ -336,7 +336,8 @@ def fit(self, X, y, sample_weight=None, cv=None):
         """
         return self._fit(X, y, sample_weight, cv)
 
-    def predict(self, X):
+    def predict(self, X, model=None):
         """
-        Computes predictions from AutoML best model.
+        Computes predictions from AutoML best model, or from the fitted
+        model whose name is given in `model`.
 
@@ -353,7 +354,8 @@ def predict(self, X):
         Raises:
-            AutoMLException: Model has not yet been fitted.
+            AutoMLException: Model has not yet been fitted, or `model`
+                does not match the name of any fitted model.
         """
-        return self._predict(X)
+        return self._predict(X, model)
 
     def predict_proba(self, X):
         """
diff --git a/supervised/base_automl.py b/supervised/base_automl.py
--- a/supervised/base_automl.py
+++ b/supervised/base_automl.py
@@ -1301,9 +1301,33 @@ def _base_predict(self, X, model=None):
         else:
             return predictions
 
-    def _predict(self, X):
+    def _predict(self, X, modelname=None):
+        """
+        Computes predictions with the best model or a model selected by name.
+
+        Arguments:
+            X: Input data, passed unchanged to `_base_predict`.
+            modelname (str): Name of the fitted model to predict with.
+                When `None`, `_base_predict` chooses the model.
+
+        Raises:
+            AutoMLException: `modelname` does not match any loaded model.
+        """
 
-        predictions = self._base_predict(X)
+        # A fresh AutoML object has no models in memory; read them from disk
+        # so that a model can be looked up by name.
+        if not self._models:
+            self.load(self.results_path)
+
+        model = None
+        if modelname is not None:
+            model = next(
+                (m for m in self._models if m.get_name() == modelname), None
+            )
+            if model is None:
+                raise AutoMLException(f"Invalid model name: {modelname!r}")
+
+        predictions = self._base_predict(X, model)
         # Return predictions
         # If classification task the result is in column 'label'
         # If regression task the result is in column 'prediction'