# FEDOT practical approach
## The following code can be used in a Jupyter Notebook (Python 3.8.X, FEDOT 0.7.1).

In [None]:
Import Python modules and prepare the training data set.

In [None]:
# Standard-library modules.
import logging
import warnings

# Third-party data handling and label-encoding utilities.
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Keep the notebook output quiet: ignore all Python warnings and tell the
# logging module not to report exceptions raised while handling log records.
warnings.filterwarnings("ignore")
logging.raiseExceptions = False

In [None]:
# Load the training CSV, fill missing YRS_CURRENT_EMPLOYER values with 0 and
# map the textual IS_CHURNER flag ('no' / 'yes') to 0 / 1.
filename = 'MOVIESTREAM_CHURN_RED_TRAIN.csv'
dataframe = pd.read_csv(filename)
dataframe['YRS_CURRENT_EMPLOYER'] = dataframe['YRS_CURRENT_EMPLOYER'].fillna(0)
dataframe['IS_CHURNER'] = (
    dataframe['IS_CHURNER']
    .replace(['no'], 0)
    .replace(['yes'], 1)
)

# Column layout used below: first column -> record id, last column -> target,
# every column in between -> model features.
array = dataframe.values
ID_train, y_train = array[:, 0], array[:, -1]
X_train = array[:, 1:-1].astype('float32')

# Labels are stringified first and then encoded as consecutive integers.
y_train = LabelEncoder().fit_transform(y_train.astype('str'))

__Build a Classification model__. A time budget of five minutes has been set. The _n_jobs_ parameter establishes the number of jobs to run in parallel. The _metric_ parameter specifies the evaluation metric used to assess the model performance.

In [44]:
# Configure the FEDOT instance that serves as the AutoML tool.
from fedot.api.main import Fedot
from fedot.core.repository.quality_metrics_repository import (
    ClassificationMetricsEnum,
    MetricsRepository,
)

# Look up the accuracy metric through FEDOT's metrics repository.
metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.accuracy)

auto_model = Fedot(
    problem='classification',
    timeout=5,  # time budget; the notebook text describes it as five minutes
    n_jobs=2,
    seed=42,
    with_tuning=True,
    metric=metric_function,
)

The _fit_ method optimizes the machine learning model.

In [None]:
# Launch the AutoML-based model generation on the training data; whatever
# fit() returns is kept in `pipeline`.
pipeline = auto_model.fit(
    features=X_train,
    target=y_train,
)

__Save optimized model to a file__. 

In [None]:
# Persist the pipeline currently held by the AutoML instance.
path_to_save = './fedot/auto_model.json'
pipeline = auto_model.current_pipeline
pipeline.save(
    path=path_to_save,
    create_subdir=True,
    is_datetime_in_path=False,
)

__Evaluate the best model__. 

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy measured on the same data that was passed to fit().
y_pred = auto_model.predict(X_train)
acc = accuracy_score(y_train, y_pred)
print("Accuracy: %.3f" % (acc,))

__Restore tuned model from the file__. 

In [None]:
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.quality_metrics_repository import (
    ClassificationMetricsEnum,
    MetricsRepository,
)

# Read the serialized pipeline back from disk.
path_to_load = './fedot/auto_model.json'
saved_model = Pipeline().load(path_to_load)

# Create a new Fedot instance with the accuracy metric and fit it on the
# training data using the loaded pipeline as the predefined model.
metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.accuracy)
auto_model = Fedot(problem='classification', metric=metric_function)
auto_model.fit(features=X_train, target=y_train, predefined_model=saved_model)

Prepare the testing data set.

In [None]:
# Load the testing CSV and fill missing YRS_CURRENT_EMPLOYER values with 0.
filename = 'MOVIESTREAM_CHURN_RED_TEST.csv'
dataframe = pd.read_csv(filename)
dataframe['YRS_CURRENT_EMPLOYER'] = dataframe['YRS_CURRENT_EMPLOYER'].fillna(0)

# Column layout: first column -> record id, last column -> target,
# every column in between -> model features.
array = dataframe.values
ID_test, X_test, y_test = array[:, 0], array[:, 1:-1], array[:, -1]
print(X_test.shape, y_test.shape)

X_test = X_test.astype('float32')
# NOTE(review): this encoder is fitted on the test labels alone, so its
# integer codes agree with the training codes only if both label sets sort
# into the same order — confirm against the data.
y_test = LabelEncoder().fit_transform(y_test.astype('str'))

__Score the machine learning model__: the predicted values and the probability estimates for each value are obtained. 

In [None]:
# Predict on the test features and report accuracy against the test labels.
y_pred = auto_model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy: %.3f" % (acc,))

Predicting on test data using the tuned model. The _predict_ function outputs the predicted classes, while the _predict_proba_ function outputs the probability estimates that are used as the confidence of each prediction.

In [None]:
probs = auto_model.predict_proba(features=X_test)

# Confidence of the predicted label: when the prediction is 0 the complement
# of column 0 is used, otherwise column 0 itself.
# NOTE(review): presumes column 0 holds the probability of class 1 — confirm
# against the FEDOT predict_proba documentation.
pred_confidence = [
    1 - probs[i, 0] if y_pred[i] == 0 else probs[i, 0]
    for i in range(len(probs))
]

# One single-column frame per output field, joined side by side and written
# to CSV without the index column.
ds_id = pd.DataFrame(ID_test, columns=["ID"])
ds_actual = pd.DataFrame(y_test, columns=["ACTUALVALUE"])
ds_pred = pd.DataFrame(y_pred, columns=["PREDICTEDVALUE"])
ds_prob = pd.DataFrame(pred_confidence, columns=["PREDICTIONCONFIDENCE"])
dataframe = pd.concat(
    [ds_id, ds_actual, ds_pred, ds_prob],
    axis=1,
)
dataframe.to_csv('fedot_test_pred.csv', index=False)

__Build the confusion matrix__

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test labels (first argument, y_true) versus the
# model predictions (second argument, y_pred).
conf_matrix = confusion_matrix(y_test, y_pred)

__Display the confusion matrix__

In [None]:
# `plt` is used throughout this cell; import it here so the cell runs on a
# fresh kernel (no other cell imports matplotlib).
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix

# Render the previously computed confusion matrix as a small green heat map
# and label the axes and title through the pyplot interface.
fig, ax = plot_confusion_matrix(conf_mat=conf_matrix, figsize=(2, 2), cmap=plt.cm.Greens)
plt.xlabel('Predictions', fontsize=11)
plt.ylabel('Actuals', fontsize=11)
plt.title('Confusion Matrix', fontsize=11)
plt.show()

__Calculate the performance metrics__

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Print each metric for the test predictions, one per line, in a fixed order.
for report_format, scorer in (
    ('Precision: %.3f', precision_score),
    ('Recall: %.3f', recall_score),
    ('Accuracy: %.3f', accuracy_score),
    ('F1 Score: %.3f', f1_score),
):
    print(report_format % scorer(y_test, y_pred))