In [1]:
import sys
sys.path.append('..')
import numpy as np
import ascends as asc
import keras
import ast
from sklearn.metrics import classification_report
import pickle

Using TensorFlow backend.


In [2]:
# 1. Classification API reference
#
# NOTE: ascends-toolkit is meant to be driven through its command-line or web-based interface, but its core
# API can also be called directly when needed. This section shows a classification task done that way.

csv_file = '../data/iris.csv'
cols_to_remove = []
target_col = 'Name'
input_col = None

# The classifier needs every categorical label of the target column encoded as an integer code.
# Codes follow the order of the list below: 0, 1, 2.
mapping = {
    'Name': {
        species: code
        for code, species in enumerate(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
    }
}

# Load (and, per the function name, shuffle) a standard csv file.
# random_state is pinned because later cells assert exact prediction values.
(data_df, x_train, y_train, header_x, header_y) = asc.data_load_shuffle(
    csv_file, input_col, cols_to_remove, target_col,
    map_all=mapping, random_state=0,
)

In [3]:
# Eyeball the first ten rows to confirm the csv was parsed and the target column was mapped to integer codes
data_df.head(10)

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Name
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [4]:
# Spot-check asc.data_load_shuffle(): two cells of the returned frame must match the values shown in the preview
assert data_df['SepalLength'].iloc[0] == 5.1
assert data_df['SepalWidth'].iloc[3] == 3.1

In [5]:
# Generate the toolkit's default parameter set for classifier models.
# The result is handed to asc.define_model_classifier() in the next cell.
model_parameters = asc.default_model_parameters_classifier() 

In [6]:
# Settings shared by the training / evaluation cells that follow
model_type = 'RF'
# scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
scaler_option = 'StandardScaler'
num_of_folds = 5

# Build the (still untrained) classifier; the input width is taken from the loaded feature matrix
model = asc.define_model_classifier(
    model_type,
    model_parameters,
    x_header_size=x_train.shape[1],
    random_state=0,
)

In [7]:
# Train the model and collect its predictions next to the true labels
# (num_of_folds is forwarded to the toolkit; presumably k-fold cross-validation -- confirm in asc.train_and_predict)
predictions, actual_values = asc.train_and_predict(
    model, x_train, y_train,
    scaler_option=scaler_option,
    num_of_folds=num_of_folds,
)
accuracy = asc.evaluate_classifier(predictions, actual_values)

# scikit-learn's classification report gives a per-class view of how accurate the trained model is
print("", "* Classification Report", sep="\n")
print(classification_report(actual_values, predictions))


* Classification Report
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        50
         1.0       0.94      0.94      0.94        50
         2.0       0.94      0.94      0.94        50

   micro avg       0.96      0.96      0.96       150
   macro avg       0.96      0.96      0.96       150
weighted avg       0.96      0.96      0.96       150



In [8]:
# Regression check on the classification results.
# accuracy is a computed float, so it is compared with a tolerance instead of `==`;
# the tolerance is far smaller than the effect of a single misclassified sample (1/150).
np.testing.assert_allclose(accuracy, 0.96, rtol=1e-9, err_msg="unexpected classification accuracy")

# The predicted class codes are discrete, so they are compared exactly.
expected_predictions = [2, 1, 0, 2, 0, 2, 0, 1, 1, 1, 2, 1, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0,
 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 2, 2,
 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, 1,
 0, 0, 1, 0, 2, 1, 0, 1, 2, 1, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2,
 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 0, 1, 1, 0, 0, 1, 0, 2, 1, 2, 1, 0, 2,
 0, 2, 0, 0, 2, 0, 2, 1, 1, 1, 2, 2, 2, 2, 0, 1, 2, 2, 0, 1, 1, 2, 1, 0,
 0, 0, 2, 1, 2, 0,]
assert list(predictions) == expected_predictions, "predictions differ from the recorded reference run"

In [9]:
# Train on the full data set and write the result to ./model.pkl
# (the recorded accuracy is passed along with the model; the next cell reads the fitted scaler back from this file)
asc.train_and_save_classifier(
    model,
    "model.pkl",
    model_type,
    input_cols=header_x,
    target_col=header_y,
    x_train=x_train,
    y_train=y_train,
    scaler_option=scaler_option,
    path_to_save='.',
    accuracy=accuracy,
)

* Training initiated ..
* Training done.
* Trained model saved to file: model.pkl


In [10]:
# You can load the saved model by using the pickle package.
# The file is opened in a `with` block so the handle is closed as soon as loading finishes.
# SECURITY: pickle.load can execute arbitrary code -- only unpickle files you created yourself or fully trust.
with open('model.pkl', 'rb') as model_file:
    model_dict = pickle.load(model_file)

# Let's assume that we have an input as follows (one sample, four features)
x_to_predict = [[4.5, 2.4, 1.2, 4.2]]

# Scale the input with the scaler that was fitted during training and stored in the pickle
scaler = model_dict['fitted_scaler_x']
x_to_predict = scaler.transform(x_to_predict)
print("Scaled x_to_predict = ", x_to_predict)

# Making prediction can be done as follows
# NOTE(review): this predicts with the in-memory `model` from the training cells; only the scaler is read
# back from the pickle. If model_dict also stores the fitted estimator, prefer that one -- TODO confirm the key.
predicted_y = model.predict(x_to_predict)

# The prediction is a numeric class code, so look up the matching class name in the mapping
predicted_name = None
for class_name, class_code in mapping['Name'].items():
    if class_code == predicted_y[0]:
        predicted_name = class_name
        print("* Your model thinks that it's a ", class_name)

# test if our prediction is correctly done
# (checked outside the loop so it also fails when the predicted code matches no known class)
assert predicted_name == 'Iris-setosa', "unexpected predicted class: %r" % (predicted_name,)

Scaled x_to_predict =  [[-1.62768837 -1.51337555 -1.45500383  3.94594202]]
* Your model thinks that it's a  Iris-setosa


In [11]:
# 2. Regression API reference

# * NOTE: Ascends-toolkit was developed to be used via command-line interface or web-based interface; however, if needed,
# users may use ascends-toolkit's API. The following shows an example of performing a regression task using 
# the core ascends-toolkit APIs

# From here on csv_file, cols_to_remove and target_col are rebound for the regression data set,
# replacing the values used by the classification example above.
csv_file = '../data/BostonHousing.csv'
cols_to_remove = []
target_col = 'medv'

# Load data from csv file
# A standard csv file can be loaded and shuffled as follows
# (None is passed in the input-column position; no label mapping is given for this numeric target)
# NOTE(review): unlike the classification example, no random_state is passed here, while later cells
# assert exact metric values -- confirm that the default keeps the shuffle reproducible.

data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, None, cols_to_remove, target_col)

In [12]:
# Performing correlation analysis
# fs_dict holds only the top-k feature names for each criterion (e.g., PCC)
# final_report holds the full evaluation scores for every feature

k = 10
fs_dict, final_report = asc.correlation_analysis_all(
    data_df, target_col, top_k=k, file_to_save=None, save_chart=None
)

print("Top-k features for each criteria")
print(fs_dict)
print("")
print("Full Correlation Analysis report")
print(final_report)

# Use the top-k (k=10) features ranked by PCC (Pearson's correlation coefficient) as model inputs
# NOTE(review): in the recorded output the PCC list omits 'lstat', which leads the PCC_SQRT and MIC lists;
# it looks like PCC may rank by signed rather than absolute correlation -- confirm before relying on it.

input_col = fs_dict['PCC']

# Reload the file so that x_train / header_x only contain the selected input columns.
# This rebinds data_df, so re-running this cell without re-running the load cell above analyses the reloaded frame.
data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col)

# Regression check on the first row of the correlation report.
# .iloc[0] selects by position explicitly (the report is indexed by feature name, so a bare [0]
# relies on pandas' deprecated positional fallback), and the float scores are compared with a
# tolerance instead of `==`.
expected_first_row = {
    'MIC': 0.42068867557196804,
    'MEV': 0.42068867557196804,
    'MCN': 5.321928094887363,
    'MCN_general': 2.0,
    'GMIC': 0.3686876195636089,
}
for metric, expected in expected_first_row.items():
    np.testing.assert_allclose(
        final_report[metric].iloc[0], expected, rtol=1e-9,
        err_msg="unexpected %s score in the first report row" % metric,
    )


* correlation_analysis_all
Top-k features for each criteria
{'PCC': ['rm', 'zn', 'b', 'dis', 'chas', 'age', 'rad', 'crim', 'nox', 'tax'], 'PCC_SQRT': ['lstat', 'rm', 'ptratio', 'indus', 'tax', 'nox', 'crim', 'rad', 'age', 'zn'], 'MIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MAS': ['chas', 'b', 'age', 'zn', 'rad', 'dis', 'nox', 'ptratio', 'crim', 'tax'], 'MEV': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MCN': ['zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio'], 'MCN_general': ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax'], 'GMIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'rad'], 'TIC': ['lstat', 'rm', 'nox', 'indus', 'ptratio', 'age', 'crim', 'tax', 'dis', 'rad']}

Full Correlation Analysis report
              MIC       MAS       MEV       MCN  MCN_general      GMIC  \
age      0.420689  0.099268  0.420689  5.321928          2.0 

In [17]:
# Generate the toolkit's default parameter set for regression models
model_parameters = asc.default_model_parameters() 

# Model training with the default parameters (baseline for the tuned run further below)
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5
model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1], random_state=0)
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task
print("MAE = ", MAE,", R2 = ", R2)

# Regression check against the recorded reference run.
# The metrics are computed floats, so they are compared with a tight tolerance instead of `==`;
# this still catches real changes while tolerating last-bit rounding differences.
np.testing.assert_allclose(MAE, 2.8159565143151717, rtol=1e-9, err_msg="unexpected MAE for the default-parameter model")
np.testing.assert_allclose(R2, 0.7011306697815759, rtol=1e-9, err_msg="unexpected R2 for the default-parameter model")


MAE =  2.8159565143151717 , R2 =  0.7011306697815759


In [22]:
# Tuning hyper parameters for the model type, folds and scaler configured in the training cell above.
# n_iter=1000 candidates x 5 folds = 5000 fits; the recorded run below took about 8.5 minutes on
# 8 parallel workers, so expect this cell to dominate a full re-run of the notebook.
# random_state is fixed so the search is repeatable; verbose=1 prints the progress log.
# The returned tuned_parameters are passed to asc.define_model_regression() in the next training cell.
tuned_parameters = asc.hyperparameter_tuning(model_type, x_train, y_train
                                             , num_of_folds, scaler_option
                                           , n_iter=1000, random_state=0, verbose=1)

Fitting 5 folds for each of 1000 candidates, totalling 5000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   16.3s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   42.4s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  6.8min
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  8.4min
[Parallel(n_jobs=-1)]: Done 5000 out of 5000 | elapsed:  8.5min finished


In [27]:
# Model training with the tuned hyper parameters.
# Same settings as the default-parameter run; only the parameter set passed to the model differs.
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5
model = asc.define_model_regression(model_type, tuned_parameters, x_header_size = x_train.shape[1], random_state=0)
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task
print("MAE = ", MAE,", R2 = ", R2)

# Regression check against the recorded reference run.
# The metrics are computed floats, so they are compared with a tight tolerance instead of `==`;
# this still catches real changes while tolerating last-bit rounding differences.
np.testing.assert_allclose(MAE, 2.6527868127203824, rtol=1e-9, err_msg="unexpected MAE for the tuned model")
np.testing.assert_allclose(R2, 0.7799033856883656, rtol=1e-9, err_msg="unexpected R2 for the tuned model")

MAE =  2.6527868127203824 , R2 =  0.7799033856883656


In [28]:
# Save the prediction/actual value pairs from the tuned run to a csv file ("result.csv")
# so they can be inspected or plotted outside the notebook.
asc.save_test_data(predictions, actual_values, "result.csv")

In [29]:
# Train the tuned model and write it to ./trained_model,
# passing along the evaluation metrics (MAE, R2) measured above
asc.train_and_save(
    model,
    "trained_model",
    model_type,
    input_cols=header_x,
    target_col=header_y,
    x_train=x_train,
    y_train=y_train,
    scaler_option=scaler_option,
    path_to_save='.',
    MAE=MAE,
    R2=R2,
)

* Training initiated ..
* Training done.
* Trained model saved to file: trained_model


In [26]:
# Model file loading and making a prediction can be done in the same way as the classification example