In [35]:
import numpy as np
import ascends as asc
import keras
import ast

In [36]:
# Ascends-toolkit was developed to be used via command-line interface or web-based interface; however, if needed,
# users may use ascends-toolkit's API. The following shows an example of performing a regression task using 
# the core ascends-toolkit APIs

csv_file = 'BostonHousing.csv'
cols_to_remove = []
target_col = 'medv'

# Load data from csv file
# A standard csv file can be loaded and shuffled as follows

data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, None, cols_to_remove, target_col)

In [39]:
# Performing correlation analysis
# Correlation analysis can be performed as follows
# fs_dict will only contain the top-k features for each criteria (e.g., PCC)
# final_report will contain the full evaluation scores for each feature

k = 10
fs_dict, final_report = asc.correlation_analysis_all(data_df, target_col, top_k = k, file_to_save = None, save_chart = None)

print("Top-k features for each criteria")
print(fs_dict)
print("")
print("Full Correlation Analysis report")
print(final_report)

# To use top-k (k=10) features based on PCC (Pearson's correlation coefficient)

input_col = fs_dict['PCC']

# We need to load the file again
data_df, x_train, y_train, header_x, header_y = asc.data_load_shuffle(csv_file, input_col, cols_to_remove, target_col)


Top-k features for each criteria
{'PCC': ['rm', 'zn', 'b', 'dis', 'chas', 'age', 'rad', 'crim', 'nox', 'tax'], 'PCC_SQRT': ['lstat', 'rm', 'ptratio', 'indus', 'tax', 'nox', 'crim', 'rad', 'age', 'zn'], 'MIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MAS': ['chas', 'b', 'age', 'zn', 'rad', 'dis', 'nox', 'ptratio', 'crim', 'tax'], 'MEV': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'zn'], 'MCN': ['zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax', 'ptratio'], 'MCN_general': ['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax'], 'GMIC': ['lstat', 'rm', 'nox', 'age', 'indus', 'ptratio', 'crim', 'tax', 'dis', 'rad'], 'TIC': ['lstat', 'rm', 'nox', 'indus', 'ptratio', 'age', 'crim', 'tax', 'dis', 'rad']}

Full Correlation Analysis report
              MIC       MAS       MEV       MCN  MCN_general      GMIC  \
age      0.420689  0.099268  0.420689  5.321928          2.0  0.368688   
b        0.272

In [40]:
# Generating a default model parameters
model_parameters = asc.default_model_parameters() 

# Model Training
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5
model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1])
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task
print("MAE = ", MAE,", R2 = ", R2)


MAE =  2.814968381964642 , R2 =  0.6958314131579451


In [41]:
# tuning hyper parameters
tuned_parameters = asc.hyperparameter_tuning(model_type, x_train, y_train
                                             , num_of_folds, scaler_option
                                           , n_iter=1000, random_state=0, verbose=1)

Fitting 5 folds for each of 1000 candidates, totalling 5000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    5.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   16.5s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   41.4s
[Parallel(n_jobs=-1)]: Done 784 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 1234 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 1784 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 2434 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 3184 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 4034 tasks      | elapsed:  6.7min
[Parallel(n_jobs=-1)]: Done 4984 tasks      | elapsed:  8.4min
[Parallel(n_jobs=-1)]: Done 5000 out of 5000 | elapsed:  8.4min finished


In [42]:
# Model Training
model_type = 'RF' # model type can be 'LR','RF','NN','KR','BR','SVM'
scaler_option = 'StandardScaler' # scaler option can be 'False','StandardScaler','Normalizer','MinMaxScaler','RobustScaler'
num_of_folds = 5
model = asc.define_model_regression(model_type, model_parameters, x_header_size = x_train.shape[1])
predictions, actual_values = asc.train_and_predict(model, x_train, y_train, scaler_option=scaler_option, num_of_folds=num_of_folds)
MAE, R2 = asc.evaluate(predictions, actual_values)

# Printing the performance of regression task
print("MAE = ", MAE,", R2 = ", R2)


MAE =  2.793541478026997 , R2 =  0.702740528877592


In [45]:
# save prediction-actual comparison chart
asc.save_comparison_chart(predictions, actual_values, "comparison_chart.png")

In [47]:
# save prediction-actual result in a csv file
asc.save_test_data(predictions, actual_values, "result.csv")

In [49]:
# saving the trained model in a file
asc.train_and_save(model, "trained_model", model_type
                        , input_cols=header_x, target_col=header_y
                        , x_train=x_train, y_train=y_train, scaler_option=scaler_option, path_to_save = '.', MAE=MAE, R2=R2)

* Training initiated ..
* Training done.
* Trained model saved to file: ./trained_model.pkl
