In [1]:
# Attribute importance for classification or regression
# Ranks attributes according to their importance in predicting a target.
# help(oml.ai)

In [2]:
# Loading supporting packages

import getpass
import os
import time
from datetime import datetime

import cx_Oracle
import matplotlib
import numpy
import pandas as pd
import pyreadline
import scipy
import sklearn

import oml
from oml import algo
from oml import automl
from oml.automl import FeatureSelection

In [3]:
# Connect to Oracle Database.
# The password is never stored in the notebook: it is read from the
# OML_PASSWORD environment variable, or prompted for when that is unset.
# OML_USER / OML_HOST / OML_PORT / OML_SERVICE_NAME override the defaults below.
db_user = os.environ.get('OML_USER', 'MLDEVLX1')
db_password = os.environ.get('OML_PASSWORD') or getpass.getpass('Password for ' + db_user + ': ')

oml.connect(user=db_user,
            password=db_password,
            host=os.environ.get('OML_HOST', 'whf00atk.in.oracle.com'),
            port=int(os.environ.get('OML_PORT', '1521')),
            service_name=os.environ.get('OML_SERVICE_NAME', 'MLPDB1'),
            automl=True)

# Verify that the connection exists (last expression, so the cell displays it).
oml.isconnected()

True

In [4]:
# help(oml.ai)
# The results of attribute importance are the attributes
#    of the build data ranked according to their predictive influence

In [5]:
#-------------- IRIS dataset

# Accessing DB table via python object
oml_iris = oml.sync(table = 'DF_IRIS')

# print(type(oml_iris))
# print("\n",oml_iris.head())
# print("\n",oml_iris.tail())

# # dimesnions of data frame (Rows,Columns)
# print("\n",oml_iris.shape)

# print("\n",oml.dir())

# Split the data
train_dat, test_dat = oml_iris.split()
train_x = train_dat.drop('Species')
train_y = train_dat['Species']

#----------------- AI feature selection

# Specify settings.
setting = {'ODMS_SAMPLING':'ODMS_SAMPLING_DISABLE'}

# Create an oml AI model object.
ai_mod = oml.ai(**setting)

# Fit the AI model according to the build data and parameter settings.
ai_mod = ai_mod.fit(train_x, train_y)

# Show the model details.
# print(ai_mod)

print("\n", type(ai_mod.importance))
print("\n", ai_mod.importance)

#----------------------- auto ML feature selection ,dt - descision tree 

print("\ndescision tree")
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy')
print("\n", fs)

%time selected_features = fs.reduce('dt', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)


#----------------------- auto ML feature selection , rf - Random Forest

print("\nrandom forest")
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy')
print("\n", fs)

%time selected_features = fs.reduce('rf', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)


 <class 'oml.core.frame.DataFrame'>

        variable  importance  rank
0   Petal.Width    0.621026     1
1  Petal.Length    0.362197     2
2  Sepal.Length   -0.071832     3
3   Sepal.Width   -0.104237     4
Wall time: 4.36 s

 <oml.automl.interface.oml.FeatureSelection object at 0x0000023073F7E7C8>
Wall time: 1min 36s

 [3]
Wall time: 1.04 ms

 Selected columns: ['Petal.Width']
Wall time: 4.61 s

 <oml.automl.interface.oml.FeatureSelection object at 0x0000023073F7E808>
Wall time: 1min 33s

 [3]
Wall time: 0 ns

 Selected columns: ['Petal.Width']


In [5]:
#-------------- CANCER dataset

# Cancer data set feature selection
oml_cancer = oml.sync(table = "NR_DF_CANCER")

# print(oml_cancer.head())

# Split the data
train_dat, test_dat = oml_cancer.split()
train_x = train_dat.drop('target')
train_y = train_dat['target']

#----------------- AI feature selection

# Specify settings.
setting = {'ODMS_SAMPLING':'ODMS_SAMPLING_DISABLE'}

# Create an oml AI model object.
ai_mod = oml.ai(**setting)

# Fit the AI model according to the build data and parameter settings.
ai_mod = ai_mod.fit(train_x, train_y)

# Show the model details.
# print(ai_mod)

print("\n", type(ai_mod.importance))
print("\n", ai_mod.importance)

#----------------------- auto ML feature selection , dt - descision tree 

print("\ndescision tree")
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy')
print("\n", fs)

%time selected_features = fs.reduce('dt', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)
print("\nNumber of columns:", train_x_reduced.shape[1])

#----------------------- auto ML feature selection , rf - Random Forest

print("\nrandom forest")
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy')
print("\n", fs)

%time selected_features = fs.reduce('rf', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)
print("\nNumber of columns:", train_x_reduced.shape[1])

#----------------------- auto ML feature selection , svm_linear - Support Vector Machine with linear kernel

print("\nSVM")
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy')
print("\n", fs)

%time selected_features = fs.reduce('svm_linear', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)
print("\nNumber of columns:", train_x_reduced.shape[1])

print("\ncomparison od svm to top 20 cols")
# 0           worst perimeter    0.627039     1  yes
# 1                worst area    0.614483     2  yes
# 2              worst radius    0.598537     3  yes
# 3      worst concave points    0.589148     4  yes
# 4       mean concave points    0.579535     5  yes
# 5            mean perimeter    0.493424     6
# 6                area error    0.489751     7
# 7               mean radius    0.465557     8
# 8           worst concavity    0.452043     9
# 9                 mean area    0.442537    10  yes
# 10           mean concavity    0.430775    11  yes
# 11          perimeter error    0.299313    12
# 12             radius error    0.296743    13  yes
# 13        worst compactness    0.262066    14  yes
# 14         mean compactness    0.250231    15
# 15          concavity error    0.201177    16
# 16     concave points error    0.158728    17
# 17             mean texture    0.139699    18
# 18            worst texture    0.117972    19  yes
# 19         worst smoothness    0.107560    20


 <class 'oml.core.frame.DataFrame'>

                    variable  importance  rank
0           worst perimeter    0.627039     1
1                worst area    0.614483     2
2              worst radius    0.598537     3
3      worst concave points    0.589148     4
4       mean concave points    0.579535     5
5            mean perimeter    0.493424     6
6                area error    0.489751     7
7               mean radius    0.465557     8
8           worst concavity    0.452043     9
9                 mean area    0.442537    10
10           mean concavity    0.430775    11
11          perimeter error    0.299313    12
12             radius error    0.296743    13
13        worst compactness    0.262066    14
14         mean compactness    0.250231    15
15          concavity error    0.201177    16
16     concave points error    0.158728    17
17             mean texture    0.139699    18
18            worst texture    0.117972    19
19         worst smoothness    0.107560  

In [6]:
ms = automl.ModelSelection(mining_function='classification', 
                           score_metric='f1_macro', parallel=4)

%time selected_model = ms.select(train_x, train_y, k=1)
selected_model



Wall time: 38min 1s



Algorithm Name: Support Vector Machine

Mining Function: CLASSIFICATION

Target: target

Settings: 
                    setting name                 setting value
0                      ALGO_NAME  ALGO_SUPPORT_VECTOR_MACHINES
1          CLAS_WEIGHTS_BALANCED                           OFF
2                   ODMS_DETAILS                  ODMS_DISABLE
3   ODMS_MISSING_VALUE_TREATMENT       ODMS_MISSING_VALUE_AUTO
4                  ODMS_SAMPLING         ODMS_SAMPLING_DISABLE
5                      PREP_AUTO                            ON
6         SVMS_COMPLEXITY_FACTOR                            10
7            SVMS_CONV_TOLERANCE                         .0001
8           SVMS_KERNEL_FUNCTION                 SVMS_GAUSSIAN
9                SVMS_NUM_PIVOTS                           200
10                  SVMS_STD_DEV            5.3999999999999995

Attributes: 
area error
compactness error
concave points error
concavity error
fractal dimension error
mean area
mean compactness
mean concave

In [6]:
%time fs = automl.FeatureSelection(mining_function = 'classification', score_metric = 'accuracy', parallel=4)
print("\n", fs)

%time selected_features = fs.reduce('dt', train_x, train_y)
print("\n", selected_features)

%time train_x_reduced = train_x[:,selected_features]
print("\n", "Selected columns:",train_x_reduced.columns)



Wall time: 4.52 s

 <oml.automl.interface.oml.FeatureSelection object at 0x0000023073F43E48>
Wall time: 1min 36s

 [3]
Wall time: 996 µs

 Selected columns: ['Petal.Width']
