In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.externals import joblib
# sensitivity analysis
from SALib.sample import saltelli
from SALib.analyze import sobol
from SALib.test_functions import Ishigami

In [2]:
# Load the previously trained regressor from disk.
# NOTE(review): `joblib` here is the copy imported from `sklearn.externals` in the
# import cell; newer scikit-learn releases no longer ship that module -- TODO confirm
# the pinned scikit-learn version or switch to the standalone `joblib` package.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
model = joblib.load('./training/regressor_mlp_new.pkl')

In [11]:
# SALib problem definition: one entry per model input feature.
# Each row of the generated sample matrix is passed straight to model.predict,
# so the order of 'names' must be the feature order the model expects.
# NOTE(review): every feature is sampled on [0, 1] -- presumably the training
# inputs were scaled to that range; confirm against the training pipeline.
problem = {
  'names': ['merchanttype2', 'merchanttype3', 'topads', 'cashback', 'cashbackval', 'price', 'prodrating', 
            'reviewcount', 'negreview', 'posreview', 'answercnt', 'otheragreemean', 'ratingmosthelpful', 
            'possentiment', 'negsentiment', 'sentipolarity', 'reviewersrep', 'revpictotal', 'prodpicstotal'],
}
# Derive the variable count from the name list so the two cannot drift apart
# when a feature is added or removed.
problem['num_vars'] = len(problem['names'])
# Build an independent [low, high] pair per feature; multiplying a nested list
# would make every row an alias of the same inner list.
problem['bounds'] = [[0, 1] for _ in problem['names']]

# Generate samples
# 1000 base samples; with second-order terms enabled (the SALib default) the
# Saltelli scheme returns N * (2 * D + 2) rows, i.e. 1000 * 40 = 40000 for D = 19.
param_values = saltelli.sample(problem, 1000)

In [12]:
# Sanity check of the sample matrix: expect (N * (2 * D + 2), D) = (40000, 19)
# for N = 1000 base samples and D = 19 features.
param_values.shape

(40000, 19)

In [13]:
# run model
# Evaluate the loaded model on the whole sample matrix in one batched call.
# Predicting row by row costs one predict() call per sample (40000 here) and
# stores a 1-element array into a scalar slot, which recent NumPy deprecates.
# reshape(-1) flattens an (n,) or (n, 1) prediction array to the 1-D vector
# that sobol.analyze expects; dtype=float matches the float64 buffer used before.
Y = np.asarray(model.predict(param_values), dtype=float).reshape(-1)

# Exactly one scalar output per sample row is required; fail loudly otherwise
# (e.g. if the model turns out to be multi-output).
assert Y.shape == (param_values.shape[0],), Y.shape

In [14]:
# Perform analysis
# Returns a dict-like result with keys 'S1', 'S1_conf', 'ST', 'ST_conf'
# (first- and total-order indices with bootstrap confidence intervals) and,
# because second-order terms are computed by default, 'S2' and 'S2_conf'.
# The confidence intervals come from random bootstrap resampling, so a fixed
# seed is passed to make them reproducible across kernel restarts; the index
# point estimates themselves do not depend on the seed.
Si = sobol.analyze(problem, Y, print_to_console=True, seed=42)

Parameter S1 S1_conf ST ST_conf
merchanttype2 0.001389 0.003370 0.001511 0.000228
merchanttype3 -0.000790 0.001854 0.000482 0.000074
topads 0.000556 0.002664 0.000800 0.000130
cashback 0.000984 0.001081 0.000148 0.000027
cashbackval 0.016475 0.012971 0.018837 0.006440
price 0.320142 0.063778 0.446302 0.051356
prodrating 0.000754 0.003645 0.001888 0.000410
reviewcount 0.407346 0.050002 0.566409 0.068163
negreview -0.000274 0.002872 0.001212 0.000159
posreview -0.000156 0.004772 0.003107 0.000480
answercnt 0.087731 0.024397 0.113598 0.016451
otheragreemean -0.000928 0.001424 0.000211 0.000045
ratingmosthelpful 0.000943 0.002759 0.000987 0.000133
possentiment 0.000368 0.002092 0.000846 0.000159
negsentiment 0.000315 0.003445 0.001451 0.000265
sentipolarity 0.000765 0.004325 0.002208 0.000320
reviewersrep 0.000000 0.000000 0.000000 0.000000
revpictotal 0.021454 0.016345 0.035103 0.004866
prodpicstotal -0.000357 0.005543 0.003819 0.000539

Parameter_1 Parameter_2 S2 S2_conf
merchanttype2 me

In [15]:
# First-order Sobol indices as an array, one value per feature in the same
# order as problem['names'].
Si['S1']

array([ 1.38919498e-03, -7.90457964e-04,  5.56141470e-04,  9.84435560e-04,
        1.64748302e-02,  3.20142416e-01,  7.53735009e-04,  4.07346071e-01,
       -2.74272872e-04, -1.56092987e-04,  8.77311580e-02, -9.28322186e-04,
        9.43164291e-04,  3.68427573e-04,  3.15406915e-04,  7.64570896e-04,
        0.00000000e+00,  2.14535459e-02, -3.57336667e-04])

In [9]:
# NOTE(review): leftover IPython introspection (`??` displays an object's source).
# It is not valid plain Python and its execution count (In [9]) is out of sequence
# with the cells above; remove this cell before sharing the notebook.
??sobol.analyze