<a href="https://colab.research.google.com/github/nshoo/Neural-Synthesis-Visualization/blob/main/pima_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [None]:
# load data
# Column names are supplied explicitly via `names=`, so every line of the CSV
# is read as a data row.
COLUMN_NAMES = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
    'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome',
]

# Cast every column to float in one step so later cells work with a single dtype.
data = pd.read_csv( 'diabetes.csv', names = COLUMN_NAMES ).astype( float )

# Quick size check: number of rows, then number of columns.
rows = len( data )
print( rows )
print( data.shape[ 1 ] )

# Two-column preview; as the last expression it is what the cell displays.
newData = data[ [ 'Pregnancies', 'Age' ] ]
newData.head()


768
9


Unnamed: 0,Pregnancies,Age
0,6.0,50.0
1,1.0,31.0
2,8.0,32.0
3,1.0,21.0
4,0.0,33.0


In [None]:
# impute
# In these columns a value of 0.0 is replaced by the mean of the column's
# non-zero entries.
zero_imputed_columns = [ 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI' ]

for col in zero_imputed_columns:
    # The mean is taken over non-zero rows only, so the zeros being replaced
    # do not drag the fill value down.
    nonzero_values = data.loc[ data[ col ].ne( 0.0 ), col ]
    mean = nonzero_values.mean()
    data[ col ] = data[ col ].replace( 0.0, mean )
    

In [None]:
# Fix the random sources so Restart & Run All reproduces the same shuffle,
# weight initialisation and training order.
# NOTE(review): some hardware/ops may still be non-deterministic — confirm if
# bit-exact results are required.
SEED = 42
tf.random.set_seed( SEED )

# shuffle data "in place": https://stackoverflow.com/a/34879805
# Note: `data` is rebound to the shuffled frame, so re-running only this cell
# shuffles the already-shuffled rows again.
data = data.sample( frac = 1, random_state = SEED ).reset_index( drop = True )

# Split features from the label; `ys` stays a one-column DataFrame.
xs = data.copy()
ys = xs.pop( 'Outcome' ).to_frame( name = 'Outcome' )

# Hold out the last fifth of the shuffled rows for evaluation.
test_len = len( xs ) // 5
train_len = len( xs ) - test_len

train_xs, test_xs = xs.head( train_len ), xs.tail( test_len )
train_ys, test_ys = ys.head( train_len ), ys.tail( test_len )

# Normalisation statistics are learned from the training rows only.
norm = keras.layers.Normalization()
norm.adapt( train_xs )

model = keras.Sequential()
model.add( norm )
# No input_dim here: this layer is not the first in the model, so its input
# width comes from the normalisation layer's output.
model.add( keras.layers.Dense( 9, kernel_initializer='normal', activation='relu' ) )
model.add( keras.layers.Dense( 20, activation='relu' ) )
model.add( keras.layers.Dense( 20, activation='relu' ) )
model.add( keras.layers.Dense( 1, activation='relu' ) )
model.add( keras.layers.ReLU( max_value = 1.0 ) ) # for clamping
model.summary()

model.compile( loss = tf.losses.MeanSquaredError(), optimizer = tf.optimizers.Adam(), metrics=[ tf.keras.metrics.MeanSquaredError() ] )

model.fit( train_xs, train_ys, epochs = 10 )
# Last expression: the cell displays [loss, metric] on the held-out rows.
model.evaluate( test_xs, test_ys )


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 normalization (Normalizatio  (None, 8)                17        
 n)                                                              
                                                                 
 dense (Dense)               (None, 9)                 81        
                                                                 
 dense_1 (Dense)             (None, 20)                200       
                                                                 
 dense_2 (Dense)             (None, 20)                420       
                                                                 
 dense_3 (Dense)             (None, 1)                 21        
                                                                 
 re_lu (ReLU)                (None, 1)                 0         
                                                        

[0.16097089648246765, 0.16097089648246765]

In [None]:
# Display the held-out feature rows as this cell's output.
test_xs

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
615,2.0,88.0,58.0,26.00000,16.000000,28.4,0.766,22.0
616,7.0,181.0,84.0,21.00000,192.000000,35.9,0.586,51.0
617,6.0,85.0,78.0,29.15342,155.548223,31.2,0.382,42.0
618,3.0,187.0,70.0,22.00000,200.000000,36.4,0.408,36.0
619,8.0,99.0,84.0,29.15342,155.548223,35.4,0.388,50.0
...,...,...,...,...,...,...,...,...
763,5.0,115.0,98.0,29.15342,155.548223,52.9,0.209,28.0
764,2.0,108.0,62.0,10.00000,278.000000,25.3,0.881,22.0
765,1.0,119.0,86.0,39.00000,220.000000,45.6,0.808,29.0
766,1.0,97.0,70.0,40.00000,155.548223,38.1,0.218,30.0


In [None]:

# Write one `(constraint (= (rig_mimic <features>) <true|false>))` line per
# held-out row to pima_constraints.txt, and count how many rows carry each label.

# Materialise the arrays once instead of on every loop iteration.
# The labels are flattened so each `outcome` below is a scalar rather than a
# one-element array.
features = test_xs.values
labels = test_ys.values.ravel()

oneCounter = 0
zeroCounter = 0

# The context manager closes the file even if formatting or writing raises.
with open( "pima_constraints.txt", "w" ) as f:
    for point, outcome in zip( features, labels ):
        is_positive = outcome == 1
        # Feature values are written with four decimal places, space-separated.
        arguments = ' '.join( "{:.4f}".format( x ) for x in point )
        f.write( "(constraint (= (rig_mimic {}) {}))\n".format( arguments, 'true' if is_positive else 'false' ) )
        if is_positive:
            oneCounter += 1
        else:
            zeroCounter += 1

print(f"ones: {oneCounter}; zeros: {zeroCounter}")

ones: 61; zeros: 92


In [None]:
# counterfactual generation
# Query the trained model with a baseline point and with a copy that differs
# only in one feature, then print both inputs and predictions for comparison.
# Values are positional, in the order of the training feature columns:
# Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI,
# DiabetesPedigreeFunction, Age.
BMI_INDEX = 5

baseline_point = [ 3.0, 129.0, 92.0, 49.0, 155.0, 36.4, 0.968, 32.0 ]

# Same point with only BMI lowered; written as a float so every column of
# both frames has the same dtype.
counterfactual_point = list( baseline_point )
counterfactual_point[ BMI_INDEX ] = 35.0

x1 = pd.DataFrame( data = [ baseline_point ] )
x2 = pd.DataFrame( data = [ counterfactual_point ] )

predictions = []
for position, query in enumerate( [ x1, x2 ] ):
    if position > 0:
        print()  # blank line between the two reports
    prediction = model.predict( query )
    print( query )
    print( prediction )
    predictions.append( prediction )

y1, y2 = predictions

# Persist the trained model under ./pima_model.
model.save( './pima_model' )

     0      1     2     3      4     5      6     7
0  3.0  129.0  92.0  49.0  155.0  36.4  0.968  32.0
[[0.42056134]]

     0      1     2     3      4   5      6     7
0  3.0  129.0  92.0  49.0  155.0  35  0.968  32.0
[[0.41110656]]
INFO:tensorflow:Assets written to: ./pima_model/assets
