In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import shutil

  from ._conv import register_converters as _register_converters


In [None]:
import urllib.request
url = 'https://storage.googleapis.com/mclean_test_bucket/python/breastCancer.csv'  
urllib.request.urlretrieve(url, 'breastCancer.csv')
#http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Original)

In [2]:
dataset = pd.read_csv('breastCancer.csv')# http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Original)
dataset.iloc[:,0:11]#all rows, columns 0 up to 11

Unnamed: 0,id,clump_thickness,size_uniformity,shape_uniformity,marginal_adhesion,epithelial_size,bare_nucleoli,bland_chromatin,normal_nucleoli,mitoses,class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2
5,1017122,8,10,10,8,7,10,9,7,1,4
6,1018099,1,1,1,1,2,10,3,1,1,2
7,1018561,2,1,2,1,2,1,3,1,1,2
8,1033078,2,1,1,1,2,1,1,1,5,2
9,1033078,4,2,1,1,2,1,2,1,1,2


In [3]:
dataset.describe()

Unnamed: 0,id,clump_thickness,size_uniformity,shape_uniformity,marginal_adhesion,epithelial_size,bland_chromatin,normal_nucleoli,mitoses,class
count,699.0,699.0,699.0,699.0,699.0,699.0,699.0,699.0,699.0,699.0
mean,1071704.0,4.41774,3.134478,3.207439,2.806867,3.216023,3.437768,2.866953,1.589413,2.689557
std,617095.7,2.815741,3.051459,2.971913,2.855379,2.2143,2.438364,3.053634,1.715078,0.951273
min,61634.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0
25%,870688.5,2.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,2.0
50%,1171710.0,4.0,1.0,1.0,1.0,2.0,3.0,1.0,1.0,2.0
75%,1238298.0,6.0,5.0,5.0,4.0,4.0,5.0,4.0,1.0,4.0
max,13454350.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,4.0


In [None]:
import seaborn as sns
sns.set(style="ticks")
sns.set_palette("Set1", n_colors=2)
sns.pairplot(dataset.iloc[:,1:11], hue="class")

In [4]:
# In CSV, label is the first column, after the features
CSV_COLUMNS = ['clump_thickness', 'size_uniformity','shape_uniformity','marginal_adhesion','epithelial_size','bare_nucleoli','bland_chromatin','normal_nucleoli','mitoses','class']
FEATURES = CSV_COLUMNS[0:len(CSV_COLUMNS) - 1]
LABEL = CSV_COLUMNS[9]
print(len(dataset))
dataset2 = dataset[dataset.bare_nucleoli != '?']#remove rows with bare_nucleoli='?'
print(len(dataset2))
dataset2 = dataset2.astype(int)
dataset2['class'][dataset2['class'] == 2] = 0 #convert class 2 to 0, benign
dataset2['class'][dataset2['class'] == 4] = 1 #convert class 4 to 1, malignant
#print(dataset2.dtypes)
df_train = dataset2.iloc[0:535,1:11] # use first 78% as training data
df_eval = dataset2.iloc[535:645,1:11] # use next 16% as eval data
df_test = dataset2.iloc[645:,1:10] # use last 6% as test data
print(FEATURES)
print(LABEL)
print(dataset2)
print(df_train.head())
print(df_train.tail())
print(df_train.dtypes)
df_test

699
683
['clump_thickness', 'size_uniformity', 'shape_uniformity', 'marginal_adhesion', 'epithelial_size', 'bare_nucleoli', 'bland_chromatin', 'normal_nucleoli', 'mitoses']
class
          id  clump_thickness  size_uniformity  shape_uniformity  \
0    1000025                5                1                 1   
1    1002945                5                4                 4   
2    1015425                3                1                 1   
3    1016277                6                8                 8   
4    1017023                4                1                 1   
5    1017122                8               10                10   
6    1018099                1                1                 1   
7    1018561                2                1                 2   
8    1033078                2                1                 1   
9    1033078                4                2                 1   
10   1035283                1                1                 1   
11   

Unnamed: 0,clump_thickness,size_uniformity,shape_uniformity,marginal_adhesion,epithelial_size,bare_nucleoli,bland_chromatin,normal_nucleoli,mitoses
661,4,1,1,1,2,1,3,1,1
662,1,1,3,1,2,1,2,1,1
663,1,1,3,1,2,1,2,1,1
664,3,1,1,3,2,1,2,1,1
665,1,1,1,1,2,1,1,1,1
666,5,2,2,2,2,1,1,1,2
667,3,1,1,1,2,1,3,1,1
668,5,7,4,1,6,1,7,10,3
669,5,10,10,8,5,5,7,10,1
670,3,10,7,8,5,8,7,4,1


In [5]:
def make_feature_cols():
  input_columns = [tf.feature_column.numeric_column(k) for k in FEATURES]
  return input_columns

In [6]:
feature_columns = make_feature_cols()
feature_columns

[_NumericColumn(key='clump_thickness', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='size_uniformity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='shape_uniformity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='marginal_adhesion', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='epithelial_size', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='bare_nucleoli', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='bland_chromatin', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='normal_nucleoli', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='mitoses', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [7]:
tf.logging.set_verbosity(tf.logging.INFO)

OUTDIR = './logs/breastCancer_trained'
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

myopt = tf.train.FtrlOptimizer(learning_rate = 0.01)
model = tf.estimator.DNNClassifier(feature_columns = make_feature_cols(), 
                                   model_dir = OUTDIR, hidden_units=[10, 10], 
                                   n_classes=2,optimizer = myopt,
                                   activation_fn = tf.nn.relu)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './logs/breastCancer_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1816b4c860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


![title](bcnn3.png)

In [8]:
def make_input_fn(df, num_epochs):
  return tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    num_epochs = num_epochs,
    shuffle = True,
    num_threads = 1
  )

In [9]:
model.train(input_fn = make_input_fn(df_train, num_epochs = 3000))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into ./logs/breastCancer_trained/model.ckpt.
INFO:tensorflow:loss = 90.194, step = 1
INFO:tensorflow:global_step/sec: 231.587
INFO:tensorflow:loss = 53.262722, step = 101 (0.434 sec)
INFO:tensorflow:global_step/sec: 339.269
INFO:tensorflow:loss = 41.958725, step = 201 (0.295 sec)
INFO:tensorflow:global_step/sec: 333.695
INFO:tensorflow:loss = 35.936295, step = 301 (0.300 sec)
INFO:tensorflow:global_step/sec: 286.828
INFO:tensorflow:loss = 32.954674, step = 401 (0.349 sec)
INFO:tensorflow:global_step/sec: 345.273
INFO:tensorflow:loss = 37.92449, step = 501 (0.289 sec)
INFO:tensorflow:global_step/sec: 357.967
INFO:tensorflow:loss = 24.966667, step = 601 (0.280 sec)
INFO:tensorflow:global_step/sec: 349.726
INFO:tens

INFO:tensorflow:global_step/sec: 335.465
INFO:tensorflow:loss = 10.348414, step = 8101 (0.296 sec)
INFO:tensorflow:global_step/sec: 352.122
INFO:tensorflow:loss = 9.083048, step = 8201 (0.286 sec)
INFO:tensorflow:global_step/sec: 362.319
INFO:tensorflow:loss = 5.030718, step = 8301 (0.277 sec)
INFO:tensorflow:global_step/sec: 359.061
INFO:tensorflow:loss = 6.704502, step = 8401 (0.278 sec)
INFO:tensorflow:global_step/sec: 351.917
INFO:tensorflow:loss = 4.6397057, step = 8501 (0.284 sec)
INFO:tensorflow:global_step/sec: 349.311
INFO:tensorflow:loss = 10.821503, step = 8601 (0.286 sec)
INFO:tensorflow:global_step/sec: 373.388
INFO:tensorflow:loss = 11.481302, step = 8701 (0.271 sec)
INFO:tensorflow:global_step/sec: 350.46
INFO:tensorflow:loss = 11.234636, step = 8801 (0.282 sec)
INFO:tensorflow:global_step/sec: 307.314
INFO:tensorflow:loss = 10.679205, step = 8901 (0.326 sec)
INFO:tensorflow:global_step/sec: 352.274
INFO:tensorflow:loss = 4.5405602, step = 9001 (0.283 sec)
INFO:tensorflo

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1816b4c9e8>

In [10]:
ev = model.evaluate(input_fn = make_input_fn(df_eval, num_epochs = 1))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-10-09-17:45:35
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./logs/breastCancer_trained/model.ckpt-12540
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-10-09-17:45:35
INFO:tensorflow:Saving dict for global step 12540: accuracy = 1.0, accuracy_baseline = 0.76363635, auc = 1.0, auc_precision_recall = 0.99999994, average_loss = 0.01896366, global_step = 12540, label/mean = 0.23636363, loss = 2.0860026, prediction/mean = 0.24478434


In [11]:
print(ev)

{'accuracy': 1.0, 'accuracy_baseline': 0.76363635, 'auc': 1.0, 'auc_precision_recall': 0.99999994, 'average_loss': 0.01896366, 'label/mean': 0.23636363, 'loss': 2.0860026, 'prediction/mean': 0.24478434, 'global_step': 12540}


In [12]:
print('RMSE on {} dataset = {}'.format('validation', np.sqrt(ev['average_loss'])))
print('Accuracy on {} dataset = {}'.format('validation', np.sqrt(ev['accuracy'])))

RMSE on validation dataset = 0.1377086043357849
Accuracy on validation dataset = 1.0


In [13]:
def make_test_fn(df, num_epochs):
  return tf.estimator.inputs.pandas_input_fn(
    x = df,
    num_epochs = num_epochs,
    shuffle = False,
    num_threads = 1
  )

In [14]:
prediction_result = model.predict(input_fn = make_test_fn(df_test, 1))

In [15]:
print(list(prediction_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./logs/breastCancer_trained/model.ckpt-12540
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[{'logits': array([-6.189175], dtype=float32), 'logistic': array([0.00204732], dtype=float32), 'probabilities': array([0.9979527 , 0.00204732], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}, {'logits': array([-5.768416], dtype=float32), 'logistic': array([0.00311497], dtype=float32), 'probabilities': array([0.996885  , 0.00311497], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}, {'logits': array([-5.768416], dtype=float32), 'logistic': array([0.00311497], dtype=float32), 'probabilities': array([0.996885  , 0.00311497], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}, {'logits': array([-7.136414], dtype=float32)

In [16]:
predictions = list(model.predict(input_fn = make_test_fn(df_test, 1)))
predicted_classes = [p["classes"] for p in predictions]
probabilities = [p["probabilities"] for p in predictions]
#print(len(probabilities))
for x in range(len(probabilities)):
    print(
        "Class Prediction:    {}"
        .format(predicted_classes[x]))

    print("Probability:    {}\n"
        .format(probabilities[x]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./logs/breastCancer_trained/model.ckpt-12540
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
Class Prediction:    [b'0']
Probability:    [0.9979527  0.00204732]

Class Prediction:    [b'0']
Probability:    [0.996885   0.00311497]

Class Prediction:    [b'0']
Probability:    [0.996885   0.00311497]

Class Prediction:    [b'0']
Probability:    [9.9920505e-01 7.9496752e-04]

Class Prediction:    [b'0']
Probability:    [0.9957337  0.00426632]

Class Prediction:    [b'0']
Probability:    [0.9948708  0.00512926]

Class Prediction:    [b'0']
Probability:    [0.9976059  0.00239405]

Class Prediction:    [b'1']
Probability:    [0.12418152 0.8758185 ]

Class Prediction:    [b'1']
Probability:    [0.00724882 0.99275124]

Class Prediction:    [b'1']
Probability:    [0.16493249 0.8350675 ]

Class Prediction:    [b'

In [17]:
df_test

Unnamed: 0,clump_thickness,size_uniformity,shape_uniformity,marginal_adhesion,epithelial_size,bare_nucleoli,bland_chromatin,normal_nucleoli,mitoses
661,4,1,1,1,2,1,3,1,1
662,1,1,3,1,2,1,2,1,1
663,1,1,3,1,2,1,2,1,1
664,3,1,1,3,2,1,2,1,1
665,1,1,1,1,2,1,1,1,1
666,5,2,2,2,2,1,1,1,2
667,3,1,1,1,2,1,3,1,1
668,5,7,4,1,6,1,7,10,3
669,5,10,10,8,5,5,7,10,1
670,3,10,7,8,5,8,7,4,1


In [18]:
#save model
clump_thickness = tf.feature_column.numeric_column("clump_thickness");
size_uniformity = tf.feature_column.numeric_column("size_uniformity");
shape_uniformity = tf.feature_column.numeric_column("shape_uniformity");
marginal_adhesion = tf.feature_column.numeric_column("marginal_adhesion");
epithelial_size = tf.feature_column.numeric_column("epithelial_size");
bare_nucleoli = tf.feature_column.numeric_column("bare_nucleoli");
bland_chromatin = tf.feature_column.numeric_column("bland_chromatin");
normal_nucleoli = tf.feature_column.numeric_column("normal_nucleoli");
mitoses = tf.feature_column.numeric_column("mitoses");
feature_columns = [clump_thickness, size_uniformity, shape_uniformity, marginal_adhesion, epithelial_size, 
                   bare_nucleoli, bland_chromatin, normal_nucleoli, mitoses];
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns);
export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec);
model.export_savedmodel(OUTDIR, export_input_fn, as_text=False)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: ['regression']
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Restoring parameters from ./logs/breastCancer_trained/model.ckpt-12540
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"./logs/breastCancer_trained/temp-b'1539107158'/saved_model.pb"


b'./logs/breastCancer_trained/1539107158'

In [19]:
# Load model from export directory, and make a predict function.
predict_fn = tf.contrib.predictor.from_saved_model("./logs/breastCancer_trained/1539107158")

# Test inputs represented by Pandas DataFrame.
inputs = pd.DataFrame({
    'clump_thickness': [2,5],
    'size_uniformity': [1,10],
    'shape_uniformity': [1,10],
    'marginal_adhesion': [1,3],
    'epithelial_size': [2,7],
    'bland_chromatin': [1,3],
    'bare_nucleoli': [2,8],
    'normal_nucleoli': [1,10],
    'mitoses': [1,2],
})

# Convert input data into serialized Example strings.
examples = []
for index, row in inputs.iterrows():
    feature = {}
    for col, value in row.iteritems():
        feature[col] = tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
    example = tf.train.Example(
        features=tf.train.Features(
            feature=feature
        )
    )
    examples.append(example.SerializeToString())

# Make predictions.
predictions = predict_fn({'inputs': examples})

Instructions for updating:
Use the retry module or similar alternatives.
INFO:tensorflow:Restoring parameters from b'./logs/breastCancer_trained/1539107158/variables/variables'


In [20]:
predictions

{'classes': array([[b'0', b'1'],
        [b'0', b'1']], dtype=object), 'scores': array([[0.9941346, 0.0058654],
        [0.0632105, 0.9367895]], dtype=float32)}