## Mahmoud MOhamamdi (800-8683389-mmoham12)
## ITSC 5010- Project

## TensorFlow Classifier

### Data Set : Breast Cancer
### Address :
    https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Original%29
### Feature Columns:            

1.Clump_Thickness 

2.Cell_Size_Uniformity

3.Cell_Shape_Uniformity

4.Marginal_Adhesion

5.Single_Epi_Cell_Size

6.Bare_Nuclei

7.Bland_Chromatin

8.Normal_Nucleoli

9.Mitoses

10.Class (2 for benign, 4 for malignant)

### Label Column 
    Class

In [9]:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

from sklearn import datasets
from sklearn.datasets.mldata import fetch_mldata
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import tempfile


import time
import numpy as np
import pandas as pd

In [39]:
class BaseModel(object):
    def __init__(self, sess, dataset_name , n_inputs, n_outputs, \
                 n_neurons, scope, epoch, batch_size, learning_rate):

        self.dataset = dataset_name
        self.n_inputs= n_inputs
        self.n_classes= n_outputs
        #self.n_hidden = n_hidden
        self.n_neurons = n_neurons
        
        self.scope= scope
        
        self.epoch = epoch
        self.batch_size = batch_size
        
        self.learning_rate= learning_rate
        
        self.sess = sess
        
        # Partitioning the dataset  
        X_data , y_labels, y_onehot = self.load_dataset(dataset_name)
        
        print(" Data %s ,  Label %s , OneHot %s " %(X_data.shape , y_labels.shape ,y_onehot.shape ))
        
        self.X_train, self.X_test, self.y_train, self.y_test \
            = train_test_split( X_data , y_labels, test_size = 0.3)
            
        
        self.build_model()
       
            #iris['data'] , iris['target'].reshape((-1,1)),test_size = 0.3)

    def load_dataset(self, dataset_name):
        
        #data_file_name = 'breast-cancer-wisconsin.data.txt'

        # Preparing the data:
        data_file = 'breast-cancer-wisconsin.data'

        col_names = "id,clump_thickness,unif_cell_size,unif_cell_shape,marg_adhesion,single_epith_cell_size,bare_nuclei,bland_chrom,norm_nucleoli,mitoses,class"
        
        col_names= col_names.split(',')
        
#         with open(data_file, "r+") as f:
#           content = f.read()
#           f.seek(0, 0)
#           f.write(first_line.rstrip('\r\n') + '\n' + content)
        base_dir = '/home/mmoham12/Projects/DeepLearningHW/'
    
        df = pd.read_csv(base_dir + data_file, names= col_names)

        df.replace('?', np.nan, inplace = True)
        df.dropna(inplace=True)
        df.drop(['id'], axis = 1, inplace = True)

        df['class'].replace('2',0, inplace = True)
        df['class'].replace('4',1, inplace = True)
        
        df['bare_nuclei'] = pd.to_numeric(df['bare_nuclei'])

        #df.to_csv("cleaned_data.csv", index = False)

        y= np.array(df['class'], dtype= np.int)
               
        X = np.array(df.drop('class', axis =1), dtype= np.float)
             
#         dataset_home = tempfile.mkdtemp()

#         raw_data = fetch_mldata(dataset_name , data_home=dataset_home)

#         #dataset = datasets.load_iris()
        
#         y = raw_data.target
        
        y = y.reshape(y.shape[0])
        
        if dataset_name == 'breast-cancer':
            y[y==2]= 0 
            y[y==4]= 1
        
        y_onehot = np.zeros( (len(y) , self.n_classes), dtype=np.float)
        
        for i, lbl in enumerate(y):
            y_onehot[i, y[i]] = 1.0
            
        return X, y , y_onehot# data , labels
        
      

    def build_model(self):
        
        with tf.name_scope(self.scope) :
            
            inputs_dim= [self.n_inputs]
            
            y_dim = []
            
            self.inputs= tf.placeholder(tf.float32, shape=[None] + inputs_dim,  name='inputs')
            
            self.y = tf.placeholder(tf.float32, shape=[None] + y_dim, name='y')

           # print("self.X_train %s " %(self.X_train.shape ))
            
            # Hidden Layer with ReLU Activation function as default
            
            inputs = fully_connected(self.X_train, self.n_neurons )
            
            print("inputs %s " %(inputs.shape ))
            
            hidden1 = fully_connected(inputs, self.n_neurons )
            
            print("Hidern 1 %s " %(hidden1.shape))

            # Last Layer of model without applying Activation function: Logits
            logits = fully_connected(hidden1, self.n_classes, activation_fn=None)

            # Defining Loss function based on Entropy
            
            print("Logits %s , Label %s" %(logits.shape , self.y_train.shape))
            
            
           # xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels= self.y_train , logits= logits)
            xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits= logits ,labels= self.y_train )

            self.loss = tf.reduce_mean(xentropy, name="loss") # Average of all logits
            
            self.saver = tf.train.Saver()
            
            # Evaluation of Logits
            
            evals = tf.nn.in_top_k(tf.cast(logits , tf.float32), self.y_train, 1)
            
            self.accuracy = tf.reduce_mean( tf.cast(evals , tf.float32))

        

    def train(self):
        print("Start Training...\n")
        
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
        
               
        tf.global_variables_initializer().run(session= self.sess)
                    
        num_batches = len(self.X_train) // self.batch_size 
        
        
        start_time = time.time()
        
        for epoch in range(self.epoch):
            
            #print("Epoch Index"+ epoch)
            
            for idx in range(num_batches):
                
                #print("Batch Index"+ idx)
                
                batch_data = self.X_train[idx *  self.batch_size: (idx+1) *  self.batch_size] 
                batch_labeles= self.y_train[idx *  self.batch_size: (idx+1) *  self.batch_size] 
                
                #print("Batch Data %s , Batch Label %s" %(batch_data.shape , batch_labeles.shape))
                
                with self.sess.as_default():
                    
                    self.sess.run([optimizer],
                        feed_dict={self.inputs: batch_data, 
                                   self.y:      batch_labeles  
                                  })
                
#                     acc_train = self.accuracy.eval( feed_dict={
#                     self.inputs: batch_data, 
#                     self.y:      batch_labeles  
#                      })
                
                    acc_test  = self.accuracy.eval( feed_dict={
                    self.inputs: self.X_test, 
                    self.y:      self.y_test  
                                                          
                    })
                   
                print("Dataset:[%s]-> Epoch:[%2d], Batch :[%2d/%3d] time: %4.4f, Accuracy: %.6f"
                      % (self.dataset, epoch+1, idx, num_batches,
                         time.time() - start_time, acc_test))
                
            

In [40]:
#def main():
    
with tf.Session() as sess:
        nn_obj = BaseModel(
            sess,
            dataset_name ='breast-cancer',
            scope = 'Project', 
            epoch = 10, 
            n_inputs = 9, # breast_cancer has  9 features
            n_outputs = 2,# breast_cancer has  2 classes            
            n_neurons = 20,           
            batch_size= 20, 
            learning_rate = 0.1
            )
        
nn_obj.train()

 Data (683, 9) ,  Label (683,) , OneHot (683, 2) 
inputs (478, 20) 
Hidern 1 (478, 20) 
Logits (478, 2) , Label (478,)
Start Training...

Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 0/ 23] time: 0.0298, Accuracy: 0.439331
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 1/ 23] time: 0.0322, Accuracy: 0.533473
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 2/ 23] time: 0.0345, Accuracy: 0.627615
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 3/ 23] time: 0.0366, Accuracy: 0.692469
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 4/ 23] time: 0.0388, Accuracy: 0.715481
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 5/ 23] time: 0.0417, Accuracy: 0.736402
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 6/ 23] time: 0.0443, Accuracy: 0.811715
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 7/ 23] time: 0.0470, Accuracy: 0.855649
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 8/ 23] time: 0.0499, Accuracy: 0.866109
Dataset:[breast-cancer]-> Epoch:[ 1], Batch :[ 9/ 23] time: 0.0528, Accuracy: 0

Dataset:[breast-cancer]-> Epoch:[ 5], Batch :[18/ 23] time: 0.4323, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 5], Batch :[19/ 23] time: 0.4346, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 5], Batch :[20/ 23] time: 0.4401, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 5], Batch :[21/ 23] time: 0.4429, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 5], Batch :[22/ 23] time: 0.4452, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 0/ 23] time: 0.4500, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 1/ 23] time: 0.4543, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 2/ 23] time: 0.4583, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 3/ 23] time: 0.4643, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 4/ 23] time: 0.4667, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch :[ 5/ 23] time: 0.4697, Accuracy: 0.968619
Dataset:[breast-cancer]-> Epoch:[ 6], Batch

Dataset:[breast-cancer]-> Epoch:[10], Batch :[19/ 23] time: 0.8342, Accuracy: 0.974895
Dataset:[breast-cancer]-> Epoch:[10], Batch :[20/ 23] time: 0.8401, Accuracy: 0.976987
Dataset:[breast-cancer]-> Epoch:[10], Batch :[21/ 23] time: 0.8444, Accuracy: 0.974895
Dataset:[breast-cancer]-> Epoch:[10], Batch :[22/ 23] time: 0.8483, Accuracy: 0.976987
