In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt

ImportError: cannot import name '_uarray' from 'scipy._lib._uarray' (C:\Users\Shaq\Anaconda3\lib\site-packages\scipy\_lib\_uarray\__init__.py)

In [None]:
import pandas as pd

In [None]:
# Load the breast-cancer dataset bundle and pull out its feature matrix.
breast = load_breast_cancer()
breast_data = breast['data']
breast_data

In [None]:
# Class labels, one entry per sample.
breast_labels = breast.target
# A bare expression is only echoed when it is the last line of a cell, so the
# shape has to be printed explicitly to be visible.
print(breast_labels.shape)
breast_labels

In [None]:
#reshape labels into a single column (n_samples, 1) so they can be concatenated w/ data
# -1 lets NumPy infer the row count instead of hard-coding the dataset size.
labels = breast_labels.reshape(-1, 1)

In [None]:
#stack the label column to the right of the feature matrix (column-wise)
final_breast_data = np.hstack((breast_data, labels))
final_breast_data

In [None]:
#wrap the combined feature + label array in a pandas DataFrame
breast_dataset = pd.DataFrame(data=final_breast_data)
breast_dataset

In [None]:
#breast.feature_names holds one name per feature column. Since the labels were
# appended to the data as an extra column, extend the names with a 'label' entry.
features = breast.feature_names
features_labels = np.append(features, 'label')

In [None]:
#use the feature names (plus 'label') as the dataset's column names
# NOTE: this mutates breast_dataset in place; features_labels must have exactly
# one entry per existing column.
breast_dataset.columns = features_labels

In [None]:
#replace the numeric labels with readable class names
# The mapping is taken from breast.target_names so that each code gets the name
# the dataset itself assigns to it (in scikit-learn's breast-cancer data,
# 0 is 'malignant' and 1 is 'benign'). Names are title-cased because the
# plotting code looks up 'Benign' / 'Malignant'.
label_names = {code: str(name).title() for code, name in enumerate(breast.target_names)}
# Assign the result back instead of using a chained inplace=True replace, which
# is deprecated and has no effect under pandas copy-on-write. Using replace
# (rather than map) keeps the cell safe to re-run.
breast_dataset['label'] = breast_dataset['label'].replace(label_names)

In [None]:
#now start PCA
#first standardize the features so each column has zero mean and unit variance
from sklearn.preprocessing import StandardScaler
x = StandardScaler().fit_transform(breast_dataset[features].to_numpy())

In [None]:
#sanity check: overall mean should be ~0 and overall std dev. ~1
x.mean(), x.std()

In [None]:
#wrap the standardized array in a dataframe
#columns are named feature0, feature1, ... (one per column of x)
feat_cols = [f'feature{idx}' for idx in range(x.shape[1])]
normalised_breast = pd.DataFrame(data=x, columns=feat_cols)
normalised_breast.tail()

In [None]:
#now project the standardized breast cancer features onto 2 principal components
from sklearn.decomposition import PCA
# Keep only the first two components.
pca_breast = PCA(n_components = 2)
# Fit the PCA on x and return x expressed in the 2-component basis
# (one row per sample, one column per component).
principalComponents_breast = pca_breast.fit_transform(x)

In [None]:
#put the 2-component projection into a DataFrame with one named column per component
pc_column_names = ['principle_component_1', 'principle_component_2']
principle_breast_Df = pd.DataFrame(principalComponents_breast, columns=pc_column_names)

In [None]:
#preview the last rows of the principal-component frame
principle_breast_Df.tail()

In [None]:
#report the share of total variance captured by each retained component
print(f'Explained variation per principle component: {pca_breast.explained_variance_ratio_}')

In [None]:
#plot the data along the principle components
# A single figure is created (the extra bare plt.figure() call only produced an
# empty figure).
plt.figure(figsize = (10,10))
plt.xticks(fontsize = 12)
plt.yticks(fontsize = 14)
plt.xlabel('PC 1', fontsize = 20)
# The second component belongs on the y axis; calling xlabel twice overwrote
# the 'PC 1' label and left the y axis unlabelled.
plt.ylabel('PC 2', fontsize = 20)
plt.title('PCA')
targets = ['Benign', 'Malignant']
colors = ['r', 'g']
for target, color in zip(targets, colors):
    # Boolean mask selecting the rows that belong to the current class.
    indicesToKeep = breast_dataset['label'] == target
    plt.scatter(principle_breast_Df.loc[indicesToKeep, 'principle_component_1'],
                principle_breast_Df.loc[indicesToKeep, 'principle_component_2'],
                c = color, s = 50, label = target)

# Legend entries come from each scatter's label, so names and colours cannot
# drift out of sync.
plt.legend(prop={'size': 15})

In [None]:
#import the libraries used to build the neural network model
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
import tensorflow as tf

In [None]:
#add the numeric class labels to the df as a 'Labels' column
# The 1-D label array is passed directly: a column needs no (n, 1) reshape, and
# this avoids hard-coding the number of samples.
principle_breast_Df = principle_breast_Df.assign(Labels = breast_labels)
principle_breast_Df.tail()

In [None]:
#@title Define the plotting function.

def plot_the_loss_curve(epochs, acc):
  """Plot a curve of accuracy vs. epoch.

  Despite the function's name, the values drawn are the accuracy values passed
  in `acc`; the axis label and legend entry say so.

  Args:
    epochs: sequence of epoch indices, used as the x values.
    acc: per-epoch accuracy values (any array-like), used as the y values.
  """
  # Accept lists / Series / arrays alike; .min() and .max() are needed below.
  acc = np.asarray(acc, dtype=float)

  plt.figure()
  plt.xlabel("Epoch")
  plt.ylabel("Accuracy")

  plt.plot(epochs, acc, label="Accuracy")
  plt.legend()
  # Pad the y range slightly around the data; skipped for empty input, where
  # min()/max() would raise.
  if acc.size:
    plt.ylim([acc.min()*0.95, acc.max() * 1.03])
  plt.show()

print("Defined the plot_the_loss_curve function.")

In [None]:
# Describe each model input as a floating-point feature column.
# Note: feature column names must not contain spaces.
PC1 = tf.feature_column.numeric_column('principle_component_1')
PC2 = tf.feature_column.numeric_column('principle_component_2')

# Collect the columns, in order, into the list the feature layer is built from.
feature_columns = [PC1, PC2]

# Wrap the feature columns in a layer that is later placed at the front of
# the model.
my_feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [None]:
def create_model(my_learning_rate, my_feature_layer):
  """Create and compile a small feed-forward network for the binary label.

  Args:
    my_learning_rate: learning rate passed to the Adam optimizer.
    my_feature_layer: layer placed first in the model to select/convert the
      input features.

  Returns:
    The compiled tf.keras Sequential model.
  """
  # Most simple tf.keras models are sequential.
  model = tf.keras.models.Sequential()

  # Add the layer containing the feature columns to the model.
  model.add(my_feature_layer)

  # Establish the metrics the model will measure: a prediction counts as the
  # positive class when the model output exceeds classification_threshold.
  classification_threshold = 0.8
  METRICS = [
              tf.keras.metrics.BinaryAccuracy(name='accuracy',
                                             threshold=classification_threshold),
  ]

  # Describe the topography of the model by calling the tf.keras.layers.Dense
  # method once for each layer. We've specified the following arguments:
  #   * units specifies the number of nodes in this layer.
  #   * activation specifies the activation function (tanh here).
  #   * name is just a string that can be useful when debugging.

  activ = 'tanh'

  # Define the first hidden layer with 10 nodes.
  model.add(tf.keras.layers.Dense(units=10,
                                  activation=activ,
                                  name='Hidden1'))

  # Define the second hidden layer with 6 nodes.
  model.add(tf.keras.layers.Dense(units=6,
                                  activation=activ,
                                  name='Hidden2'))

  # Define the output layer (single unit, no activation).
  # NOTE(review): the output is unbounded and trained with mean squared error
  # against 0/1 labels; a sigmoid output with binary cross-entropy is the more
  # usual choice for this task -- confirm before changing, as it alters results.
  model.add(tf.keras.layers.Dense(units=1,
                                  name='Output'))

  # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
  # and rejected by newer Keras releases.
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=my_learning_rate),
                loss="mean_squared_error",
                metrics=METRICS)

  return model


In [None]:
def train_model(model, dataset, epochs, label_name,
                batch_size=None):
  """Fit `model` on `dataset` and report its accuracy per epoch.

  Args:
    model: object exposing a Keras-style `fit` method.
    dataset: DataFrame-like whose `.items()` yields (column name, values).
    epochs: number of training epochs to run.
    label_name: name of the column holding the target values.
    batch_size: batch size forwarded to `fit` (None uses the model's default).

  Returns:
    A tuple (epoch indices from the fit history, "accuracy" history column).
  """

  # Convert every column to a NumPy array keyed by its name, then take the
  # label column out so that only the input features remain.
  columns = {}
  for col_name, col_values in dataset.items():
    columns[col_name] = np.array(col_values)
  target = np.array(columns.pop(label_name))

  fit_result = model.fit(x=columns, y=target, batch_size=batch_size,
                         epochs=epochs, shuffle=True)

  # The epoch indices live on the history object itself, separate from the
  # per-epoch metric values.
  epoch_index = fit_result.epoch

  # Tabulate the recorded metrics and keep the accuracy series.
  accuracy = pd.DataFrame(fit_result.history)["accuracy"]

  return epoch_index, accuracy

In [None]:
# The following variables are the hyperparameters.
learning_rate = 0.01
epochs = 20
batch_size = 50

# Hold out everything that is not used for training. The last n_train_rows
# rows train the model and all remaining leading rows form the test set, so no
# row is dropped (a fixed head(168) would leave one row in neither split).
# NOTE(review): the split follows the dataset's row order and is not
# shuffled -- consider a random, stratified split.
n_train_rows = 400
train_df_norm = principle_breast_Df.tail(n_train_rows)
test_df_norm = principle_breast_Df.iloc[:-n_train_rows]
assert len(train_df_norm) + len(test_df_norm) == len(principle_breast_Df)

# Specify the label
label_name = "Labels"

# Establish the model's topography.
my_model = create_model(learning_rate, my_feature_layer)

# Train the model on the training set. We're passing the entire training
# frame, but the model will only use the features defined by the feature_layer.
# After this call `epochs` holds the list of epoch indices returned by
# train_model rather than the epoch count.
epochs, acc = train_model(my_model, train_df_norm, epochs,
                          label_name, batch_size)
plot_the_loss_curve(epochs, acc)

# After building a model against the training set, test that model
# against the test set.
test_features = {name:np.array(value) for name, value in test_df_norm.items()}
test_label = np.array(test_features.pop(label_name)) # isolate the label
print("\n Evaluate the new model against the test set:")
my_model.evaluate(x = test_features, y = test_label, batch_size=batch_size)