In [1]:
#%matplotlib inline
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
import numpy as np
import pandas as pd

In [2]:
diamonds = pd.read_csv('diamonds.csv')
diamonds.head()

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [3]:
diamonds["price"].max()

18823

In [4]:
diamonds["price"].min()

326

In [5]:
diamonds["price"]

0         326
1         326
2         327
3         334
4         335
         ... 
53935    2757
53936    2757
53937    2757
53938    2757
53939    2757
Name: price, Length: 53940, dtype: int64

## Create Bins for the Diamond Prices

In [6]:
# Create bins in which to place values based upon Diamond Price
bins = [0, 499, 999, 2499, 4999, 7499, 9999, 14999, 19999]

In [7]:
label_groups = [1,2,3,4,5,6,7,8]

In [8]:
# Create labels for these bins
group_labels = ["0-500", "500-1000", "1000-2500", "2500-5000", "5000-7500", "7500-10000", "1000-15000", "15000-20000"]

In [9]:
# Slice the data and place it into bins
diamonds["bins"] = pd.cut(diamonds["price"], bins, labels=label_groups)
diamonds.head()

Unnamed: 0.1,Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z,bins
0,1,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43,1
1,2,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31,1
2,3,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31,1
3,4,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63,1
4,5,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75,1


In [10]:
#.values.reshape(-1,1)

In [11]:
X = diamonds[["carat", "cut", "color", "clarity"]]
y = diamonds["bins"]
print(X.shape, y.shape)

(53940, 4) (53940,)


In [12]:
y

0        1
1        1
2        1
3        1
4        1
        ..
53935    4
53936    4
53937    4
53938    4
53939    4
Name: bins, Length: 53940, dtype: category
Categories (8, int64): [1 < 2 < 3 < 4 < 5 < 6 < 7 < 8]

## Dummy Encoding (Binary Encoded Data)

In [13]:
data = X.copy()

data_binary_encoded = pd.get_dummies(data, columns=["cut", "color", "clarity"])
data_binary_encoded.head()

Unnamed: 0,carat,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_D,color_E,color_F,color_G,...,color_I,color_J,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2
0,0.23,0,0,1,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0.21,0,0,0,1,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
2,0.23,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
3,0.29,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
4,0.31,0,1,0,0,0,0,0,0,0,...,0,1,0,0,0,1,0,0,0,0


## Scaling and Normalization

In [14]:
from sklearn.model_selection import train_test_split

X = data_binary_encoded

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

X_train.head()

Unnamed: 0,carat,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_D,color_E,color_F,color_G,...,color_I,color_J,clarity_I1,clarity_IF,clarity_SI1,clarity_SI2,clarity_VS1,clarity_VS2,clarity_VVS1,clarity_VVS2
35965,0.25,0,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
52281,0.84,0,0,1,0,0,0,0,0,0,...,0,1,0,0,1,0,0,0,0,0
6957,1.05,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,1,0,0
9163,1.02,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
50598,0.61,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0


In [15]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)

In [16]:
# Transform the training and testing data using the X_scaler

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
from tensorflow.keras.utils import to_categorical

In [18]:
# One-hot encoding
y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)

In [69]:
nodes = [21,42,63,126]

In [70]:
hidden_layers = [0,1,2]

In [71]:
epochs_count = [20,40,60]

In [72]:
train_data_node_count = []
train_data_accuracy = []
train_data_loss = []
train_data_epochs = []

test_data_node_count = []
test_data_accuracy = []
test_data_loss = []
test_data_epochs = []


In [73]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
model = Sequential()

k = 0

for i in range(3):
    count = nodes[i]
    
    if i == 0:
        print("No Hidden Layers: \n")
        for j in range(3):
            current_epochs_count = epochs_count[j]

            print(f"Epochs count: {current_epochs_count} \n")
            for i in range(len(nodes)):
                print(f"Model: {k}")
                #create 4 models with varying node sizes 21,42,63,126
                model.add(Dense(units=count, activation='relu', input_dim=21))
                model.add(Dense(units=9, activation='softmax'))
                #print(model.summary())

                # Compile the model
                model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

                # Fit the model to the training data
                model.fit(
                    X_train_scaled,
                    y_train_categorical,
                    epochs=current_epochs_count,
                    shuffle=True,
                    verbose=0
                )

                # Print the number of nodes for this model
                print(f"Number of Nodes: {nodes[i]} \n")

                # Print the training data accuracy
                model_loss, model_accuracy = model.evaluate(X_train_scaled, y_train_categorical, verbose=2)
                print(f"Normal Neural Network - Loss: {model_loss}, Train Data Accuracy: {model_accuracy}  \n")

                # Append aquired data to lists
                train_data_node_count.append(count)
                train_data_accuracy.append(model_accuracy)
                train_data_loss.append(model_loss)
                train_data_epochs.append(current_epochs_count)

                # Print the test data accuracy
                model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
                print(f"Normal Neural Network - Loss: {model_loss}, Test Data Accuracy: {model_accuracy} \n")

                # Append aquired data to lists
                test_data_node_count.append(count)
                test_data_accuracy.append(model_accuracy)
                test_data_loss.append(model_loss)
                test_data_epochs.append(current_epochs_count)
                
                # Save the Model
                model.save(f"Models/diamond_model{k}_trained.h5")
                k += 1
            
            
    if i == 1:
        print("One Hidden Layer: \n")
        for j in range(3):
            current_epochs_count = epochs_count[j]

            print(f"Epochs count: {current_epochs_count} \n")
            for i in range(len(nodes)):
                print(f"Model: {k}")
                model.add(Dense(units=count, activation='relu', input_dim=21))
                model.add(Dense(units=count, activation='relu'))
                model.add(Dense(units=9, activation='softmax'))
                #print(model.summary())

                # Compile the model
                model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

                # Fit the model to the training data
                model.fit(
                    X_train_scaled,
                    y_train_categorical,
                    epochs=current_epochs_count,
                    shuffle=True,
                    verbose=0
                )

                # Print the number of nodes for this model
                print(f"Number of Nodes: {nodes[i]} \n")

                # Print the training data accuracy
                model_loss, model_accuracy = model.evaluate(X_train_scaled, y_train_categorical, verbose=2)
                print(f"Normal Neural Network - Loss: {model_loss}, Train Data Accuracy: {model_accuracy}  \n")

                # Append aquired data to lists
                train_data_node_count.append(count)
                train_data_accuracy.append(model_accuracy)
                train_data_loss.append(model_loss)
                train_data_epochs.append(current_epochs_count)

                # Print the test data accuracy
                model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
                print(f"Normal Neural Network - Loss: {model_loss}, Test Data Accuracy: {model_accuracy} \n")

                # Append aquired data to lists
                test_data_node_count.append(count)
                test_data_accuracy.append(model_accuracy)
                test_data_loss.append(model_loss)
                test_data_epochs.append(current_epochs_count)
                
                # Save the Model
                model.save(f"Models/diamond_model{k}_trained.h5")
                k += 1
    
    if i == 2:
        print("Two Hidden Layers: \n")
        for j in range(3):
            current_epochs_count = epochs_count[j]

            print(f"Epochs count: {current_epochs_count} \n")
            for i in range(len(nodes)):
                print(f"Model: {k}")

                model.add(Dense(units=count, activation='relu', input_dim=21))
                model.add(Dense(units=count, activation='relu'))
                model.add(Dense(units=count, activation='relu'))
                model.add(Dense(units=9, activation='softmax'))
                #print(model.summary())

                # Compile the model
                model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

                # Fit the model to the training data
                model.fit(
                    X_train_scaled,
                    y_train_categorical,
                    epochs=current_epochs_count,
                    shuffle=True,
                    verbose=0
                )

                # Print the number of nodes for this model
                print(f"Number of Nodes: {nodes[i]} \n")

                # Print the training data accuracy
                model_loss, model_accuracy = model.evaluate(X_train_scaled, y_train_categorical, verbose=2)
                print(f"Deep Learning - Loss: {model_loss}, Train Data Accuracy: {model_accuracy}  \n")

                # Append aquired data to lists
                train_data_node_count.append(count)
                train_data_accuracy.append(model_accuracy)
                train_data_loss.append(model_loss)
                train_data_epochs.append(current_epochs_count)

                # Print the test data accuracy
                model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
                print(f"Deep Learning - Loss: {model_loss}, Test Data Accuracy: {model_accuracy} \n")

                # Append aquired data to lists
                test_data_node_count.append(count)
                test_data_accuracy.append(model_accuracy)
                test_data_loss.append(model_loss)
                test_data_epochs.append(current_epochs_count)
                
                # Save the Model
                model.save(f"Models/diamond_model{k}_trained.h5")
                k += 1
                
# Print the lists
print(f"Train Data Node Count: {train_data_node_count} \n")
print(f"Train Data Accuracy: {train_data_accuracy} \n")
print(f"Train Data Data Loss: {train_data_loss} \n")
print(f"Train Data Epochs Count: {train_data_epochs} \n")

print(f"Test Data Node Count: {test_data_node_count} \n")
print(f"Test Data Accuracy: {test_data_accuracy} \n")
print(f"Test Data Data Loss: {test_data_loss} \n")
print(f"Test Data Epochs Count: {test_data_epochs} \n")

No Hidden Layers: 

Model: 0
Epochs count: 20 

Number of Nodes: 21 

40455/40455 - 2s - loss: 0.3261 - acc: 0.8650
Normal Neural Network - Loss: 0.32608809221138574, Train Data Accuracy: 0.8649857640266418  

13485/13485 - 1s - loss: 0.3284 - acc: 0.8646
Normal Neural Network - Loss: 0.3284134024397108, Test Data Accuracy: 0.8645902872085571 

Number of Nodes: 42 

40455/40455 - 3s - loss: 0.3164 - acc: 0.8676
Normal Neural Network - Loss: 0.31639272125177487, Train Data Accuracy: 0.8675812482833862  

13485/13485 - 1s - loss: 0.3248 - acc: 0.8657
Normal Neural Network - Loss: 0.32484385320377385, Test Data Accuracy: 0.8657026290893555 



KeyboardInterrupt: 

In [19]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=63, activation='relu', input_dim=21))
model.add(Dense(units=63, activation='relu'))
model.add(Dense(units=63, activation='relu'))
model.add(Dense(units=9, activation='softmax'))

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 63)                1386      
_________________________________________________________________
dense_1 (Dense)              (None, 9)                 576       
Total params: 1,962
Trainable params: 1,962
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [22]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
40455/40455 - 1s - loss: 1.1088 - acc: 0.5845
Epoch 2/100
40455/40455 - 1s - loss: 0.5830 - acc: 0.7998
Epoch 3/100
40455/40455 - 1s - loss: 0.4526 - acc: 0.8285
Epoch 4/100
40455/40455 - 1s - loss: 0.4005 - acc: 0.8401
Epoch 5/100
40455/40455 - 1s - loss: 0.3745 - acc: 0.8492
Epoch 6/100
40455/40455 - 1s - loss: 0.3598 - acc: 0.8534
Epoch 7/100
40455/40455 - 1s - loss: 0.3495 - acc: 0.8560
Epoch 8/100
40455/40455 - 1s - loss: 0.3435 - acc: 0.8573
Epoch 9/100
40455/40455 - 1s - loss: 0.3378 - acc: 0.8584
Epoch 10/100
40455/40455 - 1s - loss: 0.3350 - acc: 0.8596
Epoch 11/100
40455/40455 - 1s - loss: 0.3319 - acc: 0.8620
Epoch 12/100
40455/40455 - 1s - loss: 0.3295 - acc: 0.8624
Epoch 13/100
40455/40455 - 1s - loss: 0.3281 - acc: 0.8628
Epoch 14/100
40455/40455 - 1s - loss: 0.3254 - acc: 0.8628
Epoch 15/100
40455/40455 - 1s - loss: 0.3252 - acc: 0.8622
Epoch 16/100
40455/40455 - 1s - loss: 0.3238 - acc: 0.8640
Epoch 17/100
40455/40455 - 1s - loss: 0.3227 - acc: 0.8637
Epoch 

<tensorflow.python.keras.callbacks.History at 0x1f71c6a28d0>

In [23]:
model_loss, model_accuracy = model.evaluate(
    X_train_scaled, y_train_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

40455/40455 - 1s - loss: 0.3000 - acc: 0.8710
Normal Neural Network - Loss: 0.2999517162121531, Accuracy: 0.8710171580314636


In [24]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

13485/13485 - 0s - loss: 0.3248 - acc: 0.8666
Normal Neural Network - Loss: 0.32482521970668987, Accuracy: 0.866592526435852
