In [10]:
import os
import pandas as pd 
import numpy as np

import tensorflow 
from tensorflow import keras
from keras.models import Model
from keras import layers, regularizers

import cv2 #images

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## import dependencies for model
%matplotlib inline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical, normalize
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy

from sklearn.metrics import classification_report, confusion_matrix

In [11]:
# Database connection parameters
import psycopg2
import json
import pandas as pd
import numpy as np
from PIL import Image
import io
import json

# Read the database connection settings from the local JSON config file.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Pull the four connection settings out of the parsed config in one step.
DB_HOST, DB_NAME, DB_USER, DB_PASS = (
    config[key] for key in ("DB_HOST", "DB_NAME", "DB_USER", "DB_PASS")
)

In [12]:
# Function to load data from SQL and save it as a pandas DataFrame
def load_data_from_sql():
    """Fetch the labelled image table from PostgreSQL as a DataFrame.

    Connects using the module-level DB_HOST / DB_NAME / DB_USER / DB_PASS
    settings and runs a fixed SELECT against ``skin_cancer_images``.

    Returns:
        A pandas DataFrame with the columns ``label`` and ``image_data``,
        or ``None`` when connecting or querying fails (the error is printed
        rather than raised).
    """
    # Bind the name before the try block so the cleanup in ``finally`` is safe
    # even when connect() itself raises. Without this, the cleanup fails with
    # UnboundLocalError and hides the ``return None`` of the error path.
    connection = None
    try:
        # Connect to the database
        connection = psycopg2.connect(
            host=DB_HOST,
            database=DB_NAME,
            user=DB_USER,
            password=DB_PASS
        )

        # SQL query to fetch the dataset
        query = "SELECT label, image_data FROM skin_cancer_images;"

        # pandas drives the DBAPI connection itself, so no explicit cursor is
        # needed here.
        return pd.read_sql(query, connection)

    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        return None
    finally:
        if connection is not None:
            connection.close()

In [13]:
# Load the dataset into a DataFrame
df = load_data_from_sql()

# load_data_from_sql() signals failure by printing the error and returning
# None. Stop here with a clear message instead of failing on `None.head()`.
if df is None:
    raise RuntimeError(
        "Could not load skin_cancer_images from the database; "
        "see the error printed above."
    )

# Display the DataFrame 
print(df.head())

    label                                         image_data
0  benign  [b'\xff', b'\xd8', b'\xff', b'\xe0', b'\x00', ...
1  benign  [b'\xff', b'\xd8', b'\xff', b'\xe0', b'\x00', ...
2  benign  [b'\xff', b'\xd8', b'\xff', b'\xe0', b'\x00', ...
3  benign  [b'\xff', b'\xd8', b'\xff', b'\xe0', b'\x00', ...
4  benign  [b'\xff', b'\xd8', b'\xff', b'\xe0', b'\x00', ...


  df = pd.read_sql(query, connection)  # Read into a pandas DataFrame


In [14]:
# Function to process the DataFrame and extract image arrays and labels
def process_dataframe(df, target_size=(224, 224)):
    """Decode, resize and scale every stored image in ``df``.

    Args:
        df: DataFrame with a ``label`` column and an ``image_data`` column
            holding the encoded image bytes of each row.
        target_size: ``(width, height)`` every image is resized to.
            Defaults to ``(224, 224)``.

    Returns:
        Tuple ``(X, y)``. ``X`` is a NumPy array of the resized images with
        pixel values divided by 255; ``y`` is a NumPy array of the labels in
        the same row order.
    """
    images = []
    labels = []
    for _, row in df.iterrows():
        # The context manager closes the decoder's buffer once the pixels
        # have been copied out by convert()/resize().
        with Image.open(io.BytesIO(row['image_data'])) as image:
            # Force three channels so that a grayscale, palette or RGBA row
            # cannot make the stacked array ragged.
            rgb_image = image.convert('RGB').resize(target_size)

        # Convert the image to a numpy array and normalize pixel values
        images.append(np.array(rgb_image) / 255.0)
        labels.append(row['label'])

    # Convert lists to numpy arrays
    X = np.array(images)
    y = np.array(labels)

    return X, y

In [15]:
# Process the DataFrame to get image arrays and labels
# X holds the resized, 0-1 scaled image arrays; y holds the matching labels
# in the same row order as df.
X, y = process_dataframe(df)

In [16]:
# Sanity check: print the image array shape, then the label array shape,
# one per line.
print(X.shape, y.shape, sep="\n")

(810, 224, 224, 3)
(810,)


In [17]:
# Convert labels to numerical values:
# map each distinct label to an integer code, then one-hot encode the codes.
y = to_categorical(pd.Categorical(y).codes)

In [18]:
# Split the dataset into training and testing sets.
# 5% of the rows are held out; the fixed seed keeps the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    random_state=42,
)


In [19]:
# # Fixing Imbalanced Datasets
# from imblearn.over_sampling import RandomOverSampler

# oversample = RandomOverSampler()
# Data, Label  = oversample.fit_resample(data, label)
# Data = np.array(Data).reshape(-1, 28, 28, 3)
# print('Shape of Data :', Data.shape)

In [20]:
# Label = np.array(Label)
# Label

In [21]:
# # Encode labels to numerical values
# y = pd.Categorical(y).codes
# y = to_categorical(y)

In [22]:
###################
# # Split the DataSet
# from sklearn.model_selection import train_test_split

# X_train, X_test, y_train, y_test = train_test_split(image_data, label, test_size=0.05, random_state=42)

In [23]:
# # Split the DataSet
# from sklearn.model_selection import train_test_split

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [24]:
# Show the shapes of the four split arrays, one per line
# (train images, train labels, test images, test labels).
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

(769, 224, 224, 3)
(769, 2)
(41, 224, 224, 3)
(41, 2)


In [25]:
# #####################
# # convert data to categorical types
# from keras.utils.np_utils import to_categorical

# y_train = to_categorical(y_train)
# y_test = to_categorical(y_test)

In [26]:
# # Convert data to categorical types
# from keras.utils import to_categorical

# y_train = to_categorical(y_train)
# y_test = to_categorical(y_test)


In [27]:
# Create Image Data Generation

# No `rescale` here: process_dataframe() already divides every pixel by 255,
# so rescaling again inside the generators would squash the inputs to the
# range 0..1/255.
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

# Test-time generator: no augmentation and no additional scaling.
testgen = ImageDataGenerator()

In [28]:
# # Build CNN (conv neural network) Model
# def model(input_shape=(224, 224, 3), num_classes=1000):
#     model = keras.models.Sequential()
    
#     # Block 1
#     model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape))
#     model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
#     model.add(BatchNormalization())

#     # Block 2
#     model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
#     model.add(BatchNormalization())

#     # Block 3
#     model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
#     model.add(BatchNormalization())

#     # Block 4
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
#     model.add(BatchNormalization())

#     # Block 5
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
#     model.add(BatchNormalization())

#     # Flatten and Fully Connected Layers
#     model.add(layers.Flatten())
#     model.add(layers.Dense(4096, activation='relu'))
#     model.add(layers.Dense(4096, activation='relu'))
#     model.add(layers.Dense(num_classes, activation='softmax'))
    
#     return model

# model.compile(optimizer='adam',loss= 'categorical_crossentropy', metrics= ['accuracy'])
# model.summary()

In [29]:
# Build CNN  Model
# Change num_classes to 2
def build_model(input_shape=(224, 224, 3), num_classes=2):
    """Assemble the (uncompiled) Sequential CNN classifier.

    The network is five convolutional blocks followed by a dense head. Each
    block stacks 3x3 'same'-padded ReLU convolutions, then a 2x2 max-pool
    with stride 2, then batch normalisation. The head is Flatten, two
    4096-unit ReLU dense layers and a softmax output layer.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The constructed ``keras.models.Sequential`` model. Compiling is left
        to the caller.
    """
    # (filters, number of stacked conv layers) for blocks 1..5
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = keras.models.Sequential()

    # Declare the input with an explicit Input object. Passing `input_shape`
    # to the first Conv2D makes current Keras emit a UserWarning.
    model.add(keras.Input(shape=input_shape))

    for filters, depth in conv_blocks:
        for _ in range(depth):
            model.add(layers.Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
        model.add(BatchNormalization())

    # Flatten and Fully Connected Layers
    model.add(layers.Flatten())
    model.add(layers.Dense(4096, activation='relu'))
    model.add(layers.Dense(4096, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model


In [30]:
# Build the two-class network for 224x224 RGB inputs, then compile it with
# the Adam optimizer and categorical cross-entropy, tracking accuracy.
model = build_model(num_classes=2, input_shape=(224, 224, 3))
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [31]:
# Fit the model for 50 epochs in batches of 90 images; the held-out split is
# scored after every epoch as validation data.
history = model.fit(
    X_train,
    y_train,
    batch_size=90,
    epochs=50,
    validation_data=(X_test, y_test),
)


Epoch 1/50
[1m7/9[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m1:46[0m 53s/step - accuracy: 0.6389 - loss: 13.7902

In [None]:
# Score the trained model on the training and the held-out arrays.
# The model was compiled with metrics=['accuracy'], so each result is indexed
# as [0] = loss and [1] = accuracy below.
train_score = model.evaluate(X_train, y_train, verbose= 1)
test_score = model.evaluate(X_test, y_test, verbose= 1)

# Report both splits side by side, separated by a dashed rule.
print("Train Loss: ", train_score[0])
print("Train Accuracy: ", train_score[1])
print('-' * 20)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])

In [None]:
# Make Predictions
# `predictions` holds one row of class probabilities per test image.
predictions = model.predict(X_test)

# Collapse probabilities and one-hot labels to class indices.
y_pred = predictions.argmax(axis=1)
y_true = y_test.argmax(axis=1)


In [None]:
##############
# Make Predictions
# Predicted class index for every test image: arg-max over the model outputs.
# (Sequential.predict_classes no longer exists in current Keras, and the
# model must be fed test images, not the training labels.)
predicted_classes = np.argmax(model.predict(X_test), axis=1)

# True class index recovered from the one-hot rows of y_test. argmax works
# for any number of classes, unlike a loop hard-coded to 7 columns.
check = np.argmax(y_test, axis=1)

print(classification_report(check, predicted_classes))

In [None]:
# Display classification report
# y_true / y_pred are the arg-max class indices computed from y_test and from
# the model's predictions on X_test.
print(classification_report(y_true, y_pred))

In [None]:
# Display confusion matrix
import matplotlib.pyplot as plt

# Rows are true classes, columns are predicted classes.
conf_matrix = confusion_matrix(y_true, y_pred)
class_ids = np.arange(conf_matrix.shape[0])

fig, ax = plt.subplots(figsize=(6, 6))
heatmap = ax.imshow(conf_matrix, cmap='Blues')
fig.colorbar(heatmap, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')

# One tick per class index; the imshow defaults put ticks at fractional
# positions, which is meaningless for class ids.
ax.set_xticks(class_ids)
ax.set_yticks(class_ids)

# Write the count into every cell, switching the text colour on dark cells
# so the number stays legible.
threshold = conf_matrix.max() / 2
for true_id in class_ids:
    for pred_id in class_ids:
        count = conf_matrix[true_id, pred_id]
        ax.text(
            pred_id,
            true_id,
            str(count),
            ha='center',
            va='center',
            color='white' if count > threshold else 'black',
        )

plt.show()

In [None]:
# Persist the trained model to 'modelv1.h5' in the current working directory.
# NOTE(review): the .h5 suffix presumably selects Keras' legacy HDF5 format;
# newer Keras releases recommend the native `.keras` format. Confirm before
# renaming, since other code may load this exact filename.
model.save('modelv1.h5')