In [2]:
import os
import pandas as pd 
import numpy as np

import tensorflow 
from tensorflow import keras
from keras import layers, regularizers

import cv2 #images

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## import dependencies for model
%matplotlib inline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical, normalize
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy

from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# Database connection parameters (Update these with your actual credentials)
import os

DB_HOST = os.environ.get("DB_HOST")
DB_NAME = os.environ.get("DB_NAME")
DB_USER = os.environ.get("DB_USER")
DB_PASS = os.environ.get("DB_PASS")

In [4]:
# Define paths for the dataset directories
benign_dir = 'train_cancer/benign'
malignant_dir = 'train_cancer/malignant'

In [5]:
# Collect image paths and labels for PostgreSQL upload
data = []
for category, folder in [('benign', benign_dir), ('malignant', malignant_dir)]:
    for filename in os.listdir(folder):
        if filename.endswith('.jpg'):
            file_path = os.path.join(folder, filename)
            data.append({'file_path': file_path, 'label': category})


In [6]:
# Create a DataFrame
df = pd.DataFrame(data)

In [7]:
# Display the DataFrame to verify the paths and labels
display(df.head())

Unnamed: 0,file_path,label
0,train_cancer/benign/14.jpg,benign
1,train_cancer/benign/28.jpg,benign
2,train_cancer/benign/29.jpg,benign
3,train_cancer/benign/17.jpg,benign
4,train_cancer/benign/12.jpg,benign


In [8]:
# Export the DataFrame to a CSV file
df.to_csv('output.csv', index=False)

In [9]:
import psycopg2

# Step: Upload image metadata to PostgreSQL
try:
    # Connect to PostgreSQL
    connection = psycopg2.connect(
        host=DB_HOST,
        database=DB_NAME,
        user=DB_USER,
        password=DB_PASS
    )
    cursor = connection.cursor()

    # Create a table for storing image metadata
    create_table_query = '''
    CREATE TABLE IF NOT EXISTS skin_cancer_images (
        id SERIAL PRIMARY KEY,
        file_path TEXT,
        label TEXT
    );
    '''
    cursor.execute(create_table_query)
    connection.commit()
    print("Table created successfully.")

    # Additional code for inserting data into the table will go here

except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Make sure to close the connection if it was established
    if 'connection' in locals() and connection is not None:
        cursor.close()
        connection.close()
        print("Database connection closed.")


An error occurred: fe_sendauth: no password supplied



In [10]:
df = pd.read_csv('path_to_dataset.csv')
x = df.drop('label',axis=1)
y = df['label']
x = x.to_numpy()
x = x/255
y = to_categorical(y)

FileNotFoundError: [Errno 2] No such file or directory: 'path_to_dataset.csv'

In [None]:
# #labeling manually because there are only 7
label = {
    'Actinic keratoses':0,
    'Basal cell carcinoma':1,
    'Benign keratosis-like lesions':2,
    'Dermatofibroma':3,
    'Melanocytic nevi':4,
    'Vascular lesions':5,
    'Melanoma':6,
}

In [None]:
# Images 

In [None]:
# load images in 
def load_images(image_paths):
    images = []
    for path in image_paths:
        img = cv2.imread(path)
        img = cv2.resize(img, (224, 224)) 
        images.append(img)
    return np.array(images)

# Assuming you have a column with image paths
images = load_images(df['image_path'].values)
labels = df['label'].values 

In [None]:
# Split the DataSet

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.05, random_state=42)

In [None]:
# Build CNN (conv neural network) Model 

In [None]:
def build_cnn_model(input_shape=(224, 224, 3), num_classes=1000):
    model = models.Sequential()
    
    # Block 1
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(BatchNormalization())

    # Block 2
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(BatchNormalization())

    # Block 3
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(BatchNormalization())

    # Block 4
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(BatchNormalization())

    # Block 5
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation='relu'))
    model.add(layers.MaxPooling2D((2, 2), strides=(2, 2)))
    model.add(BatchNormalization())

    # Flatten and Fully Connected Layers
    model.add(layers.Flatten())
    model.add(layers.Dense(4096, activation='relu'))
    model.add(layers.Dense(4096, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    
    return model









In [None]:
model.compile(optimizer='adam',loss= 'categorical_crossentropy', metrics= ['accuracy'])


In [None]:
model.summary()

In [None]:
#Training our CNN Model
class_weights = {0:1,1:0.5,2:1,3:1,4:1,5:1,6:1}
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=90, class_weight=class_weights,validation_data=(y_train, y_test))

In [None]:
#Evaluate the Model 
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc}')

In [None]:
# Make Predictions
predictions=model.predict_classes(y_train)

check=[]
for i in range(len(y_test)):
  for j in range(7):
    if(y_test[i][j]==1):
      check.append(j)
check=np.asarray(check)

print(classification_report(check, predictions))

In [None]:
# model.save('modelv1.h5')

In [None]:
# Check accuracy
from sklearn.metrics import accuracy_score
y_pred = log_classifier.predict(X_test)
print(f" Logistic regression model accuracy: {accuracy_score(y_test,y_pred):.3f}")