# Skin Cancer Classification - Convolutional Network

### by ReDay Zarra

This project uses a convolutional neural network to **identify 9 different kinds of skin cancers**, including melanoma, nevus, and more. The model is **trained on over 2,200 pictures of various skin cancers** drawn from this [dataset](https://www.kaggle.com/datasets/nodoubttome/skin-cancer9-classesisic). It applies fundamental computer vision and classification techniques, and the notebook includes a *step-by-step implementation of the model* as well as *in-depth notes on customizing the model further* for higher accuracy.

## Importing the necessary libraries

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn

In [14]:
import os
import pickle

# Specify the directory containing the Train and Test folders
main_folder = 'skin-cancers'
train_folder = 'Train'
test_folder = 'Test'


def _load_labeled_images(split_dir):
    """Return a list of (image_bytes, label) tuples for one dataset split.

    split_dir is expected to contain one sub-folder per class; the
    sub-folder's name is used as the label of every image inside it.
    Images are kept as the raw bytes read from disk (not decoded).
    """
    samples = []
    # os.listdir returns entries in arbitrary order; sort so that the
    # pickled data comes out in the same order on every run and platform.
    for label in sorted(os.listdir(split_dir)):
        class_dir = os.path.join(split_dir, label)
        # Skip stray files (e.g. .DS_Store) sitting next to the class
        # folders; calling os.listdir on them would raise.
        if not os.path.isdir(class_dir):
            continue
        for image_name in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, image_name)
            # Ignore hidden files and nested folders so that only image
            # files end up in the data set.
            if image_name.startswith('.') or not os.path.isfile(image_path):
                continue
            with open(image_path, 'rb') as f:
                samples.append((f.read(), label))
    return samples


# Collect the training images and save them as a .p file using pickle
train_data = _load_labeled_images(os.path.join(main_folder, train_folder))
with open(os.path.join(main_folder, 'train.p'), 'wb') as f:
    pickle.dump(train_data, f)

# Repeat the same steps for the testing data
test_data = _load_labeled_images(os.path.join(main_folder, test_folder))
with open(os.path.join(main_folder, 'test.p'), 'wb') as f:
    pickle.dump(test_data, f)

In [20]:
import os
import pickle

# The pickles are written inside the dataset folder (skin-cancers/train.p and
# skin-cancers/test.p), so read them back from that same location instead of
# the current working directory.
data_folder = 'skin-cancers'

# NOTE: pickle.load can execute arbitrary code while unpickling. Only load
# .p files that were generated by this notebook, never untrusted ones.

# Load the train data from the .p file
with open(os.path.join(data_folder, 'train.p'), 'rb') as f:
    train = pickle.load(f)

# Load the test data from the .p file
with open(os.path.join(data_folder, 'test.p'), 'rb') as f:
    test = pickle.load(f)

# Each entry is an (image_data, label) tuple.
# X_train will contain the image data and y_train will contain the labels
X_train = [image_data for image_data, _ in train]
y_train = [label for _, label in train]

# X_test will contain the image data and y_test will contain the labels
X_test = [image_data for image_data, _ in test]
y_test = [label for _, label in test]

In [24]:
import numpy as np

# Convert the lists of image data and labels into numpy arrays.
# The images are raw, variable-length byte strings, so they are stored with
# dtype=object: numpy's default fixed-width bytes dtype would pad every entry
# to the size of the largest file and drop trailing NUL bytes on read-back.
X_train = np.array(X_train, dtype=object)
y_train = np.array(y_train)

(2239,)
(2239,)


In [25]:
import numpy as np

# Convert the lists of image data and labels into numpy arrays.
# The images are raw, variable-length byte strings, so they are stored with
# dtype=object: numpy's default fixed-width bytes dtype would pad every entry
# to the size of the largest file and drop trailing NUL bytes on read-back.
X_test = np.array(X_test, dtype=object)
y_test = np.array(y_test)

In [27]:
# Display the shape of the training image array (one entry per sample).
X_train.shape

(2239,)

In [28]:
# Display the shape of the training label array; it should match X_train.
y_train.shape

(2239,)

In [29]:
# Display the shape of the test image array (one entry per sample).
X_test.shape

(118,)

In [30]:
# Display the shape of the test label array; it should match X_test.
y_test.shape

(118,)