In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
extra_train_df = pd.read_csv("C:/Users/Manik/Desktop/fitzpatrick17k.csv")

In [4]:
extra_train_df['label'].unique()

array(['drug induced pigmentary changes', 'photodermatoses',
       'dermatofibroma', 'psoriasis', 'kaposi sarcoma',
       'neutrophilic dermatoses', 'granuloma annulare',
       'nematode infection', 'allergic contact dermatitis',
       'necrobiosis lipoidica', 'hidradenitis', 'melanoma',
       'acne vulgaris', 'sarcoidosis', 'xeroderma pigmentosum',
       'actinic keratosis', 'scleroderma', 'syringoma', 'folliculitis',
       'pityriasis lichenoides chronica', 'porphyria',
       'dyshidrotic eczema', 'seborrheic dermatitis', 'prurigo nodularis',
       'acne', 'neurofibromatosis', 'eczema', 'pediculosis lids',
       'basal cell carcinoma', 'pityriasis rubra pilaris',
       'pityriasis rosea', 'livedo reticularis',
       'stevens johnson syndrome', 'erythema multiforme',
       'acrodermatitis enteropathica', 'epidermolysis bullosa',
       'dermatomyositis', 'urticaria', 'basal cell carcinoma morpheiform',
       'vitiligo', 'erythema nodosum', 'lupus erythematosus',
       '

In [5]:
# 1) Define your 21 target diseases
desired_diseases = [
    "squamous-cell-carcinoma",
    "basal-cell-carcinoma",
    "folliculitis",
    "acne-vulgaris",
    "melanoma",
    "eczema",
    "acne",
    "mycosis-fungoides",
    "actinic-keratosis",
    "prurigo-nodularis",
    "kaposi-sarcoma",
    "keloid",
    "dermatomyositis",
    "superficial-spreading-melanoma-ssm",
    "pyogenic-granuloma",
    "malignant-melanoma",
    "epidermal-nevus",
    "dyshidrotic-eczema",
    "dermatofibroma",
    "seborrheic-keratosis",
    "basal-cell-carcinoma-morpheiform"
]

# 2) Filter the DataFrame
train_df_filtered = extra_train_df[extra_train_df['label'].isin(desired_diseases)]

# 3) Check the distribution to confirm only the 21 diseases remain
print(train_df_filtered['label'].value_counts())

# 4) (Optional) Overwrite the old DataFrame or save to new CSV
extra_train_df.to_csv("filtered_train.csv", index=False)
# train_df.to_csv("filtered_train.csv", index=False)  # If you want a CSV output


label
folliculitis       342
melanoma           261
eczema             204
acne               183
keloid             156
dermatomyositis    151
dermatofibroma      79
Name: count, dtype: int64


In [6]:
print(extra_train_df.columns)

Index(['md5hash', 'fitzpatrick_scale', 'fitzpatrick_centaur', 'label',
       'nine_partition_label', 'three_partition_label', 'qc', 'url',
       'url_alphanum'],
      dtype='object')


In [7]:
print(extra_train_df['label'].unique())

['drug induced pigmentary changes' 'photodermatoses' 'dermatofibroma'
 'psoriasis' 'kaposi sarcoma' 'neutrophilic dermatoses'
 'granuloma annulare' 'nematode infection' 'allergic contact dermatitis'
 'necrobiosis lipoidica' 'hidradenitis' 'melanoma' 'acne vulgaris'
 'sarcoidosis' 'xeroderma pigmentosum' 'actinic keratosis' 'scleroderma'
 'syringoma' 'folliculitis' 'pityriasis lichenoides chronica' 'porphyria'
 'dyshidrotic eczema' 'seborrheic dermatitis' 'prurigo nodularis' 'acne'
 'neurofibromatosis' 'eczema' 'pediculosis lids' 'basal cell carcinoma'
 'pityriasis rubra pilaris' 'pityriasis rosea' 'livedo reticularis'
 'stevens johnson syndrome' 'erythema multiforme'
 'acrodermatitis enteropathica' 'epidermolysis bullosa' 'dermatomyositis'
 'urticaria' 'basal cell carcinoma morpheiform' 'vitiligo'
 'erythema nodosum' 'lupus erythematosus' 'lichen planus'
 'sun damaged skin' 'drug eruption' 'scabies' 'cheilitis'
 'urticaria pigmentosa' 'behcets disease' 'nevocytic nevus'
 'mycosis fungo

In [None]:
# Our 21 diseases are:
desired_diseases = [
    "squamous-cell-carcinoma",
    "basal-cell-carcinoma",
    "folliculitis",
    "acne-vulgaris",
    "melanoma",
    "eczema",
    "acne",
    "mycosis-fungoides",
    "actinic-keratosis",
    "prurigo-nodularis",
    "kaposi-sarcoma",
    "keloid",
    "dermatomyositis",
    "superficial-spreading-melanoma-ssm",
    "pyogenic-granuloma",
    "malignant-melanoma",
    "epidermal-nevus",
    "dyshidrotic-eczema",
    "dermatofibroma",
    "seborrheic-keratosis",
    "basal-cell-carcinoma-morpheiform"
]

# Defining label mapping dictionary 
label_mapping = {
    "squamous cell carcinoma": "squamous-cell-carcinoma",
    "basal cell carcinoma": "basal-cell-carcinoma",
    "folliculitis": "folliculitis",
    'acne vulgaris': 'acne-vulgaris',
    "melanoma": "melanoma",
    "eczema": "eczema",
    "acne": "acne",
    "mycosis fungoides":"mycosis-fungoides",
    "actinic keratosis ":"actinic-keratosis ",
    "prurigo nodularis" : "prurigo-nodularis",
    "kaposi sarcoma":"kaposi-sarcoma",
    "keloid":"keloid",
    "dermatomyositis":"dermatomyositis",
    "superficial spreading melanoma ssm": "superficial-spreading-melanoma-ssm",
    "pyogenic granuloma":"pyogenic-granuloma",
    "malignant melanoma": "malignant-melanoma",
    "epidermal nevus": "epidermal-nevus",
    "dyshidrotic eczema": "dyshidrotic-eczema",
    "dermatofibroma": "dermatofibroma",
    "seborrheic keratosis": "seborrheic-keratosis",
    "basal cell carcinoma morpheiform": "basal-cell-carcinoma-morpheiform"
}

# 1) Replace the raw labels with normalized ones
extra_train_df['label'] = extra_train_df['label'].replace(label_mapping)

# 2) Filter to keep only the 21 diseases we want
filtered_df = extra_train_df[extra_train_df['label'].isin(desired_diseases)]

print(filtered_df['label'].value_counts())
print(filtered_df.shape)


label
squamous-cell-carcinoma               581
basal-cell-carcinoma                  468
folliculitis                          342
acne-vulgaris                         335
melanoma                              261
eczema                                204
acne                                  183
mycosis-fungoides                     182
prurigo-nodularis                     170
kaposi-sarcoma                        156
keloid                                156
dermatomyositis                       151
superficial-spreading-melanoma-ssm    118
pyogenic-granuloma                    113
malignant-melanoma                    111
epidermal-nevus                        91
dyshidrotic-eczema                     83
dermatofibroma                         79
seborrheic-keratosis                   69
basal-cell-carcinoma-morpheiform       62
Name: count, dtype: int64
(3915, 9)


In [None]:
# filtered_df.to_csv("filtered_train.csv", index=False)  # If you want a CSV output

In [None]:
import csv
import requests

# Define the base path for the extended dataset
base_path = 'C:/Users/Manik/Desktop/BTT Project/BTAI-AJL-Team-2/bttai-ajl-2025/extended_dataset'

# Define headers to mimic a browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}

csv_path = "C:/Users/Manik/Desktop/BTT Project/BTAI-AJL-Team-2/bttai-ajl-2025/filtered_train.csv"

# Open the CSV file and process each row
with open(csv_path, mode='r', encoding='utf-8') as dataset:
    file = csv.reader(dataset)
    headers_row = next(file)  # Skip the header row

    for line in file:
        img_name = line[0]  # md5hash as the image name
        category = line[3]  # label as the category (e.g., "acne", "melanoma")
        img_url = line[7]   # URL of the image

        if not img_url:
            print(f"Skipping {img_name}, empty URL")
            continue

        print(f"Downloading: {img_name} from {img_url}")

        try:
            response = requests.get(img_url, headers=headers, timeout=10)
            if response.status_code == 200:
                # Create the subfolder for the category
                subfolder = os.path.join(base_path, category)
                os.makedirs(subfolder, exist_ok=True)

                # Define the save path within the category subfolder
                save_path = os.path.join(subfolder, f"{img_name}.jpeg")
                with open(save_path, 'wb') as f:
                    f.write(response.content)
            else:
                print(f"Failed to download {img_url}, Status code: {response.status_code}")
        except Exception as e:
            print(f"ERROR: GET request for {img_url} failed with exception {e}")

Downloading: d2bac3c9e4499032ca8e9b07c7d3bc40 from https://www.dermaamin.com/site/images/clinical-pic/d/dermatofibroma/dermatofibroma71.jpg
Downloading: 45f7fe0e10214e32e890cad9d29d4811 from https://www.dermaamin.com/site/images/clinical-pic/k/kaposis-sarcoma/kaposis-sarcoma4.jpg
Downloading: b87804452f60aa162a6d29c0f66a2466 from https://www.dermaamin.com/site/images/clinical-pic/L/lmm/lmm6.jpg
Downloading: d1fb87ee7ee50f997cd6497dd90d6bbb from https://www.dermaamin.com/site/images/clinical-pic/a/acne_vulgaris/acne_vulgaris150.jpg
Downloading: 4c3f795cf8eb72b946f9bd2642cf23c1 from https://www.dermaamin.com/site/images/clinical-pic/m/melanoma/melanoma17.jpg
Downloading: 99247c9fe486aa9ab71686c8e676c135 from https://www.dermaamin.com/site/images/clinical-pic/d/dermatofibroma/dermatofibroma13.jpg
Downloading: f968e591e15f47b544e551bc3cc5b8d3 from https://www.dermaamin.com/site/images/clinical-pic/f/folliculitis/folliculitis89.jpg
Downloading: cd38c6d449b05025a1e68bd94d387f47 from https://