In [1]:
import os
import numpy as np
import pandas as pd
import polars as pl
from tqdm import tqdm
import h5py
from io import BytesIO
from tqdm import tqdm

# PyTorch installers
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import mobilenet_v3_large
import torch.optim as optim
from torch.nn import Softmax
from torchvision.models.feature_extraction import create_feature_extractor

# image dataset
import albumentations
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import ConcatDataset

# sklearn import
import sklearn
from sklearn.decomposition import PCA
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GroupKFold

# data visualisation
import plotly.express as px
import plotly.graph_objects as go
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image

# models
import lightgbm as lgb
from catboost import CatBoostClassifier, Pool

## Configurations

In [2]:
class cng:
    image_dim = 224
    multi = 20
    kfold = 5
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    num_classes = 7
    pretrained_model_path = "/kaggle/input/mobilenet-v3-large-weights/mobilenet_v3_large-weights.pth"
    custom_model_path = "/kaggle/input/cnn-finetuning/3.pth"
    h_100_model_path = "/kaggle/input/h-100-weights-15/15.pth"
    # LightGBM parameters
    lgb_w = 0.31
    lgb_p = {
        'min_child_samples': 48,
        'num_iterations': 6000,
        'learning_rate': 0.03,
        'objective': 'binary',
        'extra_trees': True,
        'metric': 'binary',
        'reg_lambda': 0.8,
        'reg_alpha': 0.1,
        'num_leaves': 64,
        'device': 'cpu',
        'max_bin': 128,
        'max_depth': 4,
        'verbose': -1,
        'seed': 42
    }
    
    # CatBoost weight and parameters
    ctb_w = 0.59
    ctb_p = {
        'grow_policy': 'Depthwise',
        'loss_function': 'Logloss',
        'min_child_samples': 48,
        'learning_rate': 0.03,
        'random_state': 42,
        'task_type': 'CPU',
        'reg_lambda': 0.8,
        'num_trees': 6000,
        'depth': 4
    }
    
    cnn_w = 0.10
    early_stop = 30
    lgb_weight = 1
    test_batch_size = 64

In [3]:
CONFIG = {
    "lab":[
        'tbp_lv_A',
        'tbp_lv_Aext',
        'tbp_lv_B',
        'tbp_lv_Bext',
        'tbp_lv_deltaA',
        'tbp_lv_deltaB',
        'tbp_lv_deltaLB',
        'tbp_lv_deltaLBnorm',
        'tbp_lv_stdLExt',
        ],
    "lch":[
        'tbp_lv_C',
        'tbp_lv_Cext',
        'tbp_lv_H',
        'tbp_lv_Hext',
        ],
    "int_features":[
        'target',
        'age_approx',
        'tbp_lv_symm_2axis_angle'],
    "float_features":[
        'clin_size_long_diam_mm', 
        'tbp_lv_A', 
        'tbp_lv_Aext', 
        'tbp_lv_B', 
        'tbp_lv_Bext', 
        'tbp_lv_C', 
        'tbp_lv_Cext',
        'tbp_lv_H', 
        'tbp_lv_Hext',
        'tbp_lv_L', 
        'tbp_lv_Lext',
        'tbp_lv_areaMM2', 
        'tbp_lv_area_perim_ratio',
        'tbp_lv_color_std_mean',
        'tbp_lv_deltaA', 
        'tbp_lv_deltaB',
        'tbp_lv_deltaL', 
        'tbp_lv_deltaLB',
        'tbp_lv_deltaLBnorm',
        'tbp_lv_eccentricity', 
        'tbp_lv_minorAxisMM',
        'tbp_lv_nevi_confidence',
        'tbp_lv_norm_border',
        'tbp_lv_norm_color',
        'tbp_lv_perimeterMM',
        'tbp_lv_radial_color_std_max',
        'tbp_lv_stdL',
        'tbp_lv_stdLExt',
        'tbp_lv_symm_2axis',
        'tbp_lv_x',
        'tbp_lv_y',
        'tbp_lv_z'],
    "categ_features":[
        'sex', 
        'anatom_site_general', 
        'tbp_tile_type', 
        'tbp_lv_location'],
}

## Classes

In [4]:
class Metrics:
    
    @staticmethod
    def calculate_pauc(y_true, y_scores, tpr_threshold=0.8):
        
        # Calculate ROC curve
        fpr, tpr, thresholds = roc_curve(y_true, y_scores)
        
        # Create a mask for TPR values above the threshold
        mask = tpr >= tpr_threshold
        
        # Filter FPR and TPR values based on the mask
        fpr_above_threshold = fpr[mask]
        tpr_above_threshold = tpr[mask]
        
        # Calculate the partial AUC
        partial_auc = auc(fpr_above_threshold, tpr_above_threshold)
        
        # Normalize the partial AUC
        pauc = partial_auc * (1 - tpr_threshold)
        
        return pauc  
    
    @staticmethod
    def plot_cv(fold_scores, model_name):
        
        # Round the fold scores to 4 decimal places
        fold_scores = [round(score, 4) for score in fold_scores]
        mean_score = round(np.mean(fold_scores), 4)
        std_score = round(np.std(fold_scores), 4)

        # Create a new figure for plotting
        fig = go.Figure()

        # Add scatter plot for individual fold scores
        fig.add_trace(go.Scatter(
            x = list(range(1, len(fold_scores) + 1)),
            y = fold_scores,
            mode = 'lines+markers',
            name = 'Fold Scores',
            line = dict(color = '#E30B5C', width = 2), # Raspberry
            marker = dict(size = 12, color = '#E30B5C'), # Larger markers, Raspberry
            text = [f'{score:.4f}' for score in fold_scores],
            hovertemplate = 'Fold %{x}: %{text}<extra></extra>'
        ))

        # Add a horizontal line for the mean score
        fig.add_trace(go.Scatter(
            x = [1, len(fold_scores)],
            y = [mean_score, mean_score],
            mode = 'lines',
            name = f'Mean: {mean_score:.4f}',
            line = dict(dash = 'dash', color = '#FFAC1C'), # Bright Orange
            hoverinfo = 'none'
        ))

        # Update the layout of the plot
        fig.update_layout(
            title = f'{model_name} Cross-Validation pAUC Scores | Variation of CV scores: {mean_score} ± {std_score}',
            xaxis_title = 'Fold',
            yaxis_title = 'pAUC Score',
            plot_bgcolor = 'rgba(0,0,0,0)',
            paper_bgcolor = 'rgba(0,0,0,0)',
            xaxis = dict(
                gridcolor = 'lightgray',
                tickmode = 'linear',
                tick0 = 1,
                dtick = 1,
                range = [0.5, len(fold_scores) + 0.5]
            ),
            yaxis = dict(gridcolor = 'lightgray')
        )

        # Display the plot
        fig.show() 
        
    @staticmethod
    def plot_cm(y_true, y_pred):
        
        # Get unique labels
        labels = sorted(np.unique(y_true))
        
        # Compute confusion matrix
        cm = confusion_matrix(y_true, 
                              y_pred=(y_pred > 0.5).astype(int), 
                              labels=labels)
      
        # Create the heatmap
        fig = go.Figure(data=go.Heatmap(
            z=cm,
            x=labels,
            y=labels,
            colorscale='Redor',
            zmin=0,
            
            # Use the maximum value in the confusion matrix
            zmax=np.max(cm),  
            text=cm,
            texttemplate='%{text:.0f}',
            hovertemplate='True: %{y}<br>Predicted: %{x}<br>Count: %{z:,.0f}<extra></extra>',
            
            # Create a custom hover text format
            customdata = [str(int(val)) for val in cm.flatten()]
        ))
        
        # Update layout for a transparent background and square aspect ratio
        fig.update_layout(
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)',
            xaxis_title='Predicted Labels',
            yaxis_title='True Labels',
            xaxis=dict(constrain='domain'),
            yaxis=dict(constrain='domain', scaleanchor='x'),
            width=800,  
            height=800,  
            margin=dict(t=80, b=80, l=80, r=80) 
        )
        
        # Show the plot
        fig.show()

## Feature engineering

In [5]:
def redu_cols(df, valid_cols):
    df = df[valid_cols]
    return df

In [6]:
def remove_cols(df, cols):
    for col in cols:
        if col in df.columns:
            df = df.drop(cols, axis=1)
    return df

In [7]:
def select_color_scheme(df, colorset):
    # https://sensing.konicaminolta.us/us/blog/identifying-color-differences-using-l-a-b-or-l-c-h-coordinates/
    if colorset == "lab":
        df = df.drop(CONFIG["lch"], axis=1)
    elif colorset == "lch":
        df = df.drop(CONFIG["lab"], axis=1)
    return df

In [8]:
def populate_sex(df):
    sexes = df.groupby("patient_id")["sex"].max().reset_index()
    df["sex"] = df2.apply(lambda x: sexes.loc[sexes["patient_id"]==x["patient_id"], "sex"].values[0], axis=1)
    return df

In [9]:
def populate_age(df):
    if df["age_approx"].dtype != "str":
        df["age_approx"] = df["age_approx"].fillna(-1).astype(int).astype(object)
    return df

In [10]:
def numeric_col(df):
    for col in CONFIG["int_features"]:

        # Set dtype for numeric columns (int)
        if col in df.columns:
            df[col] = df[col].fillna(-1).astype(int)
    
    # Define numeric columns (float)
    for col in CONFIG["float_features"]: 

        # Set dtype for numeric columns (float)
        if col in df.columns:
            df[col] = df[col].astype(float)
    return df

In [11]:
def categ_col(df):
    # Define categorical columns
    for col in CONFIG["categ_features"]:
        # Set dtype for categorical columns
        if col in df.columns:
            col_list = list(df[col].unique())
            df[col] = df[col].apply(lambda x: col_list.index(x))
    return df

## Load Data

In [12]:
train_df = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')
test_df = pd.read_csv("/kaggle/input/isic-2024-challenge/test-metadata.csv")

  train_df = pd.read_csv('/kaggle/input/isic-2024-challenge/train-metadata.csv')


In [13]:
train_df.head()

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


In [16]:
400665/393

1019.5038167938932

In [None]:
valid_cols_test = test_df.columns.to_list()
valid_cols_train = valid_cols_test + ["target"]

In [None]:
positive_samples = train_df.loc[train_df["target"]==1]
negative_samples = train_df.loc[train_df["target"]==0]

df = pd.concat([positive_samples, negative_samples.sample(positive_samples.shape[0]*cng.multi)])
df.reset_index(drop=True, inplace=True)

In [None]:
sgkf = StratifiedKFold(n_splits=cng.kfold)

for fold, ( _, val_) in enumerate(sgkf.split(df, df.target, df.patient_id)):
      df.loc[val_ , "kfold"] = int(fold)

df["kfold"] = df["kfold"].astype(int)

In [None]:
df['path'] = df['isic_id'].apply(lambda x: os.path.join('/kaggle/input/isic-2024-challenge/train-image/image', x+'.jpg'))
train_paths = df[["path", "isic_id", "target"]]
test_paths = test_df["isic_id"]

## Feature Engineering

In [None]:
df = redu_cols(df, valid_cols_train)
cols = ["attribution", "copyright_license", "image_type", "isic_id", "tbp_lv_location_simple"]
df = remove_cols(df, cols)
df = select_color_scheme(df, "lab")
# df = populate_sex(df)
df = populate_age(df)
df = numeric_col(df)
df = categ_col(df)

In [None]:
test_df = redu_cols(test_df, valid_cols_test)
cols = ["attribution", "copyright_license", "image_type", "isic_id", "tbp_lv_location_simple"]
test_df = remove_cols(test_df, cols)
test_df = select_color_scheme(test_df, "lab")
# df = populate_sex(df)
test_df = populate_age(test_df)
test_df = numeric_col(test_df)
test_df = categ_col(test_df)

## Dataset

In [None]:
class CLAHETransform:
    def __init__(self, clip_limit=(1, 4), p=1.0):
        self.transform = albumentations.CLAHE(clip_limit=clip_limit, p=p)

    def __call__(self, img):
        # Convert PIL image to numpy array
        img_np = np.array(img)
        
        # Apply CLAHE
        augmented = self.transform(image=img_np)
        img_np = augmented['image']
        
        # Convert numpy array back to PIL image
        return Image.fromarray(img_np)

# Define the complete transformation pipeline
train_transform = transforms.Compose(
    [
        transforms.Resize((cng.image_dim, cng.image_dim), interpolation=Image.NEAREST),  
        transforms.RandomRotation(25, interpolation=Image.NEAREST),  
        transforms.RandomAffine(
            degrees=0, 
            translate=(0.15, 0.15),  
            shear=15,  # Shear by 15%
            interpolation=Image.NEAREST
        ),
        transforms.RandomHorizontalFlip(p=1.0),  
        transforms.RandomVerticalFlip(p=1.0),   
        transforms.ColorJitter(brightness=(0.9, 1.5)),  
        CLAHETransform(clip_limit=(1, 4), p=1.0),  
        transforms.ToTensor()
    ]
)

# Define the complete transformation pipeline
test_transform = transforms.Compose(
    [
        transforms.Resize((cng.image_dim, cng.image_dim), interpolation=Image.NEAREST),  
        CLAHETransform(clip_limit=(1, 4), p=1.0),  
        transforms.ToTensor()
    ]
)

In [None]:
class meDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, df, transform=None):
        self.df = df
        
        if not transform:
            self.transform = transforms.Compose([
                transforms.ToTensor()
            ])
        else:
            self.transform = transform
        
    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):

        path = self.df.loc[idx, 'path']
        label = self.df.loc[idx, 'target']
#         enc_label = enc.transform([[label]]).toarray()
        name = self.df.loc[idx, 'isic_id']
        
        pil_image = Image.open(path)
        torch_image = self.transform(pil_image)
        
#         return torch_image, label, name
        return {
            "image": torch_image, 
            "target": label,
            "imageid": name
        }


In [None]:
class meDataset_inference(Dataset):
    def __init__(self, df, file_hdf, transforms=None):
        self.df = df
        self.fp_hdf = h5py.File(file_hdf, mode="r")
        self.isic_ids = df['isic_id'].values
        self.transforms = transforms
        
    def __len__(self):
        return len(self.isic_ids)
    
    def __getitem__(self, index):
        isic_id = self.isic_ids[index]
        img = Image.open(BytesIO(self.fp_hdf[isic_id][()]))
        
        if self.transforms:
            img = self.transforms(img)
            
        return {
            "image": img,
            "id": isic_id
        }

## Model architecture

In [None]:
class ECANet(nn.Module):
    def __init__(self, k_size=3):
        super(ECANet, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        y = self.avg_pool(x)
        y = self.conv(y.squeeze(-1).transpose(-1, -2)).transpose(-1, -2).unsqueeze(-1)
        y = self.sigmoid(y)
        return x * y.expand_as(x)

In [None]:
class MobileNetV3WithECA(nn.Module):
    def __init__(self, num_classes=2):
        super(MobileNetV3WithECA, self).__init__()
        self.mobilenet_v3 = mobilenet_v3_large(pretrained=False)
#         self.mobilenet_v3.load_state_dict(torch.load(cng.pretrained_model_path))
        se_layers = [4, 5, 6, 11, 12, 13, 14, 15]
        
        # Replace SE blocks with ECA blocks in InvertedResidual blocks
        for i in se_layers:
            setattr(self.mobilenet_v3.features[i].block, "2", ECANet())
        
        # Transfer Learning
        in_features = self.mobilenet_v3.classifier[3].in_features
        self.mobilenet_v3.classifier[3] = nn.Linear(in_features, num_classes, bias=True)

    def forward(self, x):
        return self.mobilenet_v3(x)

In [None]:
model = MobileNetV3WithECA(num_classes=cng.num_classes)
model.load_state_dict(torch.load(cng.h_100_model_path))
model.to(cng.device)
print("Model loaded!")

## Feature Extraction

In [None]:
return_nodes = {'mobilenet_v3.classifier.0': 'output'}
feature_extractor = create_feature_extractor(model, return_nodes)

In [None]:
# dataset 
train_dataset = meDataset(train_paths, test_transform)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)

test_dataset = meDataset(test_paths, test_transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [None]:
train_features = []
train_labels = []
train_img_ids = []

for i in tqdm(train_loader):
    train_features.append(feature_extractor(i["image"].to(cng.device))["output"].detach().cpu().numpy())
    train_labels.append(i["target"].detach().cpu().numpy())
    train_img_ids.append(i["imageid"])

In [None]:
X_train = np.concatenate(train_features)  # Your training feature array (2D)
y_train = np.concatenate(train_labels)  # Your training label array (1D)

In [None]:
train_df.head()

In [None]:
train_img_ids_2 = list(np.array(train_img_ids))

In [None]:
aa = train_df[["isic_id", "patient_id"]].T
aa.columns = aa.loc["isic_id", :]
aa = aa.drop(["isic_id"], axis=0)

aa_dict = aa.to_dict()

In [None]:
train_patient_id = []
for i in tqdm(train_img_ids_2):
    train_patient_id.append(aa_dict[i[0]]["patient_id"])

## Generate CNN predictions

In [None]:
cv = GroupKFold(5)
for fold, (train_index, valid_index) in enumerate(cv.split(X_train, y_train, groups=train_patient_id)):
    print(fold, train_index)

In [None]:
train_dict = {"features":X_train, "labels":y_train, "groups":train_patient_id}

In [None]:
# Step 1: Create LightGBM Dataset objects
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)

In [None]:
class ModelDevelopment:
        
    def train_lgb(self, train_dict, params, early_stop, cnn_model, pca_model):
        cv = GroupKFold(5)
        
        # Initialize lists to store models and cv scores
        models = []
        scores = []
       
        for fold, (train_index, valid_index) in enumerate(cv.split(train_dict["features"], train_dict["labels"], train_dict["groups"])):
            # Perform cross-validation            
            # Split the data into training and validation sets for the current fold
            train_features = train_dict["features"][train_index]
            train_labels = train_dict["labels"][train_index]
            
            valid_features = train_dict["features"][valid_index]
            valid_labels = train_dict["labels"][valid_index]
                        
            # Create LightGBM datasets
            train_data = lgb.Dataset(train_features, label=train_labels)
            valid_data = lgb.Dataset(valid_features, label=valid_labels, reference=train_data)
        
            # Train the model
            model = lgb.train(params, 
                              train_data, 
                              valid_sets=[valid_data], 
                              callbacks=[lgb.early_stopping(early_stop, verbose=0), 
                                         lgb.log_evaluation(0)])

            # Append the trained model to the list
            models.append(model)

            # Calculate and store the pAUC score for the current (valid) fold
            y_pred = model.predict(valid_features)
            score = Metrics.calculate_pauc(valid_labels, y_pred)
            scores.append(score)

        # Plot the cross-validation results
        Metrics.plot_cv(scores, 'LightGBM')
        
        return models
    
    def train_cat(self, train_dict, params, early_stop, cnn_model, pca_model):
        cv = GroupKFold(5)
        
        # Initialize lists to store models and cv scores
        models = []
        scores = []
       
        for fold, (train_index, valid_index) in enumerate(cv.split(train_dict["features"], train_dict["labels"], train_dict["groups"])):
            # Perform cross-validation            
            # Split the data into training and validation sets for the current fold
            train_features = train_dict["features"][train_index]
            train_labels = train_dict["labels"][train_index]
            
            valid_features = train_dict["features"][valid_index]
            valid_labels = train_dict["labels"][valid_index]
                        
            # Create LightGBM datasets
            train_pool = Pool(train_features, train_labels)
            valid_pool = Pool(valid_features, valid_labels)
            
            # Train the model
            model = CatBoostClassifier(**params, verbose=0)
            model.fit(train_pool, 
                      eval_set=valid_pool, 
                      early_stopping_rounds=cng.early_stop)

            # Append the trained model to the list
            models.append(model)

            # Calculate and store the pAUC score for the current (valid) fold
            y_pred = model.predict_proba(valid_pool)[:, 1]
            
            score = Metrics.calculate_pauc(valid_labels, y_pred)
            scores.append(score)

        # Plot the cross-validation results
        Metrics.plot_cv(scores, 'CatBoost')
        
        return models

    @staticmethod
    def infer_lgb(features, models):

        # Average the predictions of the LightGBM classifiers
        preds = np.mean([model.predict(features) for model in models], axis=0)

        return preds    
    
    @staticmethod
    def infer_cat(features, models):

        # Average the predictions of the LightGBM classifiers
        preds = np.mean([model.predict_proba(features)[:, 1] for model in models], axis=0)

        return preds    
    
    def generate_preds(self, train_dict, lgb_params, cat_params, early_stop, lgb_weight, cat_weight, cnn_model, pca_model=None):
                
        # Train LightGBM and CatBoost
        self.lgb_models = self.train_lgb(train_dict, lgb_params, early_stop, cnn_model, pca_model)
        self.cat_models = self.train_cat(train_dict, cat_params, early_stop, cnn_model, pca_model)
            
            
        train_dataset = meDataset(df.reset_index(drop=True), test_transform)
        train_loader = DataLoader(train_dataset, 
                                  batch_size=64, 
                                  num_workers=2, 
                                  shuffle=True, 
                                  pin_memory=True, 
                                  drop_last=True)
        
        
        # Infer LightGBM and CatBoost on train data
        train_lgb_preds = self.infer_lgb(train_dict["features"], self.lgb_models)        
        train_cat_preds = self.infer_cat(train_dict["features"], self.cat_models)
        train_targets = train_dict["labels"]
        
        # Weight-ensemble LightGBM and CatBoost predictions
        train_preds = (train_lgb_preds * lgb_weight) + (train_cat_preds * cat_weight) 
        
        # Calculate pAUC scores
        train_pauc = Metrics.calculate_pauc(train_targets, train_preds)
        print(f'Ensemble pAUC: {train_pauc:.3f}')
    
        # Plot confusion matrix for Ensemble predictions on train data
        print('Ensemble confusion matrix:')
        Metrics.plot_cm(train_targets, train_preds)  
        
# #         # Infer LightGBM and CatBoost on test data
#         test_lgb_preds = self.infer_lgb(test_df.drop(["patient_id"], axis=1), self.lgb_models)        
#         test_cat_preds = self.infer_cat(test_df.drop(["patient_id"], axis=1), self.cat_models)        
        
# #         # Weight-ensemble LightGBM and CatBoost predictions
#         test_preds = test_lgb_preds * lgb_weight
        
#         return test_preds
        return NOne

In [None]:
md = ModelDevelopment()

In [None]:
preds = md.generate_preds(train_dict,
                          cng.lgb_p,
                          cng.ctb_p,
                          cng.early_stop,
                          cng.lgb_w, 
                          cng.ctb_w,
                          model)

In [None]:
test_df_new = pd.read_csv("/kaggle/input/isic-2024-challenge/test-metadata.csv")

In [None]:
submission_df = pd.DataFrame({"isic_id":test_df_new["isic_id"], "target":preds})
submission_df.to_csv("submission.csv", index=False)