In [1]:
import pandas as pd
import numpy as np


In [4]:
# --- Input files -----------------------------------------------------------
# One CSV per feature family; each is read with pd.read_csv in the load cell.
LBP_CSV = "ham10000_lbp_multiclass_clean_norm.csv"
GLCM_CSV = "ham10000_glcm_multiclass_clean_norm.csv"
WAV_CSV = "ham10000_wavelet_norm.csv"

# --- Output files ----------------------------------------------------------
# One CSV per fused feature combination written further down the notebook.
OUT_LBP_GLCM = "ham10000_lbp_glcm_fused.csv"
OUT_LBP_WAVELET = "ham10000_lbp_wavelet_fused.csv"
OUT_LBP_GLCM_WAVELET = "ham10000_lbp_glcm_wavelet_fused.csv"

In [5]:
# Read the three feature tables in the same order as before: LBP, GLCM, wavelet.
lbp_df = pd.read_csv(filepath_or_buffer=LBP_CSV)
glcm_df = pd.read_csv(filepath_or_buffer=GLCM_CSV)
wav_df = pd.read_csv(filepath_or_buffer=WAV_CSV)

# Report (rows, columns) of each table, one line per table.
print(
    *(
        f"{caption} {frame.shape}"
        for caption, frame in (
            ("LBP shape:", lbp_df),
            ("GLCM shape:", glcm_df),
            ("Wavelet shape:", wav_df),
        )
    ),
    sep="\n",
)


LBP shape: (10015, 19)
GLCM shape: (10015, 41)
Wavelet shape: (10015, 15)


In [6]:
# All three tables must hold the same number of rows.
assert len({len(lbp_df), len(glcm_df), len(wav_df)}) == 1, "Row count mismatch!"

# The last column of each table is compared element by element against the
# last column of the LBP table; every position has to match.
assert (
    lbp_df.iloc[:, -1] == glcm_df.iloc[:, -1]
).all(), "LBP vs GLCM labels mismatch!"
assert (
    lbp_df.iloc[:, -1] == wav_df.iloc[:, -1]
).all(), "LBP vs Wavelet labels mismatch!"

print("✔ Sanity checks passed (labels aligned & last column)")


✔ Sanity checks passed (labels aligned & last column)


In [7]:
def fuse_feature_dfs(dfs):
    """
    Concatenate feature DataFrames where label is the LAST column.

    The last column of every frame is treated as the label and dropped. The
    remaining feature columns are placed side by side in the order the frames
    are given, and the last column of the FIRST frame is appended as a final
    column named "label".

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Frames to fuse. They must all have the same number of rows and
        indexes that line up: ``pd.concat(axis=1)`` aligns rows on the index,
        while the label is attached by position.

    Returns
    -------
    pandas.DataFrame
        Feature columns of every input frame followed by one "label" column.

    Raises
    ------
    ValueError
        If ``dfs`` is empty, if the frames differ in row count, or if their
        indexes do not line up (the concatenated frame would gain rows).
    """
    dfs = list(dfs)  # accept any iterable and allow the multiple passes below
    if not dfs:
        raise ValueError("fuse_feature_dfs needs at least one DataFrame")

    # Without this check a shorter frame is silently padded with NaN rows
    # whenever the first frame happens to be the longest one.
    row_counts = [len(df) for df in dfs]
    if len(set(row_counts)) != 1:
        raise ValueError(
            f"Row count mismatch between feature DataFrames: {row_counts}"
        )

    feature_parts = [df.iloc[:, :-1] for df in dfs]  # all but label
    labels = dfs[0].iloc[:, -1]                      # label from first DF

    fused_df = pd.concat(feature_parts, axis=1)

    # concat outer-joins on the index; extra rows mean the indexes differ and
    # the positional label assignment below could not be trusted.
    if len(fused_df) != len(labels):
        raise ValueError(
            "DataFrame indexes do not line up: fused frame has "
            f"{len(fused_df)} rows but {len(labels)} labels were expected; "
            "consider reset_index(drop=True) on the inputs"
        )

    fused_df["label"] = labels.values                # append label at end

    return fused_df


In [8]:
# Fusion 1 of 3: LBP features followed by GLCM features, then the label column.
lbp_glcm_df = fuse_feature_dfs(dfs=[lbp_df, glcm_df])

# Write the fused table without the pandas index column.
lbp_glcm_df.to_csv(path_or_buf=OUT_LBP_GLCM, index=False)

print("Saved:", OUT_LBP_GLCM)
print("Shape:", lbp_glcm_df.shape)


Saved: ham10000_lbp_glcm_fused.csv
Shape: (10015, 59)


In [9]:
# Fusion 2 of 3: LBP features followed by wavelet features, then the label column.
lbp_wav_df = fuse_feature_dfs(dfs=[lbp_df, wav_df])

# Write the fused table without the pandas index column.
lbp_wav_df.to_csv(path_or_buf=OUT_LBP_WAVELET, index=False)

print("Saved:", OUT_LBP_WAVELET)
print("Shape:", lbp_wav_df.shape)


Saved: ham10000_lbp_wavelet_fused.csv
Shape: (10015, 33)


In [10]:
# Fusion 3 of 3: LBP, GLCM and wavelet features side by side, then the label column.
lbp_glcm_wav_df = fuse_feature_dfs(dfs=[lbp_df, glcm_df, wav_df])

# Write the fused table without the pandas index column.
lbp_glcm_wav_df.to_csv(path_or_buf=OUT_LBP_GLCM_WAVELET, index=False)

print("Saved:", OUT_LBP_GLCM_WAVELET)
print("Shape:", lbp_glcm_wav_df.shape)


Saved: ham10000_lbp_glcm_wavelet_fused.csv
Shape: (10015, 73)


In [11]:
# Recap of the three fused tables: one "(rows, columns)" line per combination.
print("\nFinal fused datasets:")
print(
    *(
        f"{caption} {frame.shape}"
        for caption, frame in (
            ("LBP + GLCM:", lbp_glcm_df),
            ("LBP + Wavelet:", lbp_wav_df),
            ("LBP + GLCM + Wavelet:", lbp_glcm_wav_df),
        )
    ),
    sep="\n",
)



Final fused datasets:
LBP + GLCM: (10015, 59)
LBP + Wavelet: (10015, 33)
LBP + GLCM + Wavelet: (10015, 73)
