In [3]:
import pandas as pd
import cv2
import numpy as np

from Feature_Extraction import (
    extract_mc_features,
    extract_mass_features,
    build_master_vector
)

from input import get_mass_data


# Load original dataset
df = pd.read_csv(
    r"E:\Khaled\Breast_Cancer_Detection_Imp\test_dataset.csv"
)

all_rows = []

for idx, row in df.iterrows():

    img_path = row['PATH']
    label    = row['SEVERITY']   # or CLASS

    # Load image
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Could not read {img_path}")
        continue

    # PREPROCESSING 
    mass_region, clean_mask = get_mass_data(img)

    if mass_region is None or clean_mask is None:
        print(f"No mass detected in {img_path}")
        continue

    # MASS FEATURES
    mass_features = extract_mass_features(
        mass_region_gray=mass_region,
        clean_mask=clean_mask
    )

    # MC PREPROCESSING 
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    tophat = cv2.morphologyEx(mass_region, cv2.MORPH_TOPHAT, kernel)

    _, mc_bin = cv2.threshold(tophat, 200, 255, cv2.THRESH_BINARY)

    mc_contours, _ = cv2.findContours(
        mc_bin,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )

    # MC FEATURES
    mc_features = extract_mc_features(mc_contours, tophat)

    # MASTER FEATURE VECTOR
    master_vector = build_master_vector(mass_features, mc_features)
    master_vector.append(label)

    all_rows.append(master_vector)


# Save final dataset
FEATURE_COLUMNS = [
    'Area','Perimeter','Circularity','Eccentricity','Solidity','Extent',
    'Mean_Intensity','Max_Intensity','Std_Intensity',
    'Contrast','Dissimilarity','Homogeneity','Energy','Correlation','ASM',
    'MC_Count','MC_AvgArea','MC_StdArea','MC_Density','MC_MeanIntensity',
    'Label'
]

final_df = pd.DataFrame(all_rows, columns=FEATURE_COLUMNS)
final_df.to_excel("svm_features_dataset.xlsx", index=False)


Could not read Datasetll-mias\mdb109.pgm
Could not read Datasetll-mias\mdb287.pgm
Could not read Datasetll-mias\mdb244.pgm
Could not read Datasetll-mias\mdb213.pgm
Could not read Datasetll-mias\mdb141.pgm
Could not read Datasetll-mias\mdb220.pgm
Could not read Datasetll-mias\mdb113.pgm
Could not read Datasetll-mias\mdb263.pgm
Could not read Datasetll-mias\mdb116.pgm
Could not read Datasetll-mias\mdb101.pgm
Could not read Datasetll-mias\mdb160.pgm
Could not read Datasetll-mias\mdb119.pgm
Could not read Datasetll-mias\mdb145.pgm
Could not read Datasetll-mias\mdb227.pgm
Could not read Datasetll-mias\mdb297.pgm
Could not read Datasetll-mias\mdb144.pgm
Could not read Datasetll-mias\mdb315.pgm
Could not read Datasetll-mias\mdb117.pgm
Could not read Datasetll-mias\mdb311.pgm
Could not read Datasetll-mias\mdb301.pgm
Could not read Datasetll-mias\mdb180.pgm
Could not read Datasetll-mias\mdb191.pgm
Could not read Datasetll-mias\mdb129.pgm
Could not read Datasetll-mias\mdb

ModuleNotFoundError: No module named 'openpyxl'

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

import pandas as pd

data = pd.read_csv("train_dataset.csv")

X = data.iloc[:, :-1].values   
y = data.iloc[:, -1].values   


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [None]:
svm_model = SVC(
    kernel='rbf',        
    C=1.0,
    gamma='scale',
    decision_function_shape='ovr'  
)

svm_model.fit(X_train, y_train)


y_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print(classification_report(y_test, y_pred))
