KeyError: "None of [Index(['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',\n       'smoothness_mean'],\n      dtype='object')] are in the [columns]"

In [None]:
# -------------------------------
# Breast Cancer Prediction Model
# model_building.ipynb
# -------------------------------

# 1️⃣ Imports
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib

# 2️⃣ Load dataset
# Wrap scikit-learn's bundled breast cancer data in a DataFrame so the
# features can be selected by column name.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['diagnosis'] = data.target  # 0=malignant, 1=benign

# Optional: human-readable label column (not used for model training)
df['diagnosis_str'] = df['diagnosis'].map({0: 'Malignant', 1: 'Benign'})

# 3️⃣ Check column names (important!)
print("Columns in dataset:")
print(df.columns)

# 4️⃣ Select 5 features (matching actual column names from sklearn)
features = ['mean radius', 'mean texture', 'mean perimeter', 'mean area', 'mean smoothness']

# Fail early with a readable message when a feature name does not match the
# dataset (e.g. CSV-style names such as 'radius_mean' instead of 'mean radius').
missing = [name for name in features if name not in df.columns]
if missing:
    raise KeyError(f"Feature columns not found in dataset: {missing}")

X = df[features]
y = df['diagnosis']  # 0=Malignant, 1=Benign

# 5️⃣ Encode target (optional, already numeric but keeping for generality)
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 6️⃣ Train-test split
# Split BEFORE scaling so the scaler never sees test rows; fitting it on the
# full dataset would leak test-set statistics into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# 7️⃣ Feature scaling
# Learn mean/std from the training rows only, then apply the same transform
# to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler; retained under its
# original name in case later cells reference it.
X_scaled = scaler.transform(X)

# 8️⃣ Train model (SVM)
# probability=True enables predict_proba; random_state pins the internal
# shuffling used for the probability estimates so reruns are reproducible.
model = SVC(kernel='linear', probability=True, random_state=42)
model.fit(X_train, y_train)

# 9️⃣ Evaluate model
y_pred = model.predict(X_test)

# NOTE: precision/recall/F1 use scikit-learn's default pos_label=1, so they
# score class 1 (benign) as the positive class, not malignant.
print("\nModel Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))

# 1️⃣0️⃣ Save model & scaler
# Both artifacts are needed at inference time: new inputs must be passed
# through the saved scaler before being given to the saved model.
joblib.dump(model, 'breast_cancer_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

print("\n✅ Model and scaler saved successfully in current folder!")
