DETECTING BREAST CANCER (FINAL PROJECT AT UPFALIRS BY ME)

In [29]:
# import os
# import pandas as pd
# from sklearn.datasets import load_breast_cancer
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score
# import joblib

In [30]:
# # Ensure the directory exists
# if not os.path.exists('model'):
#     os.makedirs('model')


In [31]:
# # Load the dataset
# data = load_breast_cancer()
# X = pd.DataFrame(data.data, columns=data.feature_names)
# y = data.target

In [32]:
# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [33]:
# # Standardize the features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

In [34]:
# # Initialize and train the Support Vector Machine (SVM) model
# model = SVC(kernel='linear', C=1, random_state=42)
# model.fit(X_train_scaled, y_train)


In [35]:
# # Make predictions
# y_pred = model.predict(X_test_scaled)

# # Calculate accuracy
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy:.2f}")



In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.impute import SimpleImputer
import joblib


In [37]:
# Load the dataset
df = pd.read_csv('Breast_Cancer_data.csv')

# Check for missing values
print("Missing values before imputation:")
print(df.isnull().sum())

# Handle missing values by imputing with the median value
imputer = SimpleImputer(strategy='median')
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']
X_imputed = imputer.fit_transform(X)

# Convert target labels from 'M'/'B' to 1/0
y = y.map({'M': 1, 'B': 0})

# Normalize the features
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X_imputed)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)


Missing values before imputation:
id                           0
diagnosis                    0
radius_mean                  0
texture_mean                 0
perimeter_mean               0
area_mean                    0
smoothness_mean              0
compactness_mean             0
concavity_mean               0
concave points_mean          0
symmetry_mean                0
fractal_dimension_mean       0
radius_se                    0
texture_se                   0
perimeter_se                 0
area_se                      0
smoothness_se                0
compactness_se               0
concavity_se                 0
concave points_se            0
symmetry_se                  0
fractal_dimension_se         0
radius_worst                 0
texture_worst                0
perimeter_worst              0
area_worst                   0
smoothness_worst             0
compactness_worst            0
concavity_worst              0
concave points_worst         0
symmetry_worst               0
fract



In [38]:
# Initialize the SVM model
model = SVC(probability=True)

# Apply 9-fold cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=9, scoring='accuracy')

print(f'Cross-validation Accuracy Scores: {cv_scores}')
print(f'Average Cross-validation Accuracy: {cv_scores.mean()}')


Cross-validation Accuracy Scores: [1.         0.98039216 0.98039216 0.96078431 0.98039216 1.
 0.98       0.98       0.92      ]
Average Cross-validation Accuracy: 0.9757734204793029


In [39]:
# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')


Accuracy: 0.9736842105263158
Precision: 0.9761904761904762
Recall: 0.9534883720930233
F1 Score: 0.9647058823529412


In [40]:
# Save the trained model
joblib.dump(model, 'cancer_model.pkl')


['cancer_model.pkl']