### Project Outline 
#### Step 1: Import Libraries
#### Step 2: Load and Understand the Dataset
#### Step 3: Data Preprocessing
#### Step 4: Split Data into Training and Testing Sets
#### Step 5: Train a Model
#### Step 6: Evaluate the Model
#### Step 7: Predict on Custom Input

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
from sklearn.datasets import load_breast_cancer

# Load the Breast Cancer dataset and pull out the feature matrix and the
# label vector in one step.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

# Tabular view for inspection: one column per feature name, with the label
# appended as the final column, `target`.
data = pd.DataFrame(X, columns=cancer.feature_names).assign(target=y)

# Peek at the leading rows, then count rows per class to gauge class balance.
print(data.head())
print(data['target'].value_counts())


   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0             

In [3]:
# Standardise the features: learn the scaling parameters from X, then apply
# them to X. Note that the parameters are computed over every row of X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [4]:
# Split the raw (unscaled) features into training and testing sets first,
# so that the scaler fitted below never sees the held-out rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same
# transformation to both splits. Fitting on the full dataset would let the
# test rows influence the mean/std used for training (data leakage) and make
# the evaluation optimistic.
# `scaler` is (re)bound here so that any later `scaler.transform(...)` call
# uses exactly the scaling the model was trained with.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [5]:
# Logistic-regression classifier with random_state pinned to 42
model = LogisticRegression(random_state=42)

# Fit the classifier on the training split
model.fit(X=X_train, y=y_train)


In [6]:
# Predict labels for the test rows
y_pred = model.predict(X_test)

# Compute every evaluation artefact up front ...
accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred, target_names=cancer.target_names)
conf_matrix = confusion_matrix(y_test, y_pred)

# ... then report them: overall accuracy, per-class metrics, and the
# confusion matrix.
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(report_text)
print("Confusion Matrix:")
print(conf_matrix)


Accuracy: 97.37%
Classification Report:
              precision    recall  f1-score   support

   malignant       0.98      0.95      0.96        43
      benign       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Confusion Matrix:
[[41  2]
 [ 1 70]]


In [7]:
# Custom input data (replace with your own values).
# One measurement per feature, in the same order as cancer.feature_names.
custom_values = [14.5, 20.1, 94.8, 654.0, 0.091, 0.115, 0.09, 0.051, 0.194, 0.062,
                 0.4, 1.2, 2.7, 35.0, 0.005, 0.02, 0.03, 0.01, 0.02, 0.004,
                 15.1, 25.0, 99.4, 710.0, 0.104, 0.147, 0.108, 0.07, 0.23, 0.07]

# Shape the values as a single-row 2-D array (1 sample x n features), which
# is the layout the scaler and the model are called with elsewhere.
# The raw list keeps its own name so re-running this cell always starts from
# the same input.
custom_input = np.asarray(custom_values, dtype=float).reshape(1, -1)

# Fail early with a clear message if the number of measurements does not
# match the number of features in the dataset.
expected_features = len(cancer.feature_names)
if custom_input.shape[1] != expected_features:
    raise ValueError(
        f"Expected {expected_features} feature values, got {custom_input.shape[1]}"
    )

# Apply the already-fitted scaler (transform only, no refit) so the input is
# on the same scale as the data the model was trained on.
custom_input_scaled = scaler.transform(custom_input)

# Predict using the trained model and map the class index to its name.
prediction = model.predict(custom_input_scaled)
prediction_label = cancer.target_names[prediction[0]]
print(f"The model predicts that the tumor is: {prediction_label}")


The model predicts that the tumor is: benign
