In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the breast-cancer CSV from the mounted Google Drive and preview the first rows
csv_path = '/content/drive/MyDrive/Breast_Cancer.csv'
df = pd.read_csv(csv_path)
df.head()


Unnamed: 0,Id,Cl.thickness,Cell.size,Cell.shape,Marg.adhesion,Epith.c.size,Bare.nuclei,Bl.cromatin,Normal.nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1.0,3,1,1,0
1,1002945,5,4,4,5,7,10.0,3,2,1,0
2,1015425,3,1,1,1,2,2.0,3,1,1,0
3,1016277,6,8,8,1,3,4.0,3,7,1,0
4,1017023,4,1,1,3,2,1.0,3,1,1,0


In [None]:
# Features: every column except the target ("Class") and the sample identifier ("Id").
# "Id" is a per-sample identifier (see the preview above), not a measurement; keeping it
# in X lets the models fit a weight to an arbitrary ID number.
# NOTE: outputs saved before this change were produced with "Id" included; re-run to refresh.
X = df.drop(columns=["Id", "Class"])
Y = df["Class"]

# Mean imputer for missing feature values; it is fitted later, on the training split only.
imputer = SimpleImputer(strategy='mean')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)


In [None]:
# Standardise the features: the scaler's statistics are learned from the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Fit the imputer on the scaled training features, then fill their missing values
imputer.fit(X_train_scaled)
X_train_scaled = imputer.transform(X_train_scaled)


In [None]:
# Transform the testing set using the fitted imputer.
# Only transform() is called here, so the fill values are the ones learned from the
# training split in the previous cell; the test data never influences them.
X_test_scaled = imputer.transform(X_test_scaled)


In [None]:
# Train a Logistic Regression classifier on the scaled, imputed training features
logistic_regression_model = LogisticRegression().fit(X_train_scaled, Y_train)

# Also train a Linear Regression model on the same features and the same Class target.
# It is scored as a regressor (MSE / R-squared) in the later cells.
# LinearRegression must be imported from sklearn.linear_model in the imports cell.
linear_regression_model = LinearRegression()
linear_regression_model.fit(X_train_scaled, Y_train)

In [None]:
# Train a KNN Classification model on the scaled, imputed training features.
# KNeighborsClassifier() is constructed with no arguments, i.e. with the library defaults.
knn_model = KNeighborsClassifier()
knn_model.fit(X_train_scaled, Y_train)


In [None]:
# Show the fitted logistic-regression parameters: one weight per (scaled) feature
# plus the intercept term.
intercept = logistic_regression_model.intercept_
coefficients = logistic_regression_model.coef_
for param_label, param_value in (("Coefficients:", coefficients), ("Intercept:", intercept)):
    print(param_label, param_value)

Coefficients: [[-0.35882394  1.36324491  0.26906617  0.97564894  0.51860193  0.2981035
   1.42400929  0.8380279   0.01207334  0.58401863]]
Intercept: [-1.26173015]


In [None]:
# Evaluate the Logistic Regression classifier on the held-out test split.
# predict() already returns class labels, so they are compared with Y_test directly;
# rounding is only needed for the continuous output of a regressor.
logistic_regression_predictions = logistic_regression_model.predict(X_test_scaled)
logistic_regression_accuracy = accuracy_score(Y_test, logistic_regression_predictions)
logistic_regression_confusion_matrix = confusion_matrix(Y_test, logistic_regression_predictions)
print("Logistic Regression Accuracy:", logistic_regression_accuracy)
print("Logistic Regression Confusion Matrix:")
# Rows are the true classes, columns are the predicted classes.
print(logistic_regression_confusion_matrix)


Logistic Regression Accuracy: 0.9642857142857143
Logistic Regression Confusion Matrix:
[[94  1]
 [ 4 41]]


In [None]:
# Evaluate the KNN classifier on the held-out test split
knn_predictions = knn_model.predict(X_test_scaled)
knn_confusion_matrix = confusion_matrix(y_true=Y_test, y_pred=knn_predictions)
knn_accuracy = accuracy_score(y_true=Y_test, y_pred=knn_predictions)
print("KNN Classification Accuracy:", knn_accuracy)
# Heading and matrix go on separate lines, matching two consecutive print() calls
print("KNN Classification Confusion Matrix:", knn_confusion_matrix, sep="\n")


KNN Classification Accuracy: 0.9714285714285714
KNN Classification Confusion Matrix:
[[94  1]
 [ 3 42]]


In [None]:
# Make predictions on the test set with the Linear Regression model.
# The model was fitted on the Class column as a numeric target, so these are
# regression outputs rather than class labels.
linear_regression_predictions = linear_regression_model.predict(X_test_scaled)


In [None]:
# Score the Linear Regression predictions against the test labels.
# Both metrics treat the Class label as a numeric value.
linear_regression_r2 = r2_score(y_true=Y_test, y_pred=linear_regression_predictions)
linear_regression_mse = mean_squared_error(y_true=Y_test, y_pred=linear_regression_predictions)

for metric_label, metric_value in (
    ("Linear Regression Mean Squared Error:", linear_regression_mse),
    ("Linear Regression R-squared Score:", linear_regression_r2),
):
    print(metric_label, metric_value)

Linear Regression Mean Squared Error: 0.038953499704094674
Linear Regression R-squared Score: 0.8214061767952618
