In [94]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
data = load_breast_cancer()
x = data.data
y = data.target

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Standardize the dataset (important for logistic regression and SVM)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


#### Logistic Regression

In [95]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix

model=LogisticRegression()
model.fit(x_train,y_train)
ly_pred=model.predict(x_test)

confusion_matrix(y_test,ly_pred)
L_accuracy=accuracy_score(y_test,ly_pred)*100
print("Accuracy of the logistic model is {:.9f}%".format(L_accuracy))

Accuracy of the logistic model is 97.368421053%


#### Support Vector Machines

In [96]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Initialize and train the SVM classifier
svm_model = SVC(kernel='rbf')  # You can also use other kernels like 'rbf', 'poly', etc.
svm_model.fit(x_train, y_train)

# Make predictions
sy_pred = svm_model.predict(x_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, sy_pred)*100
print(f"Accuracy: {accuracy:.9f}")

Accuracy: 98.245614035


#### Decision Trees

In [97]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)

y_pred = dtc.predict(x_test)

accuracy = dtc.score(x_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.9385964912280702


#### K Nearest Neightbors

In [98]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5)

knn.fit(x_train, y_train)

y_pred = knn.predict(x_test)

knn.score(x_test, y_test)

0.9473684210526315

#### Random Forest Classification

In [99]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_model.fit(x_train, y_train)

# Make predictions on the test set
rf_pred = rf_model.predict(x_test)

# Accuracy of Random Forest
rf_acc = accuracy_score(y_test, rf_pred)
print(f"Random Forest Accuracy: {rf_acc:.4f}")

# Confusion Matrix for Random Forest
print("Confusion Matrix for Random Forest:")
print(confusion_matrix(y_test, rf_pred))

Random Forest Accuracy: 0.9649
Confusion Matrix for Random Forest:
[[40  3]
 [ 1 70]]


#### Kernel Ridge Regression

In [105]:
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics import mean_squared_error

# Initialize and train the Kernel Ridge Regression model
kr_model = KernelRidge(alpha=1.0, kernel='rbf')
kr_model.fit(x_train, y_train)

# Make predictions
kr_pred = kr_model.predict(x_test)

# Evaluate the model
kr_mse = mean_squared_error(y_test, kr_pred)
print(f"Mean Squared Error of Kernel Ridge Regression: {kr_mse:.4f}")
accuracy = kr_model.score(x_test, y_test)
print(f"accuracy: {accuracy}")

Mean Squared Error of Kernel Ridge Regression: 0.0378
accuracy: 0.8392149687034245


#### Regularisation

In [101]:
# Logistic Regression without regularization
log_reg_no_reg = LogisticRegression(penalty=None, max_iter=1000)
log_reg_no_reg.fit(x_train, y_train)
no_reg_pred = log_reg_no_reg.predict(x_test)

# Accuracy and confusion matrix
no_reg_acc = accuracy_score(y_test, no_reg_pred)
print(f"Accuracy without Regularization: {no_reg_acc:.4f}")
print("Confusion Matrix without Regularization:")
print(confusion_matrix(y_test, no_reg_pred))


Accuracy without Regularization: 0.9386
Confusion Matrix without Regularization:
[[42  1]
 [ 6 65]]


In [102]:
# Logistic Regression with L2 Regularization (Ridge)
log_reg_l2 = LogisticRegression(penalty='l2', C=1.0, max_iter=1000)  # C=1.0 is default, smaller values increase regularization
log_reg_l2.fit(x_train, y_train)
l2_pred = log_reg_l2.predict(x_test)

# Accuracy and confusion matrix
l2_acc = accuracy_score(y_test, l2_pred)
print(f"Accuracy with L2 Regularization: {l2_acc:.4f}")
print("Confusion Matrix with L2 Regularization:")
print(confusion_matrix(y_test, l2_pred))


Accuracy with L2 Regularization: 0.9737
Confusion Matrix with L2 Regularization:
[[41  2]
 [ 1 70]]


In [103]:
# Logistic Regression with L1 Regularization (Lasso)
log_reg_l1 = LogisticRegression(penalty='l1', solver='liblinear', C=1.0, max_iter=1000)
log_reg_l1.fit(x_train, y_train)
l1_pred = log_reg_l1.predict(x_test)

# Accuracy and confusion matrix
l1_acc = accuracy_score(y_test, l1_pred)
print(f"Accuracy with L1 Regularization: {l1_acc:.4f}")
print("Confusion Matrix with L1 Regularization:")
print(confusion_matrix(y_test, l1_pred))


Accuracy with L1 Regularization: 0.9737
Confusion Matrix with L1 Regularization:
[[42  1]
 [ 2 69]]


In [104]:
# Coefficients without regularization
print("Coefficients without regularization:", log_reg_no_reg.coef_)

# Coefficients with L2 regularization
print("Coefficients with L2 regularization:", log_reg_l2.coef_)

# Coefficients with L1 regularization
print("Coefficients with L1 regularization:", log_reg_l1.coef_)


Coefficients without regularization: [[   9.43309636  -16.98638813   40.02092582   10.89014962    5.03344322
   274.94009818 -134.8381584  -266.18473606   40.99650503 -168.01794604
  -259.63946217   35.91792789  115.35011389 -167.38940131  -79.54006031
   -97.5741303   246.60809325  -70.05165414   95.95418597  131.93923412
  -105.87135969  -91.92127648   73.01874488 -103.73668988   93.97496151
    29.28537604 -244.05971135   -8.90816016 -169.6548548    59.68383553]]
Coefficients with L2 regularization: [[-0.43190368 -0.38732553 -0.39343248 -0.46521006 -0.07166728  0.54016395
  -0.8014581  -1.11980408  0.23611852  0.07592093 -1.26817815  0.18887738
  -0.61058302 -0.9071857  -0.31330675  0.68249145  0.17527452 -0.3112999
   0.50042502  0.61622993 -0.87984024 -1.35060559 -0.58945273 -0.84184594
  -0.54416967  0.01611019 -0.94305313 -0.77821726 -1.20820031 -0.15741387]]
Coefficients with L1 regularization: [[ 0.          0.          0.          0.          0.          0.
   0.         -2.4