In [None]:
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including scikit-learn's ConvergenceWarning and FitFailedWarning. Fits below that
# do not converge or that fail outright will therefore do so without any message.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset from a path relative to the notebook's working directory
# and preview the first rows.
df = pd.read_csv("cancer_classification.csv")
df.head()

In [None]:
# Summarise column dtypes and non-null counts to spot missing values or non-numeric columns.
df.info()

In [None]:
# Positional split: every column except the last goes into the feature matrix,
# the last column becomes the target.
# NOTE(review): assumes the label is the final column of the CSV — confirm against the file.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Baseline: default-parameter logistic regression trained on whatever xtrain
# currently holds (at this point in the notebook, the features as split).
logreg = LogisticRegression().fit(xtrain, ytrain)

# Accuracy on both splits, to compare training fit against held-out performance.
train, test = logreg.score(xtrain, ytrain), logreg.score(xtest, ytest)
print(f"Training Accuracy : {train}\nTesting Accuracy : {test}\n\n")

# Per-class precision / recall / F1 on the held-out rows.
ypred = logreg.predict(xtest)
print(classification_report(ytest, ypred))

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, then apply that same
# fitted transform to both splits so nothing from the test rows influences the scaler.
# xtrain / xtest are overwritten with their scaled versions.
sc = StandardScaler().fit(xtrain)
xtrain, xtest = sc.transform(xtrain), sc.transform(xtest)

In [None]:
# Same default model as before, refit from scratch on the current
# (now standardised) xtrain so the metrics can be compared with the earlier run.
logreg = LogisticRegression().fit(xtrain, ytrain)
ypred = logreg.predict(xtest)

# Accuracy on the training rows and on the held-out rows.
train, test = (
    logreg.score(xtrain, ytrain),
    logreg.score(xtest, ytest),
)
print(f"Training Accuracy : {train}\nTesting Accuracy : {test}\n\n")
print(classification_report(ytest, ypred))

In [None]:
# Show the seven log-spaced values from 1e-3 to 1e3, one per line.
print(*np.logspace(-3, 3, 7), sep="\n")

In [None]:
from sklearn.model_selection import GridSearchCV

# Inverse regularisation strengths to try: 1e-3 ... 1e3, one per decade.
c_values = np.logspace(-3, 3, 7)

# One sub-grid per penalty, listing only the solvers that support it.
# A single dict would take the full cross-product, and most of those candidates
# cannot be fitted at all:
#   * "l1" is only implemented by liblinear and saga;
#   * "elasticnet" is only implemented by saga and additionally needs l1_ratio.
# GridSearchCV accepts a list of dicts and searches the union of the sub-grids.
parameters = [
    {
        "penalty": ["l2"],
        "C": c_values,
        "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
    },
    {
        "penalty": ["l1"],
        "C": c_values,
        "solver": ["liblinear", "saga"],
    },
    {
        "penalty": ["elasticnet"],
        "C": c_values,
        "solver": ["saga"],
        # Mixing weights strictly between pure L2 (0) and pure L1 (1),
        # which the two sub-grids above already cover.
        "l1_ratio": [0.25, 0.5, 0.75],
    },
]

In [None]:
# Exhaustive search over every candidate in `parameters`, ranked by mean
# cross-validated accuracy.
grid = GridSearchCV(
    estimator=LogisticRegression(),  # base model to tune
    param_grid=parameters,           # candidate hyperparameter values
    scoring="accuracy",              # metric used to rank candidates
    cv=5,                            # 5-fold cross-validation
    verbose=3,                       # amount of progress logging, not a computation mode
)

In [None]:
# Run the search using the training split only; the held-out test rows are not touched here.
grid.fit(xtrain, ytrain)

In [None]:
# Hyperparameter combination that achieved the best mean cross-validated score.
grid.best_params_

In [None]:
# Mean cross-validated accuracy of that best combination (measured on training folds, not on xtest).
grid.best_score_

In [None]:
# The model built with the best hyperparameters found by the search.
grid.best_estimator_

In [None]:
# The search was created without overriding refit (default True), so
# best_estimator_ has already been retrained on all of xtrain with the winning
# hyperparameters. Fitting it again here would only repeat that work, mutate the
# object owned by `grid`, and, for solvers with random shuffling, could yield a
# slightly different model from the one the search produced.
logreg = grid.best_estimator_
ypred = logreg.predict(xtest)

# Accuracy on the training rows and on the held-out rows.
train = logreg.score(xtrain, ytrain)
test = logreg.score(xtest, ytest)
print(f"Training Accuracy : {train}\nTesting Accuracy : {test}\n\n")
print(classification_report(ytest, ypred))

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# `logreg` was tuned and trained on standardised features, while `x` holds the raw
# columns. Cross-validating the bare model on `x` would score it under different
# conditions from those it was selected for. Wrapping scaler + model in a pipeline
# makes each fold fit its own scaler on that fold's training part only, so the
# features match the tuning setup and no statistics leak from the validation part.
# cross_val_score clones the pipeline for every fold, so `logreg` itself is not refit.
cv_model = make_pipeline(StandardScaler(), logreg)
cvs = cross_val_score(cv_model, x, y, cv=5, scoring="accuracy")
print(f"Avg Accuracy : {cvs.mean()}\nSTD : {cvs.std()}")