In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn import linear_model, decomposition, datasets
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Read the banknote-authentication CSV into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="../input/banknote-authentication-uci/BankNoteAuthentication.csv"
)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Shuffle the rows with a fixed seed so that a Restart & Run All reproduces
# the same row order, and therefore the same previews in the cells below.
# The seed matches the one used for the train/validation split.
df = sklearn.utils.shuffle(df, random_state=99)

In [None]:
# Preview the first five rows again, now in shuffled order.
df.head(n=5)

In [None]:
# How many rows fall into each value of the target column.
df.loc[:, "class"].value_counts()

In [None]:
# Per-column skewness (computed down the rows of each column).
df.skew(axis=0)

In [None]:
# Count missing values in every column.
df.isna().sum()

In [None]:
# Pairwise Pearson correlation between all columns, target included.
df.corr(method="pearson")

In [None]:
# Separate the target column from the feature columns.
y = df['class']
X = df.drop(columns='class')

In [None]:
# Hold out 30% of the rows for validation; the seed keeps the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=99,
)

In [None]:
# Shapes of the four split pieces, in the order they were unpacked.
tuple(part.shape for part in (X_train, X_valid, y_train, y_valid))

In [None]:
# Build the modelling pipeline: standardise the features, then fit a
# logistic-regression classifier on the scaled values.
#
# The grid searched on this pipeline tries both the 'l1' and 'l2' penalties.
# scikit-learn's default 'lbfgs' solver rejects 'l1', so every 'l1' candidate
# would fail to fit and be scored as NaN. 'liblinear' supports both penalties.

sc = StandardScaler()
lr = LogisticRegression(solver='liblinear')

pipe = Pipeline(
    steps = [
        ('scale', sc),
        ('model', lr)
    ]
)

In [None]:
# Candidate regularization strengths: 50 values spaced evenly on a log scale
# between 1e-4 and 1e4.
C = np.logspace(start=-4, stop=4, num=50)

# Candidate regularization penalties: both L1 and L2 are searched.
penalty = ['l1', 'l2']

# Search grid; the 'model__' prefix routes each entry to the pipeline step
# named 'model'.
params = {
    'model__C': C,
    'model__penalty': penalty,
}

In [None]:
# Exhaustive cross-validated search over `params`, using `pipe` as the estimator.
clf = GridSearchCV(estimator=pipe, param_grid=params)

In [None]:
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence only solver non-convergence messages. A blanket "ignore" would also
# hide FitFailedWarning, which is how GridSearchCV reports parameter
# combinations that could not be fitted at all.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [None]:
# Run the grid search on the training split only; the validation split stays
# untouched until prediction time.
clf.fit(X=X_train, y=y_train)

In [None]:
# Report the winning configuration, one value per line:
# best penalty, best C, then the fitted model step itself.
best_params = clf.best_estimator_.get_params()
for key in ('model__penalty', 'model__C', 'model'):
    print(best_params[key])

In [None]:
# Predict labels for the held-out validation rows with the fitted search object.
pred = clf.predict(X=X_valid)

In [None]:
# Mean cross-validated score of the best candidate, measured on the training
# split during the search. This is not the score on X_valid.
clf.best_score_

In [None]:
# Confusion matrix on the validation split (rows: true class, columns: predicted).
confusion_matrix(y_true=y_valid, y_pred=pred)

In [None]:
# Fraction of validation rows classified correctly.
accuracy_score(y_true=y_valid, y_pred=pred)

In [None]:
# Per-class precision, recall and F1 on the validation split.
print(classification_report(y_true=y_valid, y_pred=pred))