In [1]:
import pandas as pd
import numpy as np

colnames=['variance', 'skewness', 'curtosis', 'entropy', 'class'] 
df = pd.read_csv('data_banknote_authentication.txt', sep=',', names=colnames, header=None)
df.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [6]:
import random
def generateData():
    variance = random.uniform(-7.042100, 6.824800)
    skewness = random.uniform(-13.773100, 12.951600)
    curtosis = random.uniform(-5.286100, 17.927400)
    entropy = random.uniform(-8.548200, 2.449500)

    return [[variance, skewness, curtosis, entropy]]

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, mean_absolute_error, mean_squared_error

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values 

# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20) 

# Standardize features by removing mean and scaling to unit variance:
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test) 

In [33]:
classifier = LogisticRegression()

classifier.fit(X_train, y_train) 

# Predict y data with classifier: 
y_predict = classifier.predict(X_test)

# Print results: 
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))


loss = log_loss(y_test, y_predict, eps=1e-15)
accuracy = accuracy_score(y_test, y_predict)
auc = roc_auc_score(y_test, y_predict)
mae = mean_absolute_error(y_test, y_predict)
rmse = mean_squared_error(y_test, y_predict, squared=False)

print(loss)
print(accuracy)
print(auc)
print(mae)
print(rmse)

[[139   3]
 [  0 133]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       142
           1       0.98      1.00      0.99       133

    accuracy                           0.99       275
   macro avg       0.99      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275

0.37679537446190153
0.9890909090909091
0.9894366197183099
0.01090909090909091
0.1044465935734187


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score, mean_absolute_error, mean_squared_error
import time

class ModelController():
    
    def build_and_evaluate(self):
        timestamp = time.time()

        colnames=['variance', 'skewness', 'curtosis', 'entropy', 'class'] 
        df = pd.read_csv('./data_banknote_authentication.txt', sep=',', names=colnames, header=None)

        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values 

        # Split dataset into random train and test subsets:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20) 

        # Standardize features by removing mean and scaling to unit variance:
        scaler = StandardScaler()
        scaler.fit(X_train)

        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test) 

        classifier = LogisticRegression()

        classifier.fit(X_train, y_train) 

        # Predict y data with classifier: 
        y_predict = classifier.predict(X_test)

        # Print results: 
        loss = log_loss(y_test, y_predict, eps=1e-15)
        accuracy = accuracy_score(y_test, y_predict)
        auc = roc_auc_score(y_test, y_predict)
        mae = mean_absolute_error(y_test, y_predict)
        rmse = mean_squared_error(y_test, y_predict, squared=False)
        
        eval = {
            'loss' : loss,
            'accuracy' : accuracy,
            'auc' : auc,
            'mae' : mae,
            'rmse' : rmse,
            'timestamp': timestamp
        }
        return classifier, eval
          

In [8]:
mc = ModelController()
model, ev = mc.build_and_evaluate()

In [5]:
colnames=['variance', 'skewness', 'curtosis', 'entropy', 'class'] 
df = pd.read_csv('./data_banknote_authentication.txt', sep=',', names=colnames, header=None)

X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values 


In [20]:
model.predict(generateData())

array([0])

In [38]:
df.loc[df['class'] == 1]

Unnamed: 0,variance,skewness,curtosis,entropy,class
762,-1.39710,3.31910,-1.392700,-1.99480,1
763,0.39012,-0.14279,-0.031994,0.35084,1
764,-1.66770,-7.15350,7.892900,0.96765,1
765,-3.84830,-12.80470,15.682400,-1.28100,1
766,-3.56810,-8.21300,10.083000,0.96765,1
...,...,...,...,...,...
1367,0.40614,1.34920,-1.450100,-0.55949,1
1368,-1.38870,-4.87730,6.477400,0.34179,1
1369,-3.75030,-13.45860,17.593200,-2.77710,1
1370,-3.56370,-8.38270,12.393000,-1.28230,1


In [54]:
model.predict_proba([[ 18,  1.6661 , -23.8073 , -2.4699]])

array([[4.21369304e-04, 9.99578631e-01]])