In [287]:
import numpy as np
import pandas as pd


In [290]:
with open("../data-library/lenses/lenses.names") as f:
    print(f.read())

1. Title: Database for fitting contact lenses

2. Sources:
     (a) Cendrowska, J. "PRISM: An algorithm for inducing modular rules",
         International Journal of Man-Machine Studies, 1987, 27, 349-370
     (b) Donor: Benoit Julien (Julien@ce.cmu.edu)
     (c) Date: 1 August 1990

3. Past Usage:
      1. See above.
      2. Witten, I. H. & MacDonald, B. A. (1988). Using concept
         learning for knowledge acquisition. International Journal of
         Man-Machine Studies, 27, (pp. 349-370).

 Notes:  This database is complete (all possible combinations of
         attribute-value pairs are represented).

         Each instance is complete and correct.

         9 rules cover the training set.

4. Relevant Information Paragraph:
    The examples are complete and noise free.
    The examples highly simplified the problem. The attributes do not
    fully describe all the factors affecting the decision as to which type,
    if any, to fit.

5. Number of Instances: 24

6. Number of 

In [292]:
data = pd.read_csv("../data-library/lenses/lenses.data",sep='\s+',
                   names=['age','spectacle prescription','astigmatic','tear production rate','class']
                  )

In [293]:
x = data.drop('class',axis=1)

In [294]:
y = data['class']

In [295]:
x.head()

Unnamed: 0,age,spectacle prescription,astigmatic,tear production rate
1,1,1,1,1
2,1,1,1,2
3,1,1,2,1
4,1,1,2,2
5,1,2,1,1


In [296]:
y.unique()

array([3, 2, 1])

In [297]:
from sklearn.model_selection import train_test_split

In [298]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=0)

In [299]:
from sklearn.linear_model import LogisticRegression

In [300]:
logReg = LogisticRegression(solver='lbfgs')

In [301]:
# help(logReg)

In [302]:
logReg.fit(x_train,y_train)

LogisticRegression()

In [303]:
logReg.classes_

array([1, 2, 3])

In [304]:
logReg.intercept_

array([-1.96090205, -0.83709419,  2.79799624])

In [305]:
logReg.coef_

array([[-0.19647759, -0.30499016,  0.88805481,  0.65490183],
       [ 0.04993865,  0.33833598, -0.84291195,  0.67652978],
       [ 0.14653894, -0.03334582, -0.04514287, -1.3314316 ]])

In [306]:
logReg.predict_proba(x_test)

array([[0.45483318, 0.1427678 , 0.40239902],
       [0.12893792, 0.03960645, 0.83145563],
       [0.07314606, 0.05469988, 0.87215405],
       [0.09954514, 0.05818329, 0.84227156],
       [0.02813797, 0.11880578, 0.85305624],
       [0.25116145, 0.34790705, 0.4009315 ],
       [0.13653966, 0.4604466 , 0.40301373],
       [0.2941145 , 0.22475263, 0.48113287]])

In [307]:
logReg.predict(x_test)

array([1, 3, 3, 3, 3, 3, 2, 3])

In [308]:
y_pred = logReg.predict(x_test)
print('Accuracy', logReg.score(x_test, y_test))

Accuracy 0.875


In [309]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, y_pred)
print(confusion_matrix)

[[1 0 0]
 [0 1 1]
 [0 0 5]]


In [310]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1
           2       1.00      0.50      0.67         2
           3       0.83      1.00      0.91         5

    accuracy                           0.88         8
   macro avg       0.94      0.83      0.86         8
weighted avg       0.90      0.88      0.86         8



In [311]:
from sklearn.datasets import load_iris

In [312]:
data = load_iris()

In [313]:
# data.data

In [314]:
data.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [315]:
x = data.data

In [316]:
y = data.target

In [317]:
# x

In [318]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [319]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3,random_state=0)

In [320]:
logreg = LogisticRegression()

In [321]:
logreg.fit(x_train,y_train)

LogisticRegression()

In [322]:
y_pred = logreg.predict(x_test)
print('Accuracy :',logreg.score(x_test, y_test))

Accuracy : 0.9777777777777777


In [323]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, y_pred)
print(confusion_matrix)

[[16  0  0]
 [ 0 17  1]
 [ 0  0 11]]


In [324]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45

