In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd

In [2]:
data = sns.load_dataset('iris')
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [3]:
data.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [4]:
data = data[data['species']!='setosa']
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
52,6.9,3.1,4.9,1.5,versicolor
53,5.5,2.3,4.0,1.3,versicolor
54,6.5,2.8,4.6,1.5,versicolor


In [5]:
data.species.unique()

array(['versicolor', 'virginica'], dtype=object)

In [6]:
data['species'] = data['species'].map({'versicolor' : 0, 'virginica' : 1})

In [7]:
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0


In [8]:
X = data.iloc[:,:-1]
X.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5


In [9]:
y = data['species']
y.head()

50    0
51    0
52    0
53    0
54    0
Name: species, dtype: int64

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=41)

In [11]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [12]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
classifier = LogisticRegression()
classifier

In [14]:
from sklearn.model_selection import GridSearchCV

In [15]:
# parameters = {'penalty' : ['l1', 'l2', 'elasticnet'], 'C' : [2,1,3,4,5,6,7,8,9,10], 'max_iter' : [100, 200, 300]}
parameters = {'penalty' : ['l1', 'l2', 'elasticnet'], 'C' : [1, 2, 3,4,5], 'max_iter' : [100, 200, 300]}

In [16]:
grid = GridSearchCV(classifier, parameters, scoring='accuracy', cv=10)

In [17]:
grid.fit(X_train, y_train)

300 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
150 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\naeem\Desktop\AI\env\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\naeem\Desktop\AI\env\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\naeem\Desktop\AI\env\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^

In [20]:
grid.best_params_

{'C': 1, 'max_iter': 100, 'penalty': 'l2'}

In [21]:
grid.best_score_

np.float64(0.9464285714285715)

In [22]:
best_model = grid.best_estimator_

In [26]:
pred = best_model.predict(X_test)

In [27]:
from sklearn.metrics import accuracy_score, classification_report

In [28]:
score = accuracy_score(y_test, pred)
score

0.92

In [29]:
print(classification_report(pred, y_test))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.85      0.92        13

    accuracy                           0.92        25
   macro avg       0.93      0.92      0.92        25
weighted avg       0.93      0.92      0.92        25



In [30]:
import joblib
joblib.dump(best_model, 'iris_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']