# **Nursery**

Nursery Database was derived from a hierarchical decision model originally developed to rank applications for nursery schools.

Download The Dataset : https://archive.ics.uci.edu/static/public/76/nursery.zip

### **Step 1 : import libraries**

In [57]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import os

### **Step 2 : Read Data From CSV file**

In [2]:
dataset = pd.read_csv("nursery.data",names=["parents","has_nurs","form","children","housing","finance","social","health","class"])
dataset.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority


### **Step 3 : Prepar Data**
- check is null ? 
- convert categorical features to numrical 

In [3]:
dataset.isnull().sum()

parents     0
has_nurs    0
form        0
children    0
housing     0
finance     0
social      0
health      0
class       0
dtype: int64

In [4]:
dataset["housing"].value_counts()

housing
convenient    4320
less_conv     4320
critical      4320
Name: count, dtype: int64

In [5]:
dataset["finance"].unique()

array(['convenient', 'inconv'], dtype=object)

In [6]:
dataset["children"].value_counts()

children
1       3240
2       3240
3       3240
more    3240
Name: count, dtype: int64

In [7]:
dataset["health"].value_counts()

health
recommended    4320
priority       4320
not_recom      4320
Name: count, dtype: int64

In [8]:
dataset["social"].value_counts()

social
nonprob          4320
slightly_prob    4320
problematic      4320
Name: count, dtype: int64

In [16]:
dataset.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class,target
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend,2
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority,1
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom,0
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend,2
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority,1


In [46]:
le = LabelEncoder()

transformer = ColumnTransformer(
    transformers=[
        ['categorical' , OneHotEncoder(),["parents","has_nurs","form","children","housing","social","health","finance"]]
    ],
    remainder="passthrough",
    n_jobs=os.cpu_count(),
)

In [47]:
le.fit(dataset["class"])

dataset["target"] = le.transform(dataset["class"])

In [48]:
dataset.head()

Unnamed: 0,parents,has_nurs,form,children,housing,finance,social,health,class,target
0,usual,proper,complete,1,convenient,convenient,nonprob,recommended,recommend,2
1,usual,proper,complete,1,convenient,convenient,nonprob,priority,priority,1
2,usual,proper,complete,1,convenient,convenient,nonprob,not_recom,not_recom,0
3,usual,proper,complete,1,convenient,convenient,slightly_prob,recommended,recommend,2
4,usual,proper,complete,1,convenient,convenient,slightly_prob,priority,priority,1


In [49]:
X = dataset.drop(columns=["class","target"])
y = dataset["target"]

In [50]:
transformer.fit(X)
X = transformer.transform(X)

### **Step 4 : split data into train and test**

In [53]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42,shuffle=True)

### **Step 5 : Choose Model and Train**

In [58]:
logisitc_grid = GridSearchCV(
    estimator=LogisticRegression(n_jobs=os.cpu_count()),
    cv=5,
    param_grid={
        "C" : [0,0.01,0.1,10,100],
        "penalty" : ["l1","l2","elasticnet"],
        "solver" : ["lbfgs","liblinear","saga"]
    },
    refit=True,
    verbose=2
)
logisitc_grid.fit(X_train,y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV] END ......................C=0, penalty=l1, solver=lbfgs; total time=   0.0s
[CV] END ......................C=0, penalty=l1, solver=lbfgs; total time=   0.0s
[CV] END ......................C=0, penalty=l1, solver=lbfgs; total time=   0.0s
[CV] END ......................C=0, penalty=l1, solver=lbfgs; total time=   0.0s
[CV] END ......................C=0, penalty=l1, solver=lbfgs; total time=   0.0s
[CV] END ..................C=0, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ..................C=0, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ..................C=0, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ..................C=0, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ..................C=0, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END .......................C=0, penalty=l1, solver=saga; total time=   0.0s
[CV] END .......................C=0, penalty=l1



[CV] END ...............C=0.01, penalty=l1, solver=liblinear; total time=   0.1s
[CV] END ...............C=0.01, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ...............C=0.01, penalty=l1, solver=liblinear; total time=   0.0s
[CV] END ...............C=0.01, penalty=l1, solver=liblinear; total time=   0.1s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ....................C=0.01, penalty=l1, solver=saga; total time=   0.1s
[CV] END ...................C=0.01, penalty=l2, solver=lbfgs; total time=   2.8s
[CV] END ...................C=0.01, penalty=l2, solver=lbfgs; total time=   1.5s
[CV] END ...................C=0.01, penalty=l2, solver=lbfgs; total time=   1.5s
[CV] END ...................



[CV] END ....................C=0.01, penalty=l2, solver=saga; total time=   0.3s
[CV] END ....................C=0.01, penalty=l2, solver=saga; total time=   0.3s
[CV] END ....................C=0.01, penalty=l2, solver=saga; total time=   0.2s
[CV] END ....................C=0.01, penalty=l2, solver=saga; total time=   0.2s
[CV] END ....................C=0.01, penalty=l2, solver=saga; total time=   0.3s
[CV] END ...........C=0.01, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ...........C=0.01, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ...........C=0.01, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ...........C=0.01, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ...........C=0.01, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .......C=0.01, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .......C=0.01, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .......C=0.01, pena



[CV] END ................C=0.1, penalty=l1, solver=liblinear; total time=   0.1s
[CV] END ................C=0.1, penalty=l1, solver=liblinear; total time=   0.1s
[CV] END ................C=0.1, penalty=l1, solver=liblinear; total time=   0.1s




[CV] END ................C=0.1, penalty=l1, solver=liblinear; total time=   0.1s




[CV] END .....................C=0.1, penalty=l1, solver=saga; total time=   0.7s




[CV] END .....................C=0.1, penalty=l1, solver=saga; total time=   0.6s




[CV] END .....................C=0.1, penalty=l1, solver=saga; total time=   0.7s




[CV] END .....................C=0.1, penalty=l1, solver=saga; total time=   0.6s




[CV] END .....................C=0.1, penalty=l1, solver=saga; total time=   0.7s
[CV] END ....................C=0.1, penalty=l2, solver=lbfgs; total time=   1.6s
[CV] END ....................C=0.1, penalty=l2, solver=lbfgs; total time=   1.5s
[CV] END ....................C=0.1, penalty=l2, solver=lbfgs; total time=   1.5s
[CV] END ....................C=0.1, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ....................C=0.1, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ................C=0.1, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END ................C=0.1, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ................C=0.1, penalty=l2, solver=liblinear; total time=   0.0s
[CV] END ................C=0.1, penalty=l2, solver=liblinear; total time=   0.0s




[CV] END ................C=0.1, penalty=l2, solver=liblinear; total time=   0.0s




[CV] END .....................C=0.1, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=0.1, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=0.1, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=0.1, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=0.1, penalty=l2, solver=saga; total time=   0.5s
[CV] END ............C=0.1, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=0.1, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=0.1, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=0.1, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=0.1, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ........C=0.1, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=0.1, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=0.1, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=0.1, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=0.1, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .............C=0.1, penalty=elasticnet, solver=saga; total time=   0.0s
[CV] END .............C=0.1,



[CV] END .................C=10, penalty=l1, solver=liblinear; total time=   1.1s




[CV] END .................C=10, penalty=l1, solver=liblinear; total time=   0.9s




[CV] END .................C=10, penalty=l1, solver=liblinear; total time=   0.9s




[CV] END .................C=10, penalty=l1, solver=liblinear; total time=   0.6s




[CV] END ......................C=10, penalty=l1, solver=saga; total time=   0.7s




[CV] END ......................C=10, penalty=l1, solver=saga; total time=   0.7s




[CV] END ......................C=10, penalty=l1, solver=saga; total time=   0.7s




[CV] END ......................C=10, penalty=l1, solver=saga; total time=   0.7s




[CV] END ......................C=10, penalty=l1, solver=saga; total time=   0.7s
[CV] END .....................C=10, penalty=l2, solver=lbfgs; total time=   0.2s
[CV] END .....................C=10, penalty=l2, solver=lbfgs; total time=   0.2s
[CV] END .....................C=10, penalty=l2, solver=lbfgs; total time=   0.2s
[CV] END .....................C=10, penalty=l2, solver=lbfgs; total time=   0.2s
[CV] END .....................C=10, penalty=l2, solver=lbfgs; total time=   0.2s
[CV] END .................C=10, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END .................C=10, penalty=l2, solver=liblinear; total time=   0.1s




[CV] END .................C=10, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END .................C=10, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END .................C=10, penalty=l2, solver=liblinear; total time=   0.1s




[CV] END ......................C=10, penalty=l2, solver=saga; total time=   0.4s




[CV] END ......................C=10, penalty=l2, solver=saga; total time=   0.4s




[CV] END ......................C=10, penalty=l2, solver=saga; total time=   0.4s




[CV] END ......................C=10, penalty=l2, solver=saga; total time=   0.4s




[CV] END ......................C=10, penalty=l2, solver=saga; total time=   0.4s
[CV] END .............C=10, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .............C=10, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .............C=10, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .............C=10, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .............C=10, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END .........C=10, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .........C=10, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .........C=10, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .........C=10, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .........C=10, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ..............C=10, penalty=elasticnet, solver=saga; total time=   0.0s
[CV] END ..............C=10,



[CV] END ................C=100, penalty=l1, solver=liblinear; total time=   0.3s




[CV] END ................C=100, penalty=l1, solver=liblinear; total time=   0.3s




[CV] END .....................C=100, penalty=l1, solver=saga; total time=   0.8s




[CV] END .....................C=100, penalty=l1, solver=saga; total time=   0.8s




[CV] END .....................C=100, penalty=l1, solver=saga; total time=   0.8s




[CV] END .....................C=100, penalty=l1, solver=saga; total time=   0.8s




[CV] END .....................C=100, penalty=l1, solver=saga; total time=   0.9s
[CV] END ....................C=100, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ....................C=100, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ....................C=100, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ....................C=100, penalty=l2, solver=lbfgs; total time=   0.1s
[CV] END ....................C=100, penalty=l2, solver=lbfgs; total time=   0.1s




[CV] END ................C=100, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END ................C=100, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END ................C=100, penalty=l2, solver=liblinear; total time=   0.1s




[CV] END ................C=100, penalty=l2, solver=liblinear; total time=   0.1s
[CV] END ................C=100, penalty=l2, solver=liblinear; total time=   0.1s




[CV] END .....................C=100, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=100, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=100, penalty=l2, solver=saga; total time=   0.4s




[CV] END .....................C=100, penalty=l2, solver=saga; total time=   0.4s


125 fits failed out of a total of 225.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
45 fits failed with the following error:
Traceback (most recent call last):
  File "/home/meraj/Desktop/machine learning exersize/env/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/meraj/Desktop/machine learning exersize/env/lib/python3.12/site-packages/sklearn/base.py", line 1358, in wrapper
    estimator._validate_params()
  File "/home/meraj/Desktop/machine learning exersize/env/lib/python3.12/site-packages/sklearn/base.py", line 471, in _validate_params
    validate_parameter_constraints(
  File "/home/meraj/Desktop/machine learning ex

[CV] END .....................C=100, penalty=l2, solver=saga; total time=   0.4s
[CV] END ............C=100, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=100, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=100, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=100, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ............C=100, penalty=elasticnet, solver=lbfgs; total time=   0.0s
[CV] END ........C=100, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=100, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=100, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=100, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END ........C=100, penalty=elasticnet, solver=liblinear; total time=   0.0s
[CV] END .............C=100, penalty=elasticnet, solver=saga; total time=   0.0s
[CV] END .............C=100,



0,1,2
,estimator,LogisticRegression(n_jobs=8)
,param_grid,"{'C': [0, 0.01, ...], 'penalty': ['l1', 'l2', ...], 'solver': ['lbfgs', 'liblinear', ...]}"
,scoring,
,n_jobs,
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l1'
,dual,False
,tol,0.0001
,C,100
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'saga'
,max_iter,100


In [59]:
logisitc_grid.best_params_

{'C': 100, 'penalty': 'l1', 'solver': 'saga'}

In [60]:
y_predicted = logisitc_grid.predict(X_test)
print(classification_report(y_test,y_predicted))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       870
           1       0.89      0.90      0.89       873
           2       0.00      0.00      0.00         2
           3       0.90      0.90      0.90       785
           4       0.76      0.68      0.72        62

    accuracy                           0.93      2592
   macro avg       0.71      0.70      0.70      2592
weighted avg       0.93      0.93      0.93      2592



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


**accuracy with LogisticRegression is : 93**

now we try with another model

In [61]:
tree_grid = GridSearchCV(
    estimator=DecisionTreeClassifier(),
    param_grid={
        "criterion" : ["entropy","gini"],
        "max_depth" : [3,4,5,6,7],
    },
    refit=True,
    verbose=2,
    n_jobs=os.cpu_count(),
    cv=5
)

tree_grid.fit(X_train,y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END .....................criterion=entropy, max_depth=3; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=4; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=3; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=4; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=5; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=5; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=5; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=5; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=5; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=3; total time=   0.0s
[CV] END .....................criterion=entropy, max_depth=3; total time=   0.0s
[CV] END .....................criterion=entropy,

0,1,2
,estimator,DecisionTreeClassifier()
,param_grid,"{'criterion': ['entropy', 'gini'], 'max_depth': [3, 4, ...]}"
,scoring,
,n_jobs,8
,refit,True
,cv,5
,verbose,2
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,7
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [63]:
tree_grid.best_params_

{'criterion': 'gini', 'max_depth': 7}

In [62]:
y_predicted = tree_grid.predict(X_test)
print(classification_report(y_test,y_predicted))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       870
           1       0.88      0.88      0.88       873
           2       0.00      0.00      0.00         2
           3       0.88      0.95      0.91       785
           4       0.00      0.00      0.00        62

    accuracy                           0.92      2592
   macro avg       0.55      0.57      0.56      2592
weighted avg       0.90      0.92      0.91      2592



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
