## Exercise: Machine Learning - Finding the Optimal Model and Hyperparameters

For the digits dataset in `sklearn.datasets`, try the following classifiers and find out which one gives the best performance. Also find the optimal parameters for that classifier.

```
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
```

In [1]:
from sklearn.datasets import load_digits
# Load the digits dataset that every classifier below is evaluated on.
digits = load_digits()
# List the attributes of the returned object; the search below uses
# `data` (features) and `target` (labels).
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [2]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [9]:
# Show the hyperparameter names and default values of a freshly constructed
# estimator, to decide which ones to put in its search grid.
DecisionTreeClassifier().get_params(deep=True)
# The same inspection was repeated for each of the other classifiers.

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

In [14]:
# Candidate estimators, each paired with the hyperparameter grid to search.
# Every entry has the same shape: {'model': <estimator>, 'params': <grid>}.
model_params = {
    # Support vector classifier: vary C and the kernel type.
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 5, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    # Random forest: vary the number of trees.
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1, 3, 5, 10],
        },
    },
    # Logistic regression: vary C; max_iter is set explicitly to 10000.
    'logistic_regression': {
        'model': LogisticRegression(max_iter=10000),
        'params': {
            'C': [1, 5, 10, 20],
        },
    },
    # Gaussian naive Bayes: no hyperparameters are searched (empty grid).
    # NOTE: the key spelling is kept as-is because the recorded results
    # table uses this exact label.
    'guassianNB': {
        'model': GaussianNB(),
        'params': {},
    },
    # Multinomial naive Bayes: vary alpha.
    'multinomialNB': {
        'model': MultinomialNB(),
        'params': {
            'alpha': [1, 2],
        },
    },
    # Decision tree: vary the split criterion.
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini', 'entropy'],
        },
    },
}

In [18]:
from sklearn.model_selection import GridSearchCV

# For each candidate estimator, run a 5-fold cross-validated grid search over
# its parameter grid and record the best score with the settings that gave it.
scores = []
for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(digits.data, digits.target)
    scores.append(
        dict(
            model=model_name,
            best_score=clf.best_score_,
            best_params=clf.best_params_,
        )
    )

In [20]:
import pandas as pd

# Put the per-model search results into a table with a fixed column order.
df = pd.DataFrame(data=scores, columns=['model', 'best_score', 'best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.89541,{'n_estimators': 10}
2,logistic_regression,0.91376,{'C': 1}
3,guassianNB,0.806928,{}
4,multinomialNB,0.871464,{'alpha': 2}
5,decision_tree,0.806928,{'criterion': 'entropy'}


***SVM has the highest cross-validated accuracy (about 0.948), with C=1 and a linear kernel.***