<a href="https://colab.research.google.com/github/royal-dit/ML-algorithms/blob/main/support%20vector%20machine/Random_forest_vs_SVM_vs_logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import LearningRateScheduler
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import GridSearchCV
from keras.constraints import maxnorm
from keras.layers import BatchNormalization,MaxPooling2D,Flatten,Conv2D
mnist = keras.datasets.mnist


In [2]:
(X_train,y_train),(X_test,y_test) = mnist.load_data()

In [3]:
X_train.shape

(60000, 28, 28)

In [4]:
X_train, X_test = X_train/255,X_test/255
x_train = X_train.reshape(len(X_train),28,28)
x_test = X_test.reshape(len(X_test),28,28)
num_classes= 10
x_train.shape


(60000, 28, 28)

In [5]:
#one hot encoding for categorical labels
y_train = keras.utils.to_categorical(y_train,num_classes)
y_test = keras.utils.to_categorical(y_test,num_classes)

In [6]:
from sklearn.model_selection import train_test_split

In [30]:
x_grid,x_not_use,y_grid,y_not_use = train_test_split(x_train,y_train,test_size=0.9)

In [31]:
x_grid.shape,y_grid.shape

((6000, 28, 28), (6000, 10))

In [9]:
x_grid = np.expand_dims(x_grid,axis = 3)
SIZE = x_grid.shape[1]
x_grid.shape


(6000, 28, 28, 1)

In [10]:
def feature_extractor():
  feature = tf.keras.Sequential()
  feature.add(Conv2D(32,3,activation='relu',padding='same',input_shape=(28,28,1)))
  feature.add(BatchNormalization())

  feature.add(Conv2D(32,3,activation='relu',padding='same'))
  feature.add(BatchNormalization())
  feature.add(MaxPooling2D())

  feature.add(Flatten())
  return feature





In [11]:
feature_extractor = feature_extractor()


In [12]:
feature_extractor.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 batch_normalization (BatchN  (None, 28, 28, 32)       128       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 batch_normalization_1 (Batc  (None, 28, 28, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d (MaxPooling2D  (None, 14, 14, 32)       0         
 )                                                               
                                                        

In [13]:
#this is our X input for random forest
X_for_RF = feature_extractor.predict(x_grid)





**Random forest**

In [14]:
from sklearn.ensemble import RandomForestClassifier
RF_model = RandomForestClassifier(n_estimators=50,random_state = 42)
RF_model.fit(X_for_RF,y_grid)

RandomForestClassifier(n_estimators=50, random_state=42)

In [15]:
#send test data model through same feature extractor process 
X_test_feature = feature_extractor.predict(np.expand_dims(x_test,axis=3))



In [16]:
X_test_feature.shape

(10000, 6272)

In [17]:
#now predict using the trained rf model
prediciton_RF = RF_model.predict(X_test_feature)
prediciton_RF.shape,y_test.shape,y_grid.shape


((10000, 10), (10000, 10), (6000, 10))

In [18]:
#print the overall accuracy
from sklearn import metrics 
print("Accuracy =",metrics.accuracy_score(y_test,prediciton_RF))



Accuracy = 0.8338


In [19]:
# import seaborn as sns
# from sklearn.metrics import confusion_matrix
# cm = confusion_matrix(y_test,prediciton_RF)
# sns.heatmap(cm,annot=True)


#**Hyper parameter Tuning**

In [20]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import LogisticRegression

In [21]:
model_params = {
    'svm':{
        'model':svm.SVC(gamma='auto'),
        'params':{
            'C':[1,], #regularization parameter
            'kernel':['rbf','linear']
        }
    },
    'random_forest':{
        'model':RandomForestClassifier(),
        'params':{
            'n_estimators':[10,20,30]
        } 
    },
    'logistic_regression':{
        'model':LogisticRegression(solver='liblinear',multi_class='auto'),
        'params':{
            'C':[1,]
        }
    }}

scores=[]






In [53]:
X_for_RF =np.array(X_for_RF)
y_grid = np.array(y_grid)
y_grid = y_grid.reshape(1,-1)
y_grid.shape


(1, 60000)

In [51]:
for model_name,mp in model_params.items():
  grid = GridSearchCV(estimator=mp['model'],
                      param_grid=mp['params'],
                      cv=5,
                      return_train_score=False
                      )
  grid.fit(X_for_RF,y_grid)
  scores.append({
      'model':model_name,
      'best_score':grid.best_score_,
      'best_params':grid.best_params_
      
  })
  

ValueError: ignored

In [56]:
import pandas as pd
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])

In [57]:
print(df)

Empty DataFrame
Columns: [model, best_score, best_params]
Index: []
