# KMNIST Classifier using Model Averaging

We combine the best models that we have (AlexNet, LeNet, etc.) and calculate the model average. The model averaging works by taking three models, with prior knowledge of which one of them is the best:
- If all of the models agree, then we do not change the predictions 
- If two agree and one disagrees, then we take the majority prediction 
- If none agree, then we choose the prediction of the model with the highest validation accuracy (note: `Model_Combine` below currently substitutes the fixed label 4 in this case) 

This approach was used for one of our best submissions on Kaggle 

## 1. Functions necessary to use methods in Utils.ipynb

In [0]:
import importlib.util
import io
import os
import sys
import types

import nbformat
from IPython import get_ipython
from IPython.core.interactiveshell import InteractiveShell

In [0]:
def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file backing a dotted module name.

    Only the last component of *fullname* is used as the file name, so
    ``"foo.bar"`` is looked up as ``bar.ipynb`` inside each directory of
    *path*.  If that file does not exist, underscores in the notebook name
    are treated as spaces, letting ``import Foo_Bar`` find ``Foo Bar.ipynb``.

    Args:
        fullname: Fully qualified module name, e.g. ``"pkg.Notebook_Name"``.
        path: Optional iterable of directories to search.  When empty or
            ``None`` the current working directory is searched.

    Returns:
        The path of the first matching notebook, or ``None`` if no
        directory contains a match.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    # Only the file name gets the underscore -> space treatment; directory
    # names (e.g. "KMNIST_ENTROPY") must be left exactly as they are.
    candidates = (name + ".ipynb", name.replace("_", " ") + ".ipynb")
    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None
            
class NotebookLoader(object):
    """Module loader that runs a Jupyter notebook's code cells as a module."""

    def __init__(self, path=None):
        """Remember the search path and grab the IPython shell instance.

        Args:
            path: Optional list of directories passed on to
                ``find_notebook`` when a module is loaded.
        """
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import the notebook named *fullname* and return it as a module.

        Every code cell is passed through IPython's input transformer (so
        magics and shell escapes become plain Python) and executed inside
        the new module's namespace.

        NOTE: this executes whatever code the notebook contains; only
        import notebooks you trust.

        Args:
            fullname: Fully qualified module name of the notebook.

        Returns:
            The populated module, which is also registered in
            ``sys.modules`` under *fullname*.

        Raises:
            ImportError: If no notebook file can be found for *fullname*.
            Exception: Anything raised by a notebook cell propagates to
                the caller; the partially built module is removed from
                ``sys.modules`` first.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            raise ImportError(
                "no notebook found for module %r" % fullname, name=fullname)

        print ("importing notebook from %s" % path)

        # load the notebook object
        nb = nbformat.read(path, as_version=4)

        # create the module and add it to sys.modules before running any
        # cell, so code inside the notebook can refer to its own module
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A failed import must not leave a half-initialised module
            # behind, otherwise the next import would silently reuse it.
            sys.modules.pop(fullname, None)
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod

class NotebookFinder(object):
    """Meta-path finder that resolves module names to IPython notebooks."""

    def __init__(self):
        # One NotebookLoader per distinct search path, created on demand.
        self.loaders = {}

    def _get_loader(self, path):
        """Return the cached ``NotebookLoader`` for *path*, creating it if needed."""
        # lists aren't hashable, so collapse the search path into one string;
        # an empty or missing path shares the ``None`` key
        key = os.path.sep.join(path) if path else None

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Return a module spec if *fullname* maps to a notebook, else ``None``.

        This is the hook the import system calls on current Python
        versions; Python 3.12 dropped support for ``find_module`` on
        meta-path finders.  The spec wraps the same ``NotebookLoader``
        that ``find_module`` returns.

        Args:
            fullname: Fully qualified name of the module being imported.
            path: Search path of the parent package, or ``None`` for a
                top-level import.
            target: Unused; accepted for compatibility with the finder
                protocol.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        return importlib.util.spec_from_loader(
            fullname, self._get_loader(path), origin=nb_path)

    def find_module(self, fullname, path=None):
        """Legacy finder hook: return a loader for *fullname*, or ``None``.

        Kept so that existing callers and older interpreters keep working.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return
        return self._get_loader(path)
      

# Register the notebook finder at the end of the meta path, i.e. after the
# finders that are already installed.
sys.meta_path.append(NotebookFinder())

### Mount Google drive

In [4]:
# Mount Google Drive at /content/gdrive/; the results path used further
# down in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


### Show that file exists in Google path
**Note**:  You will need to ensure that you have a folder named "**KMNIST_ENTROPY**" in your Google Drive, that contains  Utils.ipynb (and  \__init__.py)

In [5]:
ls gdrive/My\ Drive/KMNIST_ENTROPY

AlexNet.ipynb  Model_Averaging.ipynb  __pycache__/
__init__.py    models/                results/
LeNet.ipynb    PCA_Classifier.ipynb   Utils.ipynb


### Append the system path and import Utils.ipynb

In [0]:
# Use the absolute mount point so the import below does not depend on the
# current working directory, and do not stack duplicate entries when this
# cell is re-run.
drive_root = '/content/gdrive/My Drive/'
if drive_root not in sys.path:
    sys.path.append(drive_root)

In [7]:
# Star-import of the Utils notebook (loaded through the notebook finder
# registered above). The cells below presumably get np, pd and
# save_predictions_ns from here -- TODO confirm against Utils.ipynb.
from KMNIST_ENTROPY.Utils import *

# If this cell fails with a "module not found" error, please restart the
# runtime of the Colab notebook and run the cells above again

importing notebook from gdrive/My Drive/KMNIST_ENTROPY/Utils.ipynb
Populating the interactive namespace from numpy and matplotlib
Cuda installed! Running on GPU!
Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
Utils.ipynb has finished downloading


## 2. Function to combine the models
### Function to combine predictions of the 3 models

In [0]:
def Model_Combine(prediction1, prediction2, prediction3, fallback_label=4):
    """Merge the label predictions of three models sample by sample.

    For every sample:

    * if ``prediction2`` and ``prediction3`` agree, their shared label is
      used (when ``prediction1`` agrees as well this is simply the
      unanimous label);
    * otherwise the label of ``prediction1`` is kept;
    * if all three labels differ, ``fallback_label`` is used instead.

    Unlike the earlier version, which always processed exactly the first
    10000 entries, every sample in the inputs is combined.

    Args:
        prediction1: Sequence of predicted labels from the first model.
            Note: this is treated as the most accurate model.
        prediction2: Sequence of predicted labels from the second model.
        prediction3: Sequence of predicted labels from the third model.
        fallback_label: Label emitted when all three models disagree.
            The default of 4 keeps the historic behaviour (it was chosen
            as the "least predicted number in 3 models").  Pass ``None``
            to fall back on ``prediction1`` instead.

    Returns:
        numpy array with one combined label per sample.

    Raises:
        ValueError: If the three inputs do not have the same shape.
    """
    pred1 = np.asarray(prediction1)
    pred2 = np.asarray(prediction2)
    pred3 = np.asarray(prediction3)
    if not (pred1.shape == pred2.shape == pred3.shape):
        raise ValueError(
            "prediction arrays must have the same shape, got %s, %s and %s"
            % (pred1.shape, pred2.shape, pred3.shape))

    # Models 2 and 3 agreeing covers both the unanimous case and the
    # 2-vs-1 majority against model 1; in every other case model 1 wins.
    combined_prediction = np.where(pred2 == pred3, pred2, pred1)

    if fallback_label is not None:
        all_differ = (pred1 != pred2) & (pred1 != pred3) & (pred2 != pred3)
        combined_prediction[all_differ] = fallback_label

    return combined_prediction


### Function to combine predictions of the 5 models

In [0]:
def Model_Combine5(prediction1, prediction2, prediction3, prediction4, prediction5):
    """Merge the label predictions of five models by majority vote.

    For every sample the most frequent label among the five models is
    chosen.  When several labels are tied for most frequent, the smallest
    of them is returned.

    Unlike the earlier version, which always processed exactly the first
    10000 entries, every sample in the inputs is combined.

    Args:
        prediction1: 1-D sequence of predicted labels from the first model.
        prediction2: 1-D sequence of predicted labels from the second model.
        prediction3: 1-D sequence of predicted labels from the third model.
        prediction4: 1-D sequence of predicted labels from the fourth model.
        prediction5: 1-D sequence of predicted labels from the fifth model.

    Returns:
        numpy array with one combined label per sample.

    Raises:
        ValueError: If the inputs do not all have the same shape
            (raised by ``np.stack``).
    """
    # One row per sample, one column per model.
    votes = np.stack(
        [np.asarray(p) for p in (prediction1, prediction2, prediction3,
                                 prediction4, prediction5)],
        axis=1)

    combined_prediction = []
    for sample_votes in votes:
        labels, counts = np.unique(sample_votes, return_counts=True)
        # np.unique returns the labels sorted and argmax picks the first
        # maximum, so ties resolve to the smallest label.
        combined_prediction.append(labels[np.argmax(counts)])

    return np.array(combined_prediction, dtype=votes.dtype)

## 3.  Combining 5 models

In [0]:
# 5 model combination
path = "/content/gdrive/My Drive/KMNIST_ENTROPY/results/"

# Prediction csv files, listed in the order in which the models are handed
# to Model_Combine5. Each file must provide a 'Category' column.
prediction_csvs = [
    "LeNet5_kmnist_classifier_random_aug_3_choice_5_kernel_ep_30_complete.csv",
    "LeNet5_kmnist_classifier_random_aug_4_choice_5_kernel_ep_20_complete.csv",
    "Lenet3_fulltraining_random_transfer_wd1e-3_k3.csv",
    "LeNet5_kmnist_classifier_random_aug_20_choice_5_kernel_ep_10_complete.csv",
    "AlexNet_fiftyepoch.csv",
]

# Read csv files and convert the 'Category' column of each to an array
(prediction_file1, prediction_file2, prediction_file3,
 prediction_file4, prediction_file5) = [
    np.array(pd.read_csv(path + csv_name)['Category'])
    for csv_name in prediction_csvs
]

# Final combination
pred_combined2 = Model_Combine5(prediction_file1, prediction_file2, prediction_file3, prediction_file4, prediction_file5)

## 4. Recursive combination with two separate best models  

We have combined the previous 5 models into one model. Then, we take this model and combine with two of our best models. Note that these two models are separate from the previous 5 models used. 

In [0]:
# Best 3: the 5-model ensemble from the previous cell plus our two best
# stand-alone models.
path = "/content/gdrive/My Drive/KMNIST_ENTROPY/results/"

# Prediction csv files of the two stand-alone models
prediction_file1 = path+"LeNet5_kmnist_classifier_random_aug_20_choice_5_kernel_ep_10_complete_extended.csv"
prediction_file2 = path+"LeNet5_kmnist_classifier_random_aug_20_choice_5_kernel_ep_15_complete_extended.csv"

# Load both files, then pull the 'Category' column out of each as an array
predpd1, predpd2 = pd.read_csv(prediction_file1), pd.read_csv(prediction_file2)
pred1, pred2 = (np.array(frame['Category']) for frame in (predpd1, predpd2))

# Final combination; the 5-model ensemble goes first, which is the slot
# Model_Combine treats as the most accurate model.
pred_combined_all = Model_Combine(pred_combined2, pred1, pred2)

### Save CSV file 
Save the final csv file of the combined predictions 

In [0]:
# Hand the final ensemble predictions to save_predictions_ns under the name
# "pred_combined_all". The helper is not defined in this notebook --
# presumably it comes from the Utils.ipynb star-import; output location and
# file format: TODO confirm there.
save_predictions_ns(pred_combined_all, name="pred_combined_all")