<a href="https://colab.research.google.com/github/swapon15/machine_learning/blob/master/CDE_Extended.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install PyDrive and import the modules needed to download files from Google Drive
Source: Code Snippets - Downloading files or importing data from Google Drive

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the current Colab user, then create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
# Give PyDrive the application-default credentials (presumably the ones made
# available by authenticate_user() above -- confirm against the Colab docs).
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the download cell uses to fetch files by their ID.
drive = GoogleDrive(gauth)

## Downloading the files

In [0]:
# Download the helper modules from Google Drive, one file per file ID.
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
file_id_1 = '15ztdu7YfMJVvAX3nLQrl9w8VPZolJxMU' #data.py
file_id_2 = '1WbFNmolzLI3GjQ1MFcNEh_r7r6ahSWir' #constant.py
#file_id_3 = '1Wy_R-Ao93mvn91AgkyJLw_7tqpCDbKcE' #doTest.py
file_id_4 = '1yKMFBNXTGggA2NdCQAWZA-MGV_N-QoID' #sparsity.py
file_id_5 = '1jcASbHMOEExGDsz1LsBSs8kGY8dJG2z8' #myDataset.py
file_ids = [file_id_1, file_id_2, file_id_4, file_id_5]
# Local file names, in the same order as file_ids.
file_names = ['data.py', 'constant.py', 'sparsity.py', 'myDataset.py']

# The two lists are paired by position, so fail loudly if they drift apart
# instead of silently skipping (or mis-naming) a download.
if len(file_ids) != len(file_names):
    raise ValueError('file_ids and file_names must have the same length')

# Build the Drive file handles without leaking a loop variable named `id`,
# which would shadow the builtin for the rest of the notebook session.
all_modules = [drive.CreateFile({'id': file_id}) for file_id in file_ids]

# Save each remote file under its local module name.
for module, file_name in zip(all_modules, file_names):
    module.GetContentFile(file_name)
    

## Import the data modules and the required sklearn module

In [0]:

import data as pdbc
import constant
import numpy as np
from sklearn.neural_network import MLPClassifier


## Splitting the test solutions randomly into training and test sets

In [56]:
%%writefile splitter.py
def split_test_solutions(test_solutions, split_ratio=0.40):
    """
    Randomly split the test solutions into two disjoint subsets.

    Exactly floor(split_ratio * len(test_solutions)) entries are drawn at
    random, without replacement, for training; every remaining entry goes to
    the test subset. Each entry therefore ends up in exactly one subset, and
    both subsets keep the iteration order of ``test_solutions``.

    Parameters:
    test_solutions (dictionary): each key is a tuple of features e.g. (1, 2, 10, 1)
                                 for a two dimensional number game. Here, 1, 2 is
                                 a number and 10, 1 is another number.

                                 each value is a tuple of win/loss and the respective
                                 index/position number of the key, e.g. (True, 9999)

    split_ratio        (float):  fraction of the solutions, between 0 and 1, used
                                 for training. split_ratio * 100 percent go to
                                 training and (1 - split_ratio) * 100 percent to
                                 testing.

    Returns            (tuple):  (training_set, test_set), two dictionaries.

    Raises:
    ValueError: if split_ratio is not within [0, 1].
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(
            "split_ratio must be between 0 and 1, got %r" % (split_ratio,)
        )

    total_points = len(test_solutions)
    training_points = int(np.floor(split_ratio * total_points))

    # The prefix of a random permutation is a sample without replacement, so
    # the training subset has exactly `training_points` distinct entries and
    # no entry can leak into both subsets.
    shuffled_positions = np.random.permutation(total_points)
    training_positions = set(shuffled_positions[:training_points].tolist())

    training_set, test_set = {}, {}
    for position, (features, labels) in enumerate(test_solutions.items()):
        if position in training_positions:
            training_set[features] = labels
        else:
            test_set[features] = labels

    return (training_set, test_set)


def make_data_compatible(training_solutions, test_solutions):
    """
    Convert the training and test solution dictionaries into the plain
    lists expected by sklearn's MLPClassifier.

    Parameters:
      training_solutions (dict): randomly split training data
      test_solutions     (dict): randomly split test data
    Returns:
          X_train  (list): training samples, each key converted to a list
          y_train  (list): label (first element of each value) per training sample
          X_test   (list): test samples, each key converted to a list
          y_test   (list): label (first element of each value) per test sample

    """
    def _to_samples_and_labels(solutions):
        # Keys become the feature rows; only element 0 of each value is
        # kept as the label.
        samples = [list(feature_key) for feature_key in solutions]
        targets = [outcome[0] for outcome in solutions.values()]
        return samples, targets

    X_train, y_train = _to_samples_and_labels(training_solutions)
    X_test, y_test = _to_samples_and_labels(test_solutions)
    return (X_train, y_train, X_test, y_test)



def get_data(amount):
    """
    Load the synthetic test solutions, split them randomly and convert
    both parts into lists.

    Parameters: amount (float): ratio used for splitting the test solutions

    Returns: the tuple (X_train, y_train, X_test, y_test) produced by
             make_data_compatible for the two randomly split subsets
    """
    # Only the first item returned by the data producer is used here.
    solutions, _unused = pdbc.getSyntheticData(constant.COMP_ON_ONE)
    # Random split; `amount` is the share that goes to training.
    train_part, test_part = split_test_solutions(solutions, amount)
    return make_data_compatible(train_part, test_part)


Overwriting splitter.py


## A neural network with one hidden layer

In [57]:
%%writefile neural.py
def build_MLP():
    """
    Create an MLPClassifier configured with the learning settings
    used throughout this notebook.
    Parameters: No parameter
    Returns   : an untrained MLPClassifier instance
    """
    settings = {
        'hidden_layer_sizes': (10, ),  # one hidden layer with 10 nodes
        'max_iter': 100,
        'alpha': 1e-4,
        'solver': 'lbfgs',
        'tol': 1e-4,
        'random_state': 1,  # fixed seed passed to the classifier
    }
    return MLPClassifier(**settings)

def get_MLP_test_accuracy(X_train, y_train, X_test, y_test):
    """
    Fit a freshly built MLP on the training data and score it on the
    test data.
    Parameters:
    X_train  (list): all the training samples
    y_train  (list): respective labels for the training samples
    X_test   (list): all the test samples
    y_test   (list): respective labels for the test samples

    Returns:
    accuracy (float): value returned by the model's score() on the test data
    """
    # fit() returns the fitted estimator, so scoring can be chained onto it.
    return build_MLP().fit(X_train, y_train).score(X_test, y_test)

def get_MLP_prediction(X_train, y_train, X_test, y_test):
    """
    Fit a freshly built MLP on the training data and return its
    predictions for the test samples.
    Parameters:
    X_train  (list): all the training samples
    y_train  (list): respective labels for the training samples
    X_test   (list): all the test samples
    y_test   (list): respective labels for the test samples (not used
                     here; kept so the signature matches
                     get_MLP_test_accuracy)

    Returns:
    predicted_label: labels predicted for X_test
    """
    # fit() returns the fitted estimator, so predict() can be chained onto it.
    trained_model = build_MLP().fit(X_train, y_train)
    return trained_model.predict(X_test)

Overwriting neural.py


## Client code for the data splitter and the neural network

In [58]:
# splitter.py and neural.py use np, pdbc, constant and MLPClassifier without
# importing them, so they are executed in this notebook's namespace (where
# those names already exist) instead of being imported as modules.
# The `with` block makes sure each file handle is closed after reading.
for module_file in ('splitter.py', 'neural.py'):
    with open(module_file) as module_source:
        exec(module_source.read())

# Share of the solutions used for training; the rest is used for testing.
amount = 0.40
X_train, y_train, X_test, y_test = get_data(amount)
predicted_labels = get_MLP_prediction(X_train, y_train, X_test, y_test)
print(predicted_labels)




[False False False ... False False  True]
