### Class Setup
*Last updated: 21 Nov 2019*

Develop the class setup for our approach. This will be analogous to the class setup used CS557 Projects/Homeworks and should have access to the same methods.

In [1]:
from modules import utils
from modules import util
import pandas as pd
import pickle
from random import shuffle
import sys

In [2]:
# load training data and testing data
with open('CIFAR10_Data/train.pkl', 'rb') as fp:
    df = pickle.load(fp)
    
# subset to only dog and airplane classes for developement
df = df[df['label_name'].isin(('dog', 'airplane'))].reset_index()

# do a conversion of the hog features to dict format
for i, r in df.iterrows():
    df.at[i, 'hog_features'] = {j: ft for j, ft in enumerate(r['hog_features'])}
    
# load training data
with open('CIFAR10_Data/test.pkl', 'rb') as fp:
    df_test = pickle.load(fp)
    
# subset to only dog and airplane classes for developement
df_test = df_test[df_test['label_name'].isin(('dog', 'airplane'))].reset_index()

# do a conversion of the hog features to dict format
for i, r in df_test.iterrows():
    df_test.at[i, 'hog_features'] = {j: ft for j, ft in enumerate(r['hog_features'])}
    
df.head()

Unnamed: 0,index,label,label_name,batch,hog_features
0,27,5,dog,1,"{0: 0.13396336564598635, 1: 0.1906655101981496..."
1,29,0,airplane,1,"{0: 0.0083773331636311, 1: 0.00253145519529918..."
2,30,0,airplane,1,"{0: 0.04740756179018544, 1: 0.0339916867737869..."
3,35,0,airplane,1,"{0: 0.0077338554316584976, 1: 0.00301717145340..."
4,40,5,dog,1,"{0: 0.07158417861526833, 1: 0.0654108914948450..."


In [6]:
# save modified pickles
with open('CIFAR10_Data/train_2class.pkl', 'w') as fp:
    pickle.dump(df, fp)
    
with open('CIFAR10_Data/test_2class.pkl', 'w') as fp:
    pickle.dump(df_test, fp)

#### Modifying Approximate Q Agent

Original code was set to work with Pacman environment. Our version strips this down to only what is needed. 

Simplifications -
* feature extractor returns the image features (i.e. HOG features) for any given state, where a state in our project is a figure in the training dataset
* legal actions in this case is the same for any given state, it is just the classes we can predict. Predicting a class is a legal action, and since we can predict any class for any given state (image) it is the same for all


In [3]:
class Feature_Extractor():
    """Object for extracting features from an image file. The data is provided in a dataframe variable
    with a label_names column and an index column for state name.
    """
    def __init__(self, df):
        self.states = list(df['index'])
        self.features = list(df['hog_features'])
        
    def getFeatures(self, state, action):
        return self.features[state]


class QLearningClassifier():
    """Agent to use for project. Should be a modified version of ApproximateQAgent from Project/Homework 4
    in CS557.
    """
    def __init__(self, df, epsilon=0.05, gamma=0.8, alpha=0.2):
        # feature extractor object has a method getFeatures(..) that provides a set of features for any given
        # state (i.e. image). It takes an action but this does not affect the featurs returned
        self.featExtractor = Feature_Extractor(df) 
        
        # alpha    - learning rate
        # epsilon  - exploration rate (Not sure what this is, this the random action factor?!?)
        # gamma    - discount factor
        # numTraining - number of training episodes, i.e. no learning after these many episodes
        self.epsilon = float(epsilon)
        self.alpha = float(alpha)
        self.weights = utils.Counter()
        self.labels = list(df['label_name'])
        self.legalActions = list(set(self.labels))
        self.discount = float(gamma)
        
    def getWeights(self):
        return self.weights
    
    def getLabel(self, state):
        return self.labels[state]
    
    def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        "*** YOUR CODE HERE ***"
        Q_Values = util.Counter()
        actions = self.legalActions
        for a in actions:
            Q_Values[a] = self.getQValue(state, a)

        # Best action (maximizes Q-Value)
        max_action = Q_Values.argMax()
        return max_action
    
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.legalActions
        action = self.computeActionFromQValues(state)
        return action
    
    def getQValue(self, state, action):
        """For a given state, action pair it should return the dot product of the weight vector and the
        feature vector for that state. In our case the feature vector is the image descriptors for that image
        (note that an image is a state in our project).
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        # Q(state, action) = w dot featureVector
        features = self.featExtractor.getFeatures(state, action)
        QValue = sum( self.weights[i] * features[i] for i in features.keys() )
        return QValue
            
    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        "*** YOUR CODE HERE ***"
        # Q-Values for each a' :
        Q_Counter = util.Counter()

        # legal actions is trivial in our case as there are always as many actions as classes
        a_prime_values = self.legalActions
        for a_prime in a_prime_values:
            # Q-Value for a':
            Q_Counter[a_prime] = self.getQValue(nextState, a_prime)

        # difference = (R + gamma * max[Q(s',a')] ) - Q(s,a)
        difference = (reward + self.discount * Q_Counter[Q_Counter.argMax()] ) - self.getQValue(state, action)

        # wi = wi + alpha * difference * fi(s,a)
        features = self.featExtractor.getFeatures(state, action)
        for i in features.keys():
            self.weights[i] = self.weights[i] + self.alpha * difference * features[i]
        return 0  
    
    def train(self, epochs=10):
        """Run through the training using the trainign dataset.
        """
        
        for i, e in enumerate(range(epochs)):
            print('Epoch {} of {}'.format(i+1, epochs))
            state_list = list(range(len(self.labels)))
            shuffle(state_list)
            
            # get the initial state
            state = state_list.pop()
            while len(state_list) > 0:
                # get the best action for this state by Q-values
                action = self.getAction(state)
                
                nextState = state_list.pop()
                
                # check if action matches label, if true then reward is 1 else it is 0
                
                if action == self.labels[state]:
                    reward = 10
                else:
                    reward = -10
                
                self.update(state, action, nextState, reward)
                
            acc = game.test(df_test)
            print('\tepoch accuracy: {}'.format(acc))
                
    def test(self, test_data):
        legalActions = self.legalActions
        
        # report testing accuracy
        featExtractor = Feature_Extractor(test_data) 
        labels = list(test_data['label_name'])
        
        correct_count = 0
        actions = self.legalActions
        
        state_list = list(range(len(labels)))
                
        for state in state_list:
            Q_Values = util.Counter()
            for a in actions:
                features = featExtractor.getFeatures(state, a)
                QValue = sum( self.weights[i] * features[i] for i in features.keys() )
                Q_Values[a] = QValue

            # Best action (maximizes Q-Value)
            max_action = Q_Values.argMax()
            
            if max_action == labels[state]:
                correct_count += 1
                
        accuracy = float(correct_count) / float(len(state_list))
        return accuracy
        
        
game = QLearningClassifier(df)  
game.train()

Epoch 1 of 10
	epoch accuracy: 0.5
Epoch 2 of 10
	epoch accuracy: 0.5
Epoch 3 of 10
	epoch accuracy: 0.5
Epoch 4 of 10
	epoch accuracy: 0.5
Epoch 5 of 10
	epoch accuracy: 0.5
Epoch 6 of 10
	epoch accuracy: 0.5
Epoch 7 of 10
	epoch accuracy: 0.5
Epoch 8 of 10
	epoch accuracy: 0.5
Epoch 9 of 10
	epoch accuracy: 0.5
Epoch 10 of 10
	epoch accuracy: 0.5


In [None]:
game.weights