In [None]:
""" ___________________________________________________________________________
Code sample to be submitted to Wayfair.                                       .
Created on Wed Dec.5.2018.                                                    .
@author: Fakhteh Saadatniaki                                                  .
                                                                              .
Project: Artificial Neural Network (ANN) to predict the apply rate on         .
Glassdoor employment website with area under the curve (AUC) as metric of     .
interest.                                                                     .
                                                                              .
Training and test data provided by a lead data scientist @ Glassdoor.         .

Each row in the dataset corresponds to a user’s view of a job listing. It has .
11 columns as described below.
1) title_proximity_tfidf: Measures the closeness of query and job title.      .
2) description_proximity_tfidf: Measures the closeness of query and job       .
   description.                                                               .
3) main_query_tfidf: A score related to user query closeness to job title and .
   job description.                                                           .
4) query_jl_score: Measures the popularity of query and job listing pair.     .
5) query_title_score: Measures the popularity of query and job title pair.    .
6) city_match: Indicates if the job listing matches to user/user-specified    .
   location.                                                                  .
7) job_age_days: Indicates the age of job listing posted.                     .
8) apply: Indicates if the user has applied for this job listing.             .
9) search_date_pacific: Date of the activity.                                 .
10) u_id: ID of user (for privacy reasons ID is anonymized).                  .
11) mgoc_id: Class ID of the job title clicked.                               .

Training set: The examples with the “search date pacific” column (9-th column).
              between 01/21/2018-01/26/2018.                                  .
Test set: The examples with the “search date pacific” column (9-th column)    .
              on 01/27/2018.                                                  .
Inputs to the input layer::Features: First 7 columns.
Label {0,1}: 8th column, i.e., apply.                                         .
____________________________________________________________________________"""

""" __________________________ Importing Modules ___________________________"""
import numpy as np
import pandas as pd
# Seed NumPy's global RNG so that the random weight initialisation and the
# per-epoch shuffling further down are reproducible between runs.
np.random.seed(1)

# ------------------------- Preprocessing the Data --------------------------
# The first `num_features` columns of the CSV are the model inputs; the
# column right after them holds the label.
num_features = 7
filename = 'GlassdoorData.csv'
raw_data = pd.read_csv(filename)

In [None]:
# Notebook inspection step: display the frame exactly as read from the CSV.
print(raw_data)

In [None]:
# ---------------- Removing Examples with Missing Data ----------------------
# Every -1 in the frame is treated as a missing value: it is turned into NaN
# first, and then each row that contains at least one NaN is discarded.
with_nans = raw_data.replace({-1: np.nan})
raw_data_ = with_nans.dropna()

# Keep a copy of the cleaned data on disk for later inspection.
raw_data_.to_csv('RawData.csv', sep=',', encoding='utf-8')

In [None]:
# Notebook inspection step: display the frame that remains after the rows
# containing -1 (converted to NaN) were dropped.
print(raw_data_)

In [None]:
# ------------------------------ Training Set -------------------------------
# NOTE(review): the file header describes the training set as 01/21/2018 to
# 01/26/2018, but only the rows dated 2018-01-25 are selected here. Confirm
# whether this single-day subset is intentional before widening the filter.
train_flag = raw_data_['search_date_pacific'] == '2018-01-25'
raw_data_train = raw_data_[train_flag]

# Take an explicit copy: the columns are overwritten below, and writing into a
# slice of a filtered frame triggers pandas' chained-assignment warning and
# may leak the changes into raw_data_train.
features_train = raw_data_train.iloc[:, 0:num_features].copy()
ones_col_vec = np.ones(features_train.shape[0])

# Standardize all features except for the binary feature city_match (column
# position 5): subtract the column mean, then divide by the population
# standard deviation (ddof=0, which is what np.std uses).
scaled_cols = [col for pos, col in enumerate(features_train.columns)
               if pos != 5]
centered = features_train[scaled_cols].astype(float)
centered = centered - centered.mean()
spread = centered.std(ddof=0)
# A constant column has zero spread; leave it at 0 instead of producing NaN.
spread = spread.where(spread != 0, 1.0)
# Whole-column replacement (rather than .iloc assignment) lets integer
# columns become float without a dtype-incompatibility warning.
features_train[scaled_cols] = centered / spread

# Augment the inputs with a leading constant-1 column acting as the bias input.
features_train.insert(0, 'bias', ones_col_vec)
features_train.to_csv('TrainingFeat.csv', sep=',', encoding='utf-8')

# The label is the column immediately after the feature columns.
labels_train = raw_data_train.iloc[:, num_features]
labels_train.to_csv('TrainingLabels.csv', sep=',', encoding='utf-8')

In [None]:
# Notebook inspection step: display the training inputs, i.e. the leading
# 'bias' column of ones followed by the feature columns.
print(features_train)

In [None]:
# -------------------------------- Test Set ---------------------------------
test_flag = raw_data_['search_date_pacific'] == '2018-01-27'
raw_data_test = raw_data_[test_flag]

# Explicit copy so that overwriting columns below neither warns about chained
# assignment nor leaks into raw_data_test.
features_test = raw_data_test.iloc[:, 0:num_features].copy()
ones_col_vec = np.ones(features_test.shape[0])

# Standardize all features except for the binary feature city_match (column
# position 5).
scaled_cols = [col for pos, col in enumerate(features_test.columns)
               if pos != 5]

# The scaling statistics come from the TRAINING rows, not from the test rows:
# the network is fitted on inputs scaled with the training mean/std, so the
# test inputs must go through the very same transformation. Scaling the test
# set with its own statistics would put it on a different scale and let
# test-set information influence the evaluation.
# They are recomputed from raw_data_ (which the training cell never
# modifies) so this cell does not depend on intermediate training variables.
train_reference = raw_data_.loc[train_flag, scaled_cols].astype(float)
train_mean = train_reference.mean()
train_std = (train_reference - train_mean).std(ddof=0)
# A constant training column has zero spread; avoid dividing by zero.
train_std = train_std.where(train_std != 0, 1.0)

# Whole-column replacement lets integer columns become float cleanly.
features_test[scaled_cols] = (
    features_test[scaled_cols].astype(float) - train_mean
) / train_std

# Augment the inputs with a leading constant-1 column acting as the bias input.
features_test.insert(0, 'bias', ones_col_vec)
print(features_test)
features_test.to_csv('TestFeat.csv', sep=',', encoding='utf-8')

# The label is the column immediately after the feature columns.
labels_test = raw_data_test.iloc[:, num_features]
labels_test.to_csv('TestLabels.csv', sep=',', encoding='utf-8')

# Boolean masks over the test rows, split by their true label.
yes_example = np.isin(labels_test, 1)  # rows whose true label is 1
no_example = np.isin(labels_test, 0)   # rows whose true label is 0

In [None]:
"""________________________ ReLU Activation Function _______________________"""     
class ReLU:
    """Rectified linear unit: ``max(a, 0)`` applied element-wise."""

    @staticmethod
    def activation(a):
        """Return ``a`` with every negative entry replaced by 0.

        A new array is returned and the argument is left untouched, so a
        caller can keep using its pre-activation values (e.g. for the
        derivative) after the call. Scalars are accepted as well.
        """
        return np.maximum(a, 0)

    @staticmethod
    def deriv(a):
        """Return the derivative mask of ReLU at ``a``.

        The result is boolean: True where ``a > 0`` (slope 1) and False
        elsewhere (slope 0, including at exactly 0).
        """
        return a > 0
"""_________________________________________________________________________"""

"""______________________ Sigmoid Activation Function ______________________"""     
class Sigmoid:
    """Logistic sigmoid ``1 / (1 + exp(-a))`` applied element-wise."""

    @staticmethod
    def activation(a):
        """Return the sigmoid of ``a`` for a scalar or an array of any size.

        Inputs below -50 are clamped to -50 to avoid numerical issues with
        the exponential. The clamp is element-wise (``np.maximum``); the
        builtin ``max`` would raise on arrays with more than one element and
        would turn a one-element array into a plain scalar.
        """
        clamped = np.maximum(a, -50)
        return 1 / (1 + np.exp(-clamped))

    @staticmethod
    def deriv(a):
        """Return the sigmoid derivative ``s * (1 - s)`` with ``s = sigmoid(a)``."""
        s = Sigmoid.activation(a)  # evaluate once and reuse
        return s * (1 - s)
"""_________________________________________________________________________"""

In [None]:
"""_______________________ Loss Function: Cross Entropy ____________________"""
class CE:
    """Binary cross-entropy loss bundled with the output activation."""

    def __init__(self, activ_fn):
        """
        Args:
            param activ_fn: Activation function class/object exposing
            ``activation(a)``; e.g., Sigmoid and ReLU.
        """
        self.activ_fn = activ_fn

    def activation(self, a):
        """Apply the stored activation function to ``a``."""
        return self.activ_fn.activation(a)

    @staticmethod
    def loss(t, y):
        """
        Args:
            param t (int in {0,1}): True label (scalar or array).
            param y (float in [0,1]): Generated output, probability
            (scalar or array).
        Return:
            ce (flt): Cross entropy, -(t*log(y) + (1-t)*log(1-y)).
        """
        # A prediction of exactly 0 or 1 would evaluate log(0) and yield
        # inf/nan; keep y a tiny distance away from both ends instead.
        eps = 1e-12
        y = np.clip(y, eps, 1 - eps)
        ce = -(t * np.log(y) + (1 - t) * np.log(1 - y))
        return ce
"""_________________________________________________________________________"""

In [None]:
""" ____________________ Artificial Neural Network Setup ______________________
                                                                              .
Input layer: | Hidden layer(s) | Output layer (P[Apply=1])|                   .
     /\                                                                       .
     \/            /\                                                         . 
     /\            \/                                                         .
     \/            /\                                 |`````````|             .
     /\            \/                          /\ __\ |Threshold| />: Yes     .
     \/            /\                          \/   / |_________| \<: No      .
     /\            \/                                                         .
     \/            /\                                                         .
     /\            \/                                                         .
     \/            /\                                                         .
     /\            \/                                                         .
     \/            /\                                                         .
     /\            \/                                                         .
     \/                                                                       .
7+1(bias) Neurons | n_h+1(bias) Neurons in hidden layer h | 1 Neuron          .
____________________________________________________________________________"""
class Network:
    """Fully connected feed-forward network trained by per-example SGD.

    Layers are numbered from 1 (input) to ``num_layers`` (output).
    ``W[k]`` maps the outputs of layer ``k`` to the weighted sums of layer
    ``k + 1`` and ``activ[k]`` is the activation applied by layer ``k``
    (k >= 2; the input layer has none).

    Bias handling: input vectors are expected to arrive already augmented
    with a constant-1 entry, and neuron 0 of every hidden layer is
    overwritten with 1 after activation so it acts as that layer's bias unit.
    """

    def __init__(self, dim, activ):
        """
        Args:
            param dim (any iterable): Dimensions of the neural net where the
                element at index i denotes the number of nodes in the
                corresponding layer: (num_features, n_1,...,n_H,n_o);
                e.g., (7+1,n_1+1,1)
            param activ (any iterable): Activation function(s) to be applied
                to neurons in the order of layers; e.g., ReLU for the hidden
                layer and Sigmoid for the output layer.
        Example architecture:
        - Input layer: 8 Neurons
        - One hidden layer: 6 Neurons
        - Output layer: 1 Neuron
        Layer               |  1      2         3
        ------------------------------------------
        # of neurons (dim)  | [8,     6,        1]
        Activation (activ)  |       (ReLU, Sigmoid)
        """
        self.num_layers = len(dim)
        self.loss = None            # set by train()
        self.learning_rate = None   # set by train()

        # Keys are layer numbers, 1 being the input layer.
        self.W = {}      # e.g. {1: W[1], 2: W[2]}
        self.activ = {}  # e.g. {2: ReLU, 3: Sigmoid}

        for lay in range(self.num_layers - 1):
            # Small symmetric random initial weights.
            self.W[lay + 1] = np.random.uniform(
                -0.1, 0.1, (dim[lay + 1], dim[lay]))
            # No activation is applied to the inputs, hence the +2 offset.
            self.activ[lay + 2] = activ[lay]

    def _feed_forward(self, x):
        """
        Forward propagation of one example through every layer.
        Args:
            param x (1D array-like): Augmented input feature vector.
        Return:
            z (dict): Layer outputs; z[1] is the input and z[num_layers]
                the predicted output (a 1D array).
            a (dict): Weighted sums of inputs per layer, before activation.
        """
        z = {1: np.asarray(x, dtype=float)}
        a = {}

        for lay in range(2, self.num_layers + 1):
            a[lay] = np.dot(self.W[lay - 1], z[lay - 1])
            # Hand the activation a copy and store its result as a fresh
            # array: an in-place activation, or the bias write below, must
            # never alter the pre-activations kept in a[lay].
            activated = self.activ[lay].activation(a[lay].copy())
            z[lay] = np.atleast_1d(np.array(activated, dtype=float))
            if lay < self.num_layers:
                z[lay][0] = 1  # bias neuron of the hidden layer

        return z, a

    def _back_prop(self, z, a, t):
        """
        Backpropagate the error of one example and update all weights.
        Args:
            param z (dict): Layer outputs as returned by _feed_forward.
            param a (dict): Weighted sums as returned by _feed_forward.
            param t (int in {0,1}): True label.

        The output delta is taken as ``z[num_layers] - t``. NOTE(review):
        that is the cross-entropy gradient w.r.t. the output weighted sum
        only when the output activation is a sigmoid; confirm before using
        another output activation.
        """
        last = self.num_layers

        # Delta and partial derivatives for the output layer.
        delta = z[last] - t
        updates = {last - 1: np.outer(delta, z[last - 1])}

        # Walk backwards through the hidden layers. All gradients are
        # computed with the current weights before any update is applied.
        for lay in range(last - 1, 1, -1):
            delta = np.dot(self.W[lay].T, delta) * self.activ[lay].deriv(a[lay])
            # Neuron 0 is the constant bias unit: its output does not depend
            # on its incoming weights, so no gradient flows through it.
            delta[0] = 0.0
            updates[lay - 1] = np.outer(delta, z[lay - 1])

        # Update the weights.
        for lay, dw in updates.items():
            self._update_w_b(lay, dw)

    def _update_w_b(self, layer, dw):
        """
        Update the weights (biases included) of one layer according to
        stochastic gradient descent.
        Args:
            param layer (int): Key of the weight matrix in self.W.
            param dw (array): Partial derivatives of the loss w.r.t. the
                weights, broadcastable to the shape of self.W[layer].
        """
        self.W[layer] -= self.learning_rate * dw

    def train(self, x_vec, t_vec, loss, epochs, learning_rate=1e-3):
        """
        Train the neural network.
        Args:
            param x_vec (2D array-like or DataFrame): Augmented feature
                vectors, one example per row.
            param t_vec (1D array-like or Series): Binary labels.
            param loss: Loss class (cross entropy in case of
                classification); it is instantiated with the output
                activation function.
            param epochs (int): Number of epochs for SGD.
            param learning_rate (flt)
        Raises:
            ValueError: If the number of examples and labels differ.
        """
        # Convert once up front: this accepts plain arrays as well as pandas
        # objects and avoids a slow per-row DataFrame lookup in the loop.
        features = np.asarray(x_vec, dtype=float)
        targets = np.asarray(t_vec)
        if features.shape[0] != targets.shape[0]:
            raise ValueError("Length of x and t arrays don't match")

        # Initiate the loss object with the final activation function.
        self.loss = loss(self.activ[self.num_layers])
        self.learning_rate = learning_rate

        n_examples = features.shape[0]
        for epoch in range(epochs):
            # Visit the examples in a fresh random order every epoch.
            for idx in np.random.permutation(n_examples):
                z, a = self._feed_forward(features[idx])
                self._back_prop(z, a, targets[idx])
            print(epoch)  # progress indicator: index of the finished epoch

    def test(self, x_vec, threshold):
        """
        Args:
            param x_vec (2D array-like or DataFrame): Augmented feature
                vectors, one example per row.
            param threshold (float between 0 and 1)
        Return:
            y_pred (list): One entry per example; each entry is the boolean
                array ``output > threshold`` of that example.
        """
        features = np.asarray(x_vec, dtype=float)
        return [self._feed_forward(row)[0][self.num_layers] > threshold
                for row in features]

In [None]:
#if __name__ == "__main__":
# Build the network: 8 input neurons (7 features + bias), one hidden layer of
# 6 neurons using ReLU, and a single output neuron using Sigmoid.
nn = Network((8, 6, 1), (ReLU, Sigmoid))

In [None]:
# Inspect the randomly initialised weights: a dict keyed by layer number
# holding one weight matrix per layer transition.
print(nn.W)

In [None]:
# Inspect which activation is assigned to each non-input layer (keys 2 and 3
# for this three-layer network).
print(nn.activ)

In [None]:
# Run a single SGD pass (epochs=1) over the training rows with learning rate
# 1e-3. CE is passed as a class; train() instantiates it with the output
# activation.
nn.train(features_train, labels_train, loss=CE, epochs=1, learning_rate=1e-3)

In [None]:
# Classify every test row with a 0.5 cut-off.
# NOTE: despite the name, nn.test returns the thresholded result
# (output > threshold) per row, i.e. boolean decisions, not probabilities.
pred_prob = nn.test(features_test,0.5)

In [None]:
# Notebook inspection step: display the per-row decisions held in pred_prob.
print(pred_prob)

In [None]:
# Wrap the predictions in a one-column DataFrame (column name 'col'), one row
# per test example.
df = pd.DataFrame({'col':pred_prob})

In [None]:
# Notebook inspection step: display the prediction frame.
print(df)

In [None]:
# Persist the predictions as comma-separated UTF-8 text in 'Pred.csv'
# (the DataFrame index is written as the first column).
df.to_csv('Pred.csv', sep=',', encoding='utf-8')