# Use Case 1 - Predict Employee Attrition

## Preprocessing Attrition Data

In [None]:
#Load the dataset and analyze

import pandas as pd
import os
import tensorflow as tf
import numpy as np

# Read the attrition records from the CSV file into a DataFrame.
attrition_data = pd.read_csv(filepath_or_buffer="employee_attrition.csv")

# Print the dtype pandas assigned to each column, then preview the first rows.
print("Data Loaded:\n------------------------\n", attrition_data.dtypes)
attrition_data.head(n=5)

In [None]:
# Correlation of every column with the target: take the 'Attrition' column
# out of the pairwise correlation matrix.

attrition_data.corr().loc[:, 'Attrition']

In [None]:
# Turn the DataFrame into a float matrix so it can be sliced by position.
np_attrition = attrition_data.to_numpy().astype(float)

# Columns 1..6 (six attributes) are the model inputs and column 7 is the
# attrition label; column 0 is not used by either.
X_train, Y_train = np_attrition[:, 1:7], np_attrition[:, 7]

# One-hot encode the label into two classes.
Y_train = tf.keras.utils.to_categorical(Y_train, num_classes=2)

print("X-Train Shape : ", X_train.shape)
print("Y-Train Shape : ", Y_train.shape)

## Build Attrition model with Keras

In [None]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2

# Hyperparameters for training the attrition classifier.
EPOCHS = 100
BATCH_SIZE = 100
VERBOSE = 1
NB_CLASSES = 2
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2

# Network: six inputs -> two ReLU hidden layers of N_HIDDEN units each ->
# softmax over NB_CLASSES outputs.
model = tf.keras.models.Sequential([
    keras.layers.Dense(N_HIDDEN,
                       input_shape=(6,),
                       name='Dense-Layer-1',
                       activation='relu'),
    keras.layers.Dense(N_HIDDEN,
                       name='Dense-Layer-2',
                       activation='relu'),
    keras.layers.Dense(NB_CLASSES,
                       name='Final',
                       activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train, holding back VALIDATION_SPLIT of the rows for validation.
model.fit(X_train,
          Y_train,
          epochs=EPOCHS,
          batch_size=BATCH_SIZE,
          validation_split=VALIDATION_SPLIT,
          verbose=VERBOSE)

## Predict Attrition with Keras

In [None]:
# Attribute values for the single employee to score.
# NOTE(review): presumably listed in the same column order as X_train - confirm
# against the CSV header.
TotalMonthsOfExp=40
TotalOrgsWorked=4
MonthsInOrg=20
LastPayIncrementBand=5
AverageFeedback=4
LastPromotionYears=4

# Sequential.predict_classes() was removed in TensorFlow 2.6. For a softmax
# output the predicted class is the index of the largest probability, so take
# the argmax of predict() instead; this works on older TF 2.x releases too.
employee_features = np.array([[TotalMonthsOfExp,
                               TotalOrgsWorked,
                               MonthsInOrg,
                               LastPayIncrementBand,
                               AverageFeedback,
                               LastPromotionYears]], dtype=float)

print("Will employee leave ?",
      np.argmax(model.predict(employee_features, verbose=0), axis=-1))

In [None]:
#Bulk predictions

# One row per employee, six attribute values each (same layout as the
# single-employee prediction).
bulk_features = np.array(
    [[111,5,85,3,2,2],
     [31,2,15,4,1,4],
     [61,4,24,1,4,3],
     [77,4,35,3,1,1],
     [81,5,7,1,2,3],
     [113,4,112,5,4,1],
     [101,2,48,5,1,4],
     [45,4,22,5,3,1],
     [25,2,2,2,3,2],
     [97,3,15,3,2,4]], dtype=float)

# Sequential.predict_classes() was removed in TensorFlow 2.6; the class index
# is the argmax over the softmax probabilities returned by predict().
print(np.argmax(model.predict(bulk_features, verbose=0), axis=-1))

# Use Case 2 - Discovering Virtual Teams

## Preparing Network Data

In [None]:
import networkx as nx
import matplotlib.pyplot as plt

from collections import Counter
from csv import reader
from itertools import combinations
import pandas as pd

#Input file with one record per chat collaboration
chat_csv = "chat_groups.csv"

# Number of chat records shared by each (First, Second) employee pair.
# A Counter keeps first-seen order and gives constant-time updates; the
# previous row-by-row DataFrame.append() is gone in pandas 2.0 and needed a
# full scan of the frame for every pair.
pair_counts = Counter()

#Read file and extract pairs and weights
with open(chat_csv, 'r', encoding="utf-8-sig") as read_obj:
    for row in reader(read_obj):
        # Drop empty cells and sort the names so that a pair is always
        # keyed as (smaller name, larger name) regardless of column order.
        members = sorted(emp for emp in row if emp)

        # Every unordered pair of participants in this record counts once.
        pair_counts.update(combinations(members, 2))

#Data frame to store employee pairs: one row per pair, built in a single step.
employee_pairs = pd.DataFrame(
    [(first, second, count)
     for (first, second), count in pair_counts.items()],
    columns=['First', 'Second', 'Count'])

print(employee_pairs)

## Create and visualize the network

In [None]:
#Create a networkX graph
graph_emps = nx.Graph()

# Add one weighted edge per employee pair (nodes get added automatically).
# Each (First, Second, Count) tuple becomes an edge whose 'weight' attribute
# is the pair's Count.
graph_emps.add_weighted_edges_from(
    employee_pairs[['First', 'Second', 'Count']].itertuples(index=False,
                                                            name=None))

# Print network summary. nx.info() was removed in NetworkX 3.0, so the
# node and edge counts are reported directly from the graph.
print("Network summary: \n-----------------\n",
      "Number of nodes:", graph_emps.number_of_nodes(),
      "\n Number of edges:", graph_emps.number_of_edges())

In [None]:
# Bucket the edges into three cohesion tiers according to their weight.

# High cohesion: weight above 5
elarge = [(src, dst)
          for src, dst, attrs in graph_emps.edges(data=True)
          if attrs['weight'] > 5]

# Medium cohesion: weight above 3 and at most 5
emedium = [(src, dst)
           for src, dst, attrs in graph_emps.edges(data=True)
           if 3 < attrs['weight'] <= 5]

# Low cohesion: weight of 3 or less
esmall = [(src, dst)
          for src, dst, attrs in graph_emps.edges(data=True)
          if attrs['weight'] <= 3]

# Compute a position for every node once and reuse it for all drawing calls.
pos = nx.spring_layout(graph_emps)

# Nodes first.
nx.draw_networkx_nodes(graph_emps, pos, node_size=700, node_color='orange')

# Then the edges, one tier at a time: thicker lines for stronger cohesion.
for edgelist, width, color in ((elarge, 6, 'blue'),
                               (emedium, 4, 'green'),
                               (esmall, 2, 'gray')):
    nx.draw_networkx_edges(graph_emps, pos,
                           edgelist=edgelist,
                           width=width,
                           edge_color=color)

# Finally the node labels.
nx.draw_networkx_labels(graph_emps, pos, font_size=16, font_family='Consolas')

plt.axis('off')
plt.show()

## Analyzing the network

In [None]:
def sort_dict(dict):
    """Print the entries of a mapping, largest value first.

    Each entry is written on its own line as ``key  =  value``. Entries
    with equal values keep their original relative order. Nothing is
    returned.

    The parameter name shadows the built-in ``dict``; it is kept so that
    existing callers are unaffected.
    """
    def by_value(entry):
        return entry[1]

    for label, score in sorted(dict.items(), key=by_value, reverse=True):
        print(label, " = ", score)


# Degree of the 'Mason' node, i.e. the number of edges attached to it.
print("\nNodes Mason is connected with :\n-------------------------------")
print(graph_emps.degree('Mason'))

# Per-node metrics, each printed under its heading with the highest score
# first: weighted clustering coefficient, degree centrality, betweenness.
for heading, metric, options in (
        ("\nClustering Co-efficient:\n----------------------",
         nx.clustering, {'weight': 'weight'}),
        ("\nCentrality :\n---------------",
         nx.degree_centrality, {}),
        ("\nBetweenness:\n--------------",
         nx.betweenness_centrality, {})):
    print(heading)
    sort_dict(metric(graph_emps, **options))

# Use Case 3 - Recommend Courses to Employees

This exercise builds a model that predicts the rating a given employee will give to a given course. We then use this model to identify the courses that the employee is likely to prefer most.

## Load Data

In [None]:
#Loading data

from csv import reader
import pandas as pd
import os
import numpy as np
import math

# Read the employee/course rating records into a DataFrame and preview them.
ratings_data = pd.read_csv(filepath_or_buffer="employee_course_ratings.csv")

ratings_data.head(n=5)

## Prepare for Embedding

In [None]:
# One row per distinct (EmployeeID, EmpName) combination; size() is used
# only to collapse the duplicates.
emp_list = (
    ratings_data
    .groupby(['EmployeeID', 'EmpName'])
    .size()
    .reset_index()
)
emp_list.head()  # mid-cell expression; Jupyter only renders a cell's last one
print("Total Employees: ", emp_list.shape[0])

# Same for the distinct (CourseID, CourseName) combinations.
course_list = (
    ratings_data
    .groupby(['CourseID', 'CourseName'])
    .size()
    .reset_index()
)
course_list.head()  # mid-cell expression; Jupyter only renders a cell's last one

print("Total Courses: ", course_list.shape[0])

In [None]:
from tensorflow.keras.layers import Input, Embedding, Flatten, Dot, Dense, Concatenate
from tensorflow.keras.models import Model

# Employee branch. The raw EmployeeID is used directly as the embedding
# index, so no ID-to-position vocabulary is built here.
# NOTE(review): input_dim=2001 assumes no EmployeeID exceeds 2000 - confirm
# against the data.
emp_input = Input(shape=(1,), name="Emp-Input")
emp_embed = Embedding(input_dim=2001,
                      output_dim=5,
                      name="Emp-Embedding")(emp_input)
emp_vec = Flatten(name="Emp-Flatten")(emp_embed)

# Course branch. The table has one more row than there are courses.
# NOTE(review): presumably CourseIDs run from 1 to len(course_list) - confirm.
course_input = Input(shape=(1,), name="Course-Input")
course_embed = Embedding(input_dim=len(course_list) + 1,
                         output_dim=5,
                         name="Course-Embedding")(course_input)
course_vec = Flatten(name="Course-Flatten")(course_embed)

# Join the two flattened embeddings into one feature vector.
merged_vec = Concatenate()([emp_vec, course_vec])

## Building the Keras Rating Model

The recommendation works as follows

1. Build a model that can predict the rating a given employee may give to a course he/she has not taken so far.
2. Use the model to predict possible ratings for all courses, for this employee.
3. Recommend the courses that have the top predicted ratings.

In [None]:
#Given an Employee and a Course, this model predicts the 
#rating the employee will give this couse

from sklearn.model_selection import train_test_split

# Hold out 10% of the ratings for the final evaluation.
ratings_train, ratings_test = train_test_split(ratings_data, test_size=0.1)

# Fully connected head on top of the merged embeddings, ending in a single
# linear unit that outputs the predicted rating.
fc_layer1 = Dense(units=128, activation="relu")(merged_vec)
fc_layer2 = Dense(units=32, activation="relu")(fc_layer1)
model_output = Dense(units=1)(fc_layer2)

rating_model = Model(inputs=[emp_input, course_input], outputs=model_output)

# Mean squared error between the predicted and the actual rating.
rating_model.compile(loss="mean_squared_error", optimizer="adam")

rating_model.summary()

print("Fitting the model:")
# Train on the (EmployeeID, CourseID) -> Rating rows, keeping 10% of the
# training rows aside for per-epoch validation.
model_fit = rating_model.fit(
    x=[ratings_train["EmployeeID"], ratings_train["CourseID"]],
    y=ratings_train["Rating"],
    validation_split=0.1,
    epochs=25,
    verbose=1,
)

print("Evaluating the model:")
# Loss on the held-out test rows.
rating_model.evaluate(
    x=[ratings_test["EmployeeID"], ratings_test["CourseID"]],
    y=ratings_test["Rating"],
)

## Recommending Courses with Keras

In [None]:
# Predicted rating for one (employee, course) pair:
# employee 1029 and course 8.

rating_model.predict(x=[pd.Series([1029]), pd.Series([8])])

In [None]:
emp_to_predict = "Harriot Laflin"

# Look up the ID of the first employee whose name matches.
pred_emp_id = emp_list.loc[emp_list['EmpName'] == emp_to_predict,
                           "EmployeeID"].iloc[0]

# Courses this employee already has a rating for; these are not scored again.
completed_courses = ratings_data.loc[
    ratings_data["EmployeeID"] == pred_emp_id, "CourseID"].unique()

# Candidate courses: everything in course_list that is not completed.
new_courses = course_list.loc[
    ~course_list["CourseID"].isin(completed_courses), "CourseID"]

# The model takes an employee Series and a course Series of equal length,
# so repeat the employee ID once per candidate course.
emp_dummy_list = pd.Series(np.full(len(new_courses), pred_emp_id))

# Score every candidate course for this employee; the model returns one
# single-element row per course, so keep just that element.
projected_ratings = rating_model.predict([emp_dummy_list, new_courses])
flat_ratings = projected_ratings[:, 0]

print("Course Ratings: ", flat_ratings)

# Show the five candidates with the highest predicted rating.
print("\nRating  CourseID CourseName\n-----------------------------------")
for idx in np.argsort(-flat_ratings)[:5]:
    course_id = new_courses.iloc[idx]
    course_name = course_list.loc[course_list["CourseID"] == course_id,
                                  "CourseName"].iloc[0]
    print(" ", round(flat_ratings[idx],1),"    ", course_id, "   ", course_name)

