In [5]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# import jtplot module in notebook
from jupyterthemes import jtplot

# choose which theme to inherit plotting style from
# onedork | grade3 | oceans16 | chesterish | monokai | solarizedl | solarizedd
jtplot.style(theme='onedork')

# set "context" (paper, notebook, talk, poster)
# scale font-size of ticklabels, legend, etc.
# remove spines from x and y axes and make grid dashed
jtplot.style(context='talk', fscale=1.4, spines=False, gridlines='--')

# turn on X- and Y-axis tick marks (default=False)
# turn off the axis grid lines (default=True)
# and set the default figure size
jtplot.style(ticks=True, grid=False, figsize=(6, 4.5))

# reset default matplotlib rcParams
jtplot.reset()

In [6]:
df=pd.read_csv("HR_comma_sep.csv")
columns_names=df.columns.tolist()
correlation=df.corr()

In [7]:
df.corr()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years
satisfaction_level,1.0,0.105021,-0.14297,-0.020048,-0.100866,0.058697,-0.388375,0.025605
last_evaluation,0.105021,1.0,0.349333,0.339742,0.131591,-0.007104,0.006567,-0.008684
number_project,-0.14297,0.349333,1.0,0.417211,0.196786,-0.004741,0.023787,-0.006064
average_montly_hours,-0.020048,0.339742,0.417211,1.0,0.127755,-0.010143,0.071287,-0.003544
time_spend_company,-0.100866,0.131591,0.196786,0.127755,1.0,0.00212,0.144822,0.067433
Work_accident,0.058697,-0.007104,-0.004741,-0.010143,0.00212,1.0,-0.154622,0.039245
left,-0.388375,0.006567,0.023787,0.071287,0.144822,-0.154622,1.0,-0.061788
promotion_last_5years,0.025605,-0.008684,-0.006064,-0.003544,0.067433,0.039245,-0.061788,1.0


In [8]:
groupby_sales=df.groupby('sales').mean()



In [9]:
IT=groupby_sales['satisfaction_level'].IT
RandD=groupby_sales['satisfaction_level'].RandD
accounting=groupby_sales['satisfaction_level'].accounting
hr=groupby_sales['satisfaction_level'].hr
management=groupby_sales['satisfaction_level'].management
marketing=groupby_sales['satisfaction_level'].marketing
product_mng=groupby_sales['satisfaction_level'].product_mng
sales=groupby_sales['satisfaction_level'].sales
support=groupby_sales['satisfaction_level'].support
technical=groupby_sales['satisfaction_level'].technical

In [67]:
df_drop=df.drop(labels=['sales', 'salary'], axis=1)
df_drop

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years
0,0.38,0.53,2,157,3,0,1,0
1,0.80,0.86,5,262,6,0,1,0
2,0.11,0.88,7,272,4,0,1,0
3,0.72,0.87,5,223,5,0,1,0
4,0.37,0.52,2,159,3,0,1,0
5,0.41,0.50,2,153,3,0,1,0
6,0.10,0.77,6,247,4,0,1,0
7,0.92,0.85,5,259,5,0,1,0
8,0.89,1.00,5,224,5,0,1,0
9,0.42,0.53,2,142,3,0,1,0


In [11]:
cols=df_drop.columns.tolist()
cols
cols.insert(0,cols.pop(cols.index('left')))
cols

['left',
 'satisfaction_level',
 'last_evaluation',
 'number_project',
 'average_montly_hours',
 'time_spend_company',
 'Work_accident',
 'promotion_last_5years']

In [69]:
df_drop=df_drop.reindex(columns=cols)
df_drop

Unnamed: 0,left,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years
0,1,0.38,0.53,2,157,3,0,0
1,1,0.80,0.86,5,262,6,0,0
2,1,0.11,0.88,7,272,4,0,0
3,1,0.72,0.87,5,223,5,0,0
4,1,0.37,0.52,2,159,3,0,0
5,1,0.41,0.50,2,153,3,0,0
6,1,0.10,0.77,6,247,4,0,0
7,1,0.92,0.85,5,259,5,0,0
8,1,0.89,1.00,5,224,5,0,0
9,1,0.42,0.53,2,142,3,0,0


In [96]:
X=df_drop.iloc[:, 1:8].values
Y=df_drop.iloc[:,0].values


# np.shape(X)
# np.shape(Y)

In [65]:
X_std=StandardScaler().fit_transform(X)
mean_vec=np.mean(X_std, axis=0)
mean_vec
cov_mat=(X_std-mean_vec).T.dot((X_std-mean_vec))/(X_std.shape[0]-1)
cov_mat

array([[ 1.00006668,  0.10502822, -0.14297912, -0.02004945, -0.1008728 ,
         0.05870115,  0.02560689],
       [ 0.10502822,  1.00006668,  0.34935588,  0.33976445,  0.1315995 ,
        -0.00710476, -0.00868435],
       [-0.14297912,  0.34935588,  1.00006668,  0.41723845,  0.19679901,
        -0.00474086, -0.00606436],
       [-0.02004945,  0.33976445,  0.41723845,  1.00006668,  0.12776343,
        -0.01014356, -0.00354465],
       [-0.1008728 ,  0.1315995 ,  0.19679901,  0.12776343,  1.00006668,
         0.00212056,  0.06743742],
       [ 0.05870115, -0.00710476, -0.00474086, -0.01014356,  0.00212056,
         1.00006668,  0.03924805],
       [ 0.02560689, -0.00868435, -0.00606436, -0.00354465,  0.06743742,
         0.03924805,  1.00006668]])

In [17]:
eig_vals, eig_vecs=np.linalg.eig(cov_mat)

In [97]:
Y=Y.reshape(-1,1)
Y=Y.astype('float32')
Y.shape
    

(14999, 1)

In [87]:
eig_vals1=eig_vals.reshape(-1,1)

In [113]:
x=tf.placeholder(tf.float32, shape=[None,7])
y=tf.placeholder(tf.float32, shape=[None,1])
w=tf.Variable(tf.random_normal([7,1]), name= 'weight')
b=tf.Variable(tf.random_normal([1]), name= 'bias')
hf=tf.matmul(x,w)+b
# cost=-tf.reduce_mean(y*tf.log(hf)+(1-y)*tf.log(1-hf))
cost= tf.reduce_mean(tf.square(hf-y))

In [116]:
predicted=tf.cast(hf>0.5,dtype=tf.float32)
train=tf.train.GradientDescentOptimizer(0.001).minimize(cost)
accuracy=tf.reduce_mean(tf.cast(tf.equal(predicted,y),dtype=tf.float32))

In [117]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        _, cv=sess.run([train, cost],feed_dict={x:X, y:Y})
        if step % 2000==0:
            print(step, cv)
    hv,pv, av=sess.run([hf, predicted, accuracy], feed_dict={x:X, y:Y})
    print("hf=", hv, "pred=", pv, 'acc=', av)

0 2397.5117
2000 nan
4000 nan
6000 nan
8000 nan
10000 nan
hf= [[nan]
 [nan]
 [nan]
 ...
 [nan]
 [nan]
 [nan]] pred= [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]] acc= 0.7619175
