In [1]:
# This dataset has four variables, each using different numerical scales and units. If we want 
# to use this dataset for a neural network model, we need to standardize these variables, 
# otherwise it is likely that "Satisfaction_Level" (which ranges between 0 and 1) would be 
# undervalued compared to "Time_Spent" (which ranges from approximately 50 to 4,800 hours). 
# Let's start by adding the following code to the notebook:

# Import our dependencies
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the HR dataset from the CSV file and preview its first five rows
hr_df = pd.read_csv(filepath_or_buffer="hr_dataset.csv")
hr_df.head(n=5)

Unnamed: 0,Satisfaction_Level,Num_Projects,Time_Spent,Num_Promotions
0,0.3,1,253,2
1,0.25,1,200,0
2,0.9,4,2880,5
3,0.65,3,1450,3
4,0.5,2,785,2


In [2]:
# If this dataset contained categorical data, we would need to separate the categorical 
# columns from the numerical ones prior to scaling, because only numerical data is standardized.

# To apply our standardization, we need to create a StandardScaler instance by adding 
# and running the following code:

# Instantiate the scaler, spelling out its default settings: each column is centered on
# its mean and divided by its standard deviation
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [3]:
# Once we have our StandardScaler instance, we need to fit it to the input data by adding and 
# running the next line of code:

# Learn each column's mean and standard deviation from the dataset
scaler.fit(X=hr_df)

StandardScaler()

In [4]:
# After our StandardScaler instance is fitted with the numerical data, we can transform and 
# standardize the dataset using the following code:

# Standardize every column using the statistics learned during fitting
scaled_data = scaler.transform(X=hr_df)

In [5]:
# Lastly, once the StandardScaler instance has returned our transformed data (a NumPy array by 
# default), we must load the transformed data into a Pandas DataFrame. Again, we must add and 
# run the following code in our notebooks:

# Wrap the scaled values in a DataFrame that carries over both the column names and the
# row index of hr_df, so each standardized row stays aligned with its original row even
# if hr_df does not use the default 0..n-1 index
transformed_scaled_data = pd.DataFrame(
    scaled_data, columns=hr_df.columns, index=hr_df.index
)
transformed_scaled_data.head()

Unnamed: 0,Satisfaction_Level,Num_Projects,Time_Spent,Num_Promotions
0,-1.303615,-1.162476,-1.049481,-0.558656
1,-1.512945,-1.162476,-1.094603,-1.804887
2,1.208335,0.860233,1.18708,1.310692
3,0.161689,0.185996,-0.030385,0.06446
4,-0.466299,-0.48824,-0.596549,-0.558656


In [None]:
# Now the data is ready to be passed along to our neural network model.