In [None]:
!pip install --upgrade raiwidgets
!pip install --upgrade pandas

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import sklearn
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
import pandas as pd

from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights
from urllib.request import urlretrieve
import zipfile

### Use IBM Employee Attrition Dataset

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

def split_label(dataset, target_feature):
    """Separate a DataFrame into its feature columns and its label column.

    Args:
        dataset: DataFrame containing both the features and the target column.
        target_feature: Name of the target column.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is ``dataset`` without the
        target column; ``labels`` is a single-column DataFrame (not a Series)
        holding only the target column.
    """
    feature_frame = dataset.drop(columns=[target_feature])
    label_frame = dataset.loc[:, [target_feature]]
    return feature_frame, label_frame

def _make_dense_one_hot_encoder():
    """Return a OneHotEncoder that ignores unknown categories and emits dense arrays.

    The dense-output keyword was renamed from ``sparse`` to ``sparse_output`` in
    scikit-learn 1.2 and the old spelling was later removed, so the new name is
    tried first and the old one is used only as a fallback.
    """
    try:
        return OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        # Older scikit-learn: the constructor has no `sparse_output` keyword.
        return OneHotEncoder(handle_unknown='ignore', sparse=False)


def clean_data(X, y, target_feature):
    """Fit a preprocessing transformer on ``X`` and return the transformed features.

    ``int64`` columns are median-imputed and standardized; ``object`` columns are
    imputed with the constant ``'?'`` and one-hot encoded.

    Args:
        X: DataFrame of raw feature columns.
        y: DataFrame containing the ``target_feature`` column.
        target_feature: Name of the label column inside ``y``.

    Returns:
        A tuple ``(X_transformed, feat_pipe, features, classes)``:
        the transformed feature matrix, the fitted ``ColumnTransformer``, the
        list of original column names of ``X``, and the list of unique label
        values found in ``y[target_feature]``.
    """
    features = X.columns.values.tolist()
    classes = y[target_feature].unique().tolist()

    # NOTE(review): only exact `int64` / `object` dtypes are selected. Columns of
    # any other dtype (e.g. float64) are passed to neither sub-pipeline and so,
    # with ColumnTransformer's default remainder handling, would be dropped -
    # confirm this is intended before reusing on other datasets.
    pipe_cfg = {
        'num_cols': X.dtypes[X.dtypes == 'int64'].index.values.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }

    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        ('cat_encoder', _make_dense_one_hot_encoder())
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])

    X = feat_pipe.fit_transform(X)
    # Echo the columns that were treated as categorical.
    print(pipe_cfg['cat_cols'])
    return X, feat_pipe, features, classes



# Download the zipped sample data and unpack it into the working directory.
outdirname = 'dataset.6.21.19'
zipfilename = outdirname + '.zip'
dataset_url = 'https://publictestdatasets.blob.core.windows.net/data/' + zipfilename
urlretrieve(dataset_url, zipfilename)
with zipfile.ZipFile(zipfilename, 'r') as archive:
    archive.extractall('.')
all_data = pd.read_csv('./WA_Fn-UseC_-HR-Employee-Attrition.csv')

# Columns removed before modelling:
#   EmployeeCount  - all values are 1, so attrition is independent of it
#   EmployeeNumber - merely an identifier
#   Over18         - no reason recorded; presumably constant as well (TODO confirm)
#   StandardHours  - all values are 80
uninformative_columns = ['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours']
all_data = all_data.drop(uninformative_columns, axis=1)

# Relabel the target: 'Yes'/'No' become the class names 'Leaving'/'Staying'.
# Despite the column's name, the values remain strings, not numbers.
target_map = {'Yes': 'Leaving', 'No': 'Staying'}
target_feature = "Attrition_numerical"
all_data[target_feature] = all_data["Attrition"].apply(lambda label: target_map[label])
all_data = all_data.drop(['Attrition'], axis=1)

# Split features from labels, then hold out half of the rows for testing.
X, y = split_label(all_data, target_feature)
X_train_original, X_test_original, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=7
)

# Fit the preprocessing on the training rows only and reuse it for the test rows.
# clean_data() needs y_train as a DataFrame, so it runs before the conversion below.
X_train, feat_pipe, features, classes = clean_data(X_train_original, y_train, target_feature)
X_test = feat_pipe.transform(X_test_original)

# From here on the labels are plain 1-D numpy arrays.
y_train = y_train[target_feature].to_numpy()
y_test = y_test[target_feature].to_numpy()

# Raw (untransformed) feature frames with the label column appended.
train_data = X_train_original.assign(**{target_feature: y_train})
test_data = X_test_original.assign(**{target_feature: y_test})

In [None]:
from lightgbm import LGBMClassifier

# Train a LightGBM classifier with default hyperparameters on the preprocessed
# training matrix.
clf = LGBMClassifier()
clf.fit(X_train, y_train)
# `model` is the name under which the dashboard pipeline cell picks up the
# fitted estimator; fit() returns the estimator itself, so it is the same object.
model = clf

In [None]:
# Names of all object-dtype (string-valued) columns. `DataFrame.items()` is used
# because `iteritems()` no longer exists in pandas 2.x.
object_columns = [col for col, value in all_data.items() if value.dtype == 'object']

# Every column that is not object-typed is treated as numerical. The label column
# is object-typed, so it is excluded here as well.
numerical = all_data.columns.difference(object_columns)

# The label column is the target, not a feature, so keep it out of the
# categorical feature list.
categorical = [col for col in object_columns if col != 'Attrition_numerical']

In [None]:
# Display the list of categorical feature names (label column excluded).
categorical

In [None]:
# Display the column names that were not classified as object-typed.
numerical

### Compute Responsible AI model and dataset insights

In [None]:
# Chain the fitted preprocessing and the trained classifier so the combined
# model accepts the raw feature columns held in train_data / test_data.
dashboard_pipeline = Pipeline(steps=[('preprocess', feat_pipe), ('model', model)])

# NOTE(review): a sorted label order would be ['Leaving', 'Staying']; confirm
# that RAIInsights does not depend on the order given here.
class_labels = ['Staying', 'Leaving']
model_analysis2 = RAIInsights(
    dashboard_pipeline,
    train_data,
    test_data,
    target_feature,
    'classification',
    categorical_features=categorical,
    classes=class_labels,
)

# Queue the individual Responsible AI analyses; they are executed by compute().
treatment_features = ['BusinessTravel', 'StockOptionLevel', 'WorkLifeBalance']
model_analysis2.explainer.add()
model_analysis2.counterfactual.add(10, desired_class='opposite')
model_analysis2.error_analysis.add()
model_analysis2.causal.add(treatment_features=treatment_features)

# Run everything that was queued above.
model_analysis2.compute()

In [None]:
# Build the dashboard widget from the computed insights. The call is kept as the
# cell's final bare expression so the notebook displays its result.
# NOTE(review): the feature flight presumably switches on the newer model
# overview UI - confirm against the raiwidgets documentation.
ResponsibleAIDashboard(model_analysis2,
                       feature_flights="newModelOverviewExperience")