In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
import dmba
from dmba import classificationSummary
import matplotlib.pyplot as plt
import matplotlib

from PIL import Image # i was having issues with the "Interactive backend" and this was the only way to display the visual

In [None]:
# Load the hair-care promotion dataset bundled with dmba.
data = dmba.load_data('Hair-Care-Product.csv')

# Make the headers usable as attributes: dots and spaces become underscores,
# then any doubled underscore produced by that is collapsed to a single one.
data.columns = list(map(
    lambda name: name.replace('.', '_').replace(' ', '_').replace('__', '_'),
    data.columns,
))
data.head()

In [None]:
# Purchase rate among records that received the promotion (Promotion_ord == 1).
# Each aggregate is computed once with the vectorized Series.sum() and reused,
# and the ratio label is built from the values instead of being hard-coded.
promo_purchases = (data['Purchase'] * data['Promotion_ord']).sum()
promo_records = data['Promotion_ord'].sum()

print("Number of purchases made", promo_purchases)

print("Total Number of Records", promo_records)

print(f"{promo_purchases} / {promo_records} = ", promo_purchases / promo_records)

In [None]:
# Purchase rate among records that did NOT receive the promotion
# (Promotion_ord == 0, selected via the complement 1 - Promotion_ord).
# Each aggregate is computed once with the vectorized Series.sum() and reused,
# and the ratio label is built from the values instead of being hard-coded.
control_flag = 1 - data['Promotion_ord']
control_purchases = (data['Purchase'] * control_flag).sum()
control_records = control_flag.sum()

print("Number of purchases made", control_purchases)

print("Total Number of Records", control_records)

print(f"{control_purchases} / {control_records} = ", control_purchases / control_records)

In [None]:
# Inspect the cleaned column names before encoding the categorical fields.
data.columns

In [None]:
# Mark the two nominal fields as categorical, then expand them into indicator
# columns. drop_first=True omits one level per field to avoid redundant dummies.
# NOTE: `data` is rebound to the encoded frame, so Hair_Color / U_S_Region no
# longer exist afterwards -- reload the data before re-running this cell.
data = pd.get_dummies(
    data.astype({'Hair_Color': 'category', 'U_S_Region': 'category'}),
    drop_first = True,
)
data.head()

In [None]:
# Separate the target from the predictors.
y = data['Purchase']
x = data.drop(columns = ['Purchase'])

# Standardize every predictor; multiplying by 1.0 promotes the columns to float
# before they are handed to the scaler.
scaler = preprocessing.StandardScaler()
x_norm = scaler.fit_transform(x * 1.0)

# Rebuild a labelled frame from the scaled array. Column names and index are
# taken from `x` itself so the labels stay correct wherever 'Purchase' sits in
# `data`, and so the axis=1 concat aligns row-for-row with the target.
data_norm = pd.concat([pd.DataFrame(x_norm, columns = x.columns, index = x.index),
                       y], axis = 1)

# 60/40 train/validation partition with a fixed seed for reproducibility.
train, valid = train_test_split(data_norm, test_size = 0.4, random_state = 1)

In [None]:
# Fit a 100-tree random forest on the training partition. random_state is fixed
# (matching the seed used for the train/validation split) so the fitted forest,
# and everything derived from it below, is reproducible across runs.
rfModel = RandomForestClassifier(n_estimators = 100, random_state = 1)
rfModel.fit(train.drop(columns = ['Purchase']), train.Purchase)

# Confusion matrix / accuracy on the held-out validation partition.
pred = rfModel.predict(valid.drop(columns = ['Purchase']))
classificationSummary(valid.Purchase, pred)

In [None]:
# Score every validation record twice -- once as if it received the promotion
# and once as if it did not -- and take the difference in purchase probability.
upliftDF = valid.drop(columns = ['Purchase']).copy()

# The model was trained on standardized features, so the treatment / control
# settings must be expressed on that same scale. For a 0/1 indicator the
# standardized "1" is the column maximum and the standardized "0" the minimum.
# Raw 1 and 0 are not values the model ever saw and only fall on the intended
# side of a tree split depending on how balanced the two groups happen to be.
promo_on = data_norm['Promotion_ord'].max()
promo_off = data_norm['Promotion_ord'].min()

upliftDF['Promotion_ord'] = promo_on
predTreatment = rfModel.predict_proba(upliftDF)
upliftDF['Promotion_ord'] = promo_off
predControl = rfModel.predict_proba(upliftDF)

# Column 1 of predict_proba is the second class in rfModel.classes_
# (presumably Purchase == 1 -- confirm against the data).
# NOTE: the `_knn` suffix is a leftover name; these scores come from the random
# forest above. The name is kept because a later cell reads it.
upliftResult_knn = pd.DataFrame({
    'probMessage': predTreatment[:,1],
    'probNoMessage': predControl[:,1],
    'uplift': predTreatment[:,1] - predControl[:,1],
    }, index = upliftDF.index)

# Rank records from highest to lowest uplift and plot the resulting curve.
upliftResult = upliftResult_knn.sort_values(by=['uplift'], ascending=False)
upliftResult.reset_index().plot(x = None, y = 'uplift')

# Save the figure to disk and open it in the system image viewer (workaround
# for an interactive-backend problem noted next to the PIL import).
fig = plt.gcf()
fig.savefig('plot.jpg', format = 'jpg', dpi = 300)

image = Image.open('plot.jpg')
image.show() #it'll create a pop up of the visual

In [None]:
# Three validation records with the largest predicted uplift
# (upliftResult is sorted by 'uplift' in descending order).
upliftResult.head(3)

In [None]:
# First three rows of the unsorted uplift table, i.e. in the validation
# partition's own row order, for comparison with the ranked view.
upliftResult_knn.head(3)