In [81]:
import matplotlib.pyplot as plt
import pandas as pd

# Global matplotlib appearance for every figure in this notebook.
#
# The seaborn style sheets bundled with matplotlib were renamed to
# 'seaborn-v0_8-*' in matplotlib 3.6 and the old names were removed in 3.8.
# Prefer the new name when this installation provides it; otherwise fall back
# to the legacy name so older installations behave exactly as before (and a
# missing style still raises instead of being silently skipped).
_pastel_style = next(
    (name for name in ('seaborn-v0_8-pastel', 'seaborn-pastel')
     if name in plt.style.available),
    'seaborn-pastel',
)
plt.style.use(_pastel_style)

# Overrides are applied after the style sheet so they take precedence over it.
plt.rcParams.update({
    # Canvas size (inches) and resolution used for display and savefig.
    'figure.figsize': (8, 7),
    'figure.dpi': 500,
    # Force all text, labels and ticks to plain black.
    'text.color': 'black',
    'axes.labelcolor': 'black',
    'xtick.color': 'black',
    'ytick.color': 'black',
    # Title and axis-label spacing / weight.
    'axes.titlepad': 10,
    'axes.titleweight': 1000,
    'axes.labelpad': 5,
    # Typeface and plot background.
    'font.family': 'serif',
    'axes.facecolor': 'white',
})

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.inspection import permutation_importance

# Load the two listing datasets (one CSV per marketplace).
us = pd.read_csv('us_clocks.csv')
uk = pd.read_csv('uk_clocks.csv')

# Predictor columns fed to every model; the target column is `isSold`.
independentvariables = ['listingIsTopRated', 'sellerFeedbackScore', 'sellerPositivePercent', 'sellerIsTopRated',
                        'endAtWeekend','endAtEvening','length','isBroken','isUsed','isLarge','freeShipping']

# Hold-out fraction and seed for the train/test partitions. Without a fixed
# seed every kernel restart produced a different split, so downstream scores
# and feature importances could not be reproduced.
TEST_FRACTION = 0.3
SPLIT_SEED = 0

# UK feature matrix / target.
X = uk[independentvariables]
y = uk['isSold']

# US feature matrix / target.
X_us = us[independentvariables]
y_us = us['isSold']

# 70/30 train/test partitions, one per marketplace.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED)
X_train_us, X_test_us, y_train_us, y_test_us = train_test_split(
    X_us, y_us, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

In [82]:
import matplotlib.transforms as mtransforms

# Shared y-range so the four stacked panels are directly comparable.
IMPORTANCE_YLIM = (-0.02, 0.165)
# Offset (in points) of each panel's caption box from the panel's top-left
# corner. The x value was hand-tuned for this figure size so the
# right-aligned box lands near the panel's right edge.
LABEL_OFFSET_PT = (476, -5)
# Seed for the estimators; trees break split ties by a random feature
# permutation, so without it repeated runs can yield different figures.
MODEL_SEED = 0


def plot_permutation_importance(model, X_fit, y_fit, X_eval, y_eval, ax,
                                panel_label, label_transform,
                                show_feature_names=False):
    """Fit ``model`` and draw its permutation importances as a bar chart on ``ax``.

    Parameters
    ----------
    model : estimator
        Unfitted classifier; it is fitted in place on ``X_fit`` / ``y_fit``.
    X_fit, y_fit : training features and target.
    X_eval, y_eval : held-out features and target used to score the permutations.
    ax : matplotlib Axes that receives the bars and the caption box.
    panel_label : str
        Caption text drawn inside the panel.
    label_transform : matplotlib transform
        Offset added to ``ax.transAxes`` when positioning the caption.
    show_feature_names : bool, default False
        When True the bars are labelled with the italicised column names of
        ``X_fit``; when False the x ticks are removed.

    Returns
    -------
    (model, result, importances)
        The fitted model, the ``permutation_importance`` result and the
        ``pd.Series`` of mean importances that was plotted.
    """
    model.fit(X_fit, y_fit)
    result = permutation_importance(model, X_eval, y_eval, n_repeats=30,
                                    random_state=0, scoring='balanced_accuracy')

    # Raw f-string: '\it' is mathtext markup, not a Python escape sequence.
    index = [rf'$\it{{{name}}}$' for name in X_fit.columns] if show_feature_names else None
    importances = pd.Series(result.importances_mean, index=index)
    importances.plot.bar(yerr=result.importances_std, ax=ax,
                         edgecolor='black', facecolor='pink')
    ax.set_ylim(*IMPORTANCE_YLIM)
    if not show_feature_names:
        ax.set_xticks([])

    ax.text(0.0, 1.0, panel_label, transform=ax.transAxes + label_transform,
            fontsize='medium', verticalalignment='top', horizontalalignment='right',
            fontfamily='serif', bbox=dict(facecolor='0.7', edgecolor='none', pad=3.0))
    return model, result, importances


fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1)
fig.subplots_adjust(hspace=0)
# ScaledTranslation with dpi_scale_trans works in inches, hence points / 72.
trans = mtransforms.ScaledTranslation(LABEL_OFFSET_PT[0] / 72, LABEL_OFFSET_PT[1] / 72,
                                      fig.dpi_scale_trans)

# One entry per panel, top to bottom:
# (axes, caption, estimator, train X, train y, test X, test y, label bars?)
panels = [
    (ax1, 'Decision Tree, US',
     DecisionTreeClassifier(max_depth=9, min_samples_leaf=2, min_samples_split=4,
                            random_state=MODEL_SEED),
     X_train_us, y_train_us, X_test_us, y_test_us, False),
    (ax2, 'Decision Tree, UK',
     DecisionTreeClassifier(max_depth=9, min_samples_leaf=2, min_samples_split=4,
                            random_state=MODEL_SEED),
     X_train, y_train, X_test, y_test, False),
    (ax3, 'Gradient Boosted Tree, US',
     GradientBoostingClassifier(max_depth=7, n_estimators=300, random_state=MODEL_SEED),
     X_train_us, y_train_us, X_test_us, y_test_us, False),
    (ax4, 'Gradient Boosted Tree, UK',
     GradientBoostingClassifier(max_depth=14, n_estimators=150, random_state=MODEL_SEED),
     X_train, y_train, X_test, y_test, True),
]

for panel_ax, caption, estimator, X_fit, y_fit, X_eval, y_eval, named in panels:
    # Rebinding clf / r / forest_importances each pass leaves them pointing at
    # the last (Gradient Boosted Tree, UK) panel once the loop finishes.
    clf, r, forest_importances = plot_permutation_importance(
        estimator, X_fit, y_fit, X_eval, y_eval, panel_ax, caption, trans,
        show_feature_names=named)

ax3.set_ylabel("Mean accuracy decrease")
# Rotate the feature names on the bottom panel explicitly rather than relying
# on pyplot's implicit "current axes".
ax4.tick_params(axis='x', labelrotation=70)

# NOTE(review): tight_layout() recomputes the subplot spacing, so the hspace=0
# requested above is likely overridden here; call order kept as in the
# original to avoid changing the saved figure - confirm which is intended.
fig.tight_layout()
fig.savefig('feature.png', dpi=fig.dpi, bbox_inches='tight')
plt.close(fig)