In [None]:
import pandas as pd

In [None]:
# Load the A/B-test result log (presumably one row per click — confirm against the export).
# Later cells read its DATE, CONTENTID, SEX and AGE columns.
ab_results = pd.read_csv('../AB-test/A_B test_results_v2.csv')

In [None]:
from datetime import datetime  # not used in this cell; kept so the name stays available

# Test window: 14 consecutive days starting at the smallest DATE value in the log.
datelist = pd.date_range(min(ab_results['DATE']), periods = 14)
# The same days as 'YYYY-MM-DD' strings, the form in which they are compared to DATE.
date_li = [stamp.strftime('%Y-%m-%d') for stamp in datelist]

# For every day in the window, count the rows of each test arm.
# Note that the CONTENTID values being matched contain literal double quotes.
clicksPerDay = {}
for day in date_li:
    day_rows = ab_results[ab_results['DATE'] == day]
    clicksPerDay[day] = {
        'related-articles-abtest': len(day_rows[day_rows['CONTENTID'] == '"related-articles-abtest"']),
        'recommender-abtest': len(day_rows[day_rows['CONTENTID'] == '"recommender-abtest"']),
    }

# Narrow the log itself to the same window for the cells that follow.
ab_results = ab_results[ab_results['DATE'].isin(date_li)]

In [None]:
# Short x-axis labels: the last 8 characters of each day key.
dates = [day[-8:] for day in clicksPerDay]
X_coordinates = list(range(len(clicksPerDay)))
# Per-day counts for each arm, in the same order as `dates`.
Y_coordinates_realted = [counts.get('related-articles-abtest') for counts in clicksPerDay.values()]
Y_coordinates_recommender = [counts.get('recommender-abtest') for counts in clicksPerDay.values()]

In [None]:
# Relative difference in total clicks over the window, with the tag-based arm as baseline.
recommender_total = sum(Y_coordinates_recommender)
related_total = sum(Y_coordinates_realted)
percentage = (recommender_total - related_total) / related_total
print(f'The A/B test shows that the method using NB-SBERT-BASE has {round(percentage*100,2)}% more clicks rather than the more generic recommender based on tags')

In [None]:
import matplotlib.pyplot as plt

# Daily clicks of both arms as two lines, with the relative difference of the
# recommender arm over the tag-based arm annotated for every day.
fig, ax = plt.subplots(figsize=(16, 8))

# The string labels in `dates` are drawn at positions 0..n-1; the annotations use
# the same positions.
vals_to_percentage = list(range(len(dates)))
ax.plot(dates, Y_coordinates_recommender, label='Bert Recommender (new)')
ax.plot(dates, Y_coordinates_realted, label='Generic tags recommender (former)')

ax.set_xlabel('Dates')
ax.set_ylabel('Clicks')
ax.set_ylim([0,1850])

ax.legend()

for x, y1, y2 in zip(vals_to_percentage, Y_coordinates_recommender, Y_coordinates_realted):
    midpoint = (y1 + y2) / 2
    if y2:
        diff = ((y1 - y2) / y2) * 100
        label = f'{diff:.1f}%'
    else:
        # No baseline clicks that day: the relative difference is undefined, so
        # label the day instead of letting the division raise ZeroDivisionError.
        label = 'n/a'
    # Dashed connector between the two arms' values for the day.
    ax.plot([x, x], [y1, y2], alpha=0.5, color='gray', linestyle='--', linewidth=1.5, zorder=1)
    ax.annotate(label, xy=(x, midpoint), xytext=(x + 0.2, midpoint),
                arrowprops=dict(arrowstyle='->', connectionstyle="arc3", alpha=0.5), ha='left', va='center', zorder=2)
#plt.savefig('ab_test_general.png', format='png', dpi=200,bbox_inches='tight')
plt.show()

In [None]:
# Total rows per arm in ab_results (already narrowed to the 14-day window by an earlier cell).
is_bert_click = ab_results['CONTENTID'] == '"recommender-abtest"'
is_tags_click = ab_results['CONTENTID'] == '"related-articles-abtest"'
BERTtotalClicks = len(ab_results[is_bert_click])
TagstotalClicks = len(ab_results[is_tags_click])

### Click-through rate (CTR)

In [None]:
# DataFrame with the number of users who have seen the A/B-test recommendation widget
# (read below through its DATE and TOTAL_VIEWS columns).
# NOTE(review): the result log is loaded from '../AB-test/', but this path has no '../'
# prefix — confirm which of the two relative paths is correct.
totalViews = pd.read_csv('AB-test/total_views_per_day_v2.csv')
totalViews

In [None]:
# Short date labels (last 8 characters of DATE), running positions and the view counts.
view_days = totalViews['DATE']
dates_views = [day[-8:] for day in view_days]
X_coordinates_views = list(range(len(view_days)))
Y_coordinates_views = totalViews['TOTAL_VIEWS']

In [None]:
import matplotlib.pyplot as plt

# Line chart of TOTAL_VIEWS against the short date labels.
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(dates_views, Y_coordinates_views, label='Total views')

# Axis cosmetics: fixed y-range starting at zero.
ax.set_ylim(0, 41000)
ax.set_ylabel('Views')
ax.set_xlabel('Dates')
ax.legend()

#plt.savefig('ab_test_views.png', format='png', dpi=200,bbox_inches='tight')
plt.show()

In [None]:
# Map each DATE to half of that day's TOTAL_VIEWS.
# NOTE(review): the halving presumably reflects an even split of views between the two arms — confirm.
viewsPerDay = {
    day: views / 2
    for day, views in zip(totalViews['DATE'], totalViews['TOTAL_VIEWS'])
}

In [None]:
# Click-through rate in percent per day and arm: clicks / (views attributed to one arm) * 100.
# Both lists follow the iteration order of clicksPerDay.
y_coordinates_ctr_tags = []
y_coordinates_ctr_bert = []

for day, counts in clicksPerDay.items():
    # Check the lookup explicitly: dict.get() would hand back None for a day without
    # view data and the division would then fail with an opaque TypeError.
    if day not in viewsPerDay:
        raise KeyError(f'no view count available for {day}; cannot compute its CTR')
    views = viewsPerDay[day]
    y_coordinates_ctr_tags.append((counts['related-articles-abtest'] / views) * 100)
    y_coordinates_ctr_bert.append((counts['recommender-abtest'] / views) * 100)

In [None]:
import matplotlib.pyplot as plt

# Daily CTR of both arms as two lines, with the relative difference of the
# recommender arm over the tag-based arm annotated for every day.
fig, ax = plt.subplots(figsize=(16, 8))

# The CTR lists were filled while iterating clicksPerDay, so the x labels are taken
# from the same keys. This keeps label i and value i on the same day regardless of
# the row order of the views table, and removes the dependency on `dates` from the
# clicks section.
ctr_dates = [day[-8:] for day in clicksPerDay]
vals_to_percentage = list(range(len(ctr_dates)))
ax.plot(ctr_dates, y_coordinates_ctr_bert, label='Bert Recommender (new)')
ax.plot(ctr_dates, y_coordinates_ctr_tags, label='Generic tags recommender (former)')

ax.set_xlabel('Dates')
ax.set_ylabel('CTR (%)')

ax.set_ylim([0,11.5])

ax.legend()

for x, y1, y2 in zip(vals_to_percentage, y_coordinates_ctr_bert, y_coordinates_ctr_tags):
    midpoint = (y1 + y2) / 2
    if y2:
        diff = ((y1 - y2) / y2) * 100
        label = f'{diff:.1f}%'
    else:
        # Baseline CTR of zero: the relative difference is undefined, so label the
        # day instead of letting the division raise ZeroDivisionError.
        label = 'n/a'
    # Dashed connector between the two arms' values for the day.
    ax.plot([x, x], [y1, y2], alpha=0.5, color='gray', linestyle='--', linewidth=1.5, zorder=1)
    ax.annotate(label, xy=(x, midpoint), xytext=(x + 0.2, midpoint),
                arrowprops=dict(arrowstyle='->', connectionstyle="arc3", alpha=0.5), ha='left', va='center', zorder=2)
#plt.savefig('ab_test_ctr.png', format='png', dpi=200,bbox_inches='tight')
plt.show()

### Gender and Age figures

### BERT and Tags on gender in subplots figure

In [None]:
# Split the log by test arm (CONTENTID values contain literal double quotes).
df_bertModel = ab_results[ab_results['CONTENTID'] == '"recommender-abtest"']
df_tagModel = ab_results[ab_results['CONTENTID'] == '"related-articles-abtest"']

# Split each arm further by the three SEX values used in this analysis.
sex_codes = ('F', 'M', 'Unspecified')
bertFemale, bertMale, bertUnspecified = (
    df_bertModel[df_bertModel['SEX'] == code] for code in sex_codes
)
tagFemale, tagMale, tagUnspecified = (
    df_tagModel[df_tagModel['SEX'] == code] for code in sex_codes
)

In [None]:
# Row counts per gender group and the matching legend labels, one pair of lists per arm.
BERTgenderLabels = ['Female','Male','Unknown']
TaggenderLabels = ['Female','Male','Unknown']
BERTgenderValues = [len(group) for group in (bertFemale, bertMale, bertUnspecified)]
TaggenderValues = [len(group) for group in (tagFemale, tagMale, tagUnspecified)]

In [None]:
# Two pie charts side by side, one per arm, showing the gender split of its rows.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

panels = (
    (ax1, 'BERT on gender', BERTgenderLabels, BERTgenderValues),
    (ax2, 'Tags on gender', TaggenderLabels, TaggenderValues),
)
for axis, title, names, counts in panels:
    wedges, _ = axis.pie(counts, labels=None, startangle=90)
    axis.set_title(title)

    # The legend carries the absolute count and the share of each slice.
    total = sum(counts)
    legend_entries = [f'{name} ({count}, {count/total*100:.1f}%)' for name, count in zip(names, counts)]

    axis.legend(wedges, legend_entries, title='Gender (Count, Percentage)', loc='lower center')

fig.subplots_adjust(wspace=0.005, bottom=0.005)

#plt.savefig('../Figures_ab/ab_test_gender.png', format='png', dpi=200,bbox_inches='tight')
plt.show()

### BERT and Tags on age in subplots figure

In [None]:
# Age threshold for the split; rows with a missing AGE form their own group.
age = 45

bert_age = df_bertModel['AGE']
bertOver45 = df_bertModel[bert_age > age]
bertUnder45 = df_bertModel[bert_age <= age]
bertNone = df_bertModel[bert_age.isna()]

tag_age = df_tagModel['AGE']
tagOver45 = df_tagModel[tag_age > age]
tagUnder45 = df_tagModel[tag_age <= age]
tagNone = df_tagModel[tag_age.isna()]

In [None]:
# Row counts per age group and the matching legend labels, one pair of lists per arm.
BERTageLabels = [f'Age > {age}', f'Age <= {age}', 'Age = Unknown']
TagsageLabels = list(BERTageLabels)
BERTageValues = [len(group) for group in (bertOver45, bertUnder45, bertNone)]
TagsageValues = [len(group) for group in (tagOver45, tagUnder45, tagNone)]

In [None]:
# Two pie charts side by side, one per arm, showing the age split of its rows.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

panels = (
    (ax1, 'BERT on age', BERTageLabels, BERTageValues),
    (ax2, 'Tags on age', TagsageLabels, TagsageValues),
)
for axis, title, names, counts in panels:
    wedges, _ = axis.pie(counts, labels=None, startangle=90)
    axis.set_title(title)

    # The legend carries the absolute count and the share of each slice.
    total = sum(counts)
    legend_entries = [f'{name} ({count}, {count/total*100:.1f}%)' for name, count in zip(names, counts)]

    axis.legend(wedges, legend_entries, title='Age (Count, Percentage)', loc='lower center')

fig.subplots_adjust(wspace=0.005, bottom=0.005)

#plt.savefig('../Figures_ab/ab_test_age.png', format='png', dpi=200,bbox_inches='tight')
plt.show()