# Set-Up and Import

In [1]:
import pandas as pd
import os
from ast import literal_eval

In [2]:
os.getcwd()

'/home/tomoswynroberts/reedsy-challenge-data'

In [3]:
# adding literal_eval conversion to the concatenated column to read as list
reedsy_df = pd.read_csv("data/dataset.tsv", sep = '\t' \
                        ,converters = {'popup_version|start_date|popup_category': literal_eval})

In [17]:
## Alternative approach to splitting without literal approach
# reedsy_df_1 = pd.read_csv("data/dataset.tsv", sep = '\t')
# reedsy_df_1["popup_version|start_date|popup_category"] = \
# reedsy_df_1["popup_version|start_date|popup_category"].apply(lambda x:x[2:-2].split('","'))
# reedsy_df_1[["popup_version", "start_date" , "popup_category"]] = \
# pd.DataFrame(reedsy_df_1["popup_version|start_date|popup_category"].tolist(), index = reedsy_df_1.index)
# reedsy_df_1.shape

# Data sense-checks

In [4]:
reedsy_df.head(3)

Unnamed: 0,popup_name,blog_post_url,popup_version|start_date|popup_category,popup_header,popup_description,popup_image_url,popup_title,views,registrations
0,Author Bio Template 2 (New Style),https://blog.bookly.com/about-the-author-examp...,"[A, 2020-05-01, Perfecting your Craft]",\nFREE: Author Bio Template,Download a copy and write your bio in under 5 ...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,About the Author Examples (That You'll Actuall...,1554,70
1,Author Bio Template 2 (New Style),https://blog.bookly.com/about-the-author-examp...,"[A, 2020-06-05, Perfecting your Craft]",\nFREE: Author Bio Template,Download a copy and write your bio in under 5 ...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,About the Author Examples (That You'll Actuall...,2667,126
2,Author Bio Template 2 (New Style),https://blog.bookly.com/about-the-author-examp...,"[A, 2020-05-01, Perfecting your Craft]",\nFREE: Author Bio Template,Download a copy and write your bio in under 5 ...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,About the Author Examples (That You'll Actuall...,35,0


In [5]:
reedsy_df.shape

(2063, 9)

In [6]:
reedsy_df.columns

Index(['popup_name', 'blog_post_url',
       'popup_version|start_date|popup_category', 'popup_header',
       'popup_description', 'popup_image_url', 'popup_title', 'views',
       'registrations'],
      dtype='object')

In [7]:
reedsy_df.dtypes

popup_name                                 object
blog_post_url                              object
popup_version|start_date|popup_category    object
popup_header                               object
popup_description                          object
popup_image_url                            object
popup_title                                object
views                                       int64
registrations                               int64
dtype: object

In [8]:
reedsy_df.describe()

Unnamed: 0,views,registrations
count,2063.0,2063.0
mean,1285.44159,22.99176
std,3486.148325,144.024029
min,21.0,0.0
25%,21.0,0.0
50%,91.0,0.0
75%,994.0,21.0
max,45101.0,5397.0


In [9]:
 reedsy_df.isnull().sum().sum()

0

In [13]:
## Dup check:
reedsy_df[reedsy_df.loc[:, reedsy_df.columns != "popup_version|start_date|popup_category"].duplicated(keep = False)]

Unnamed: 0,popup_name,blog_post_url,popup_version|start_date|popup_category,popup_header,popup_description,popup_image_url,popup_title,views,registrations,popup_version,start_date,popup_category
1293,Literary Devices 1,https://blog.bookly.com/metaphor-examples/,"[, 2019-12-26, Perfecting your Craft]",\nFree Download: Complete List of Literary Dev...,\nGet all the literary devices on this page (p...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,﻿90+ Metaphor Examples in Literature That You ...,21,0,,2019-12-26,Perfecting your Craft
1294,Literary Devices 1,https://blog.bookly.com/metaphor-examples/,"[, 2019-12-26, Perfecting your Craft]",\nFree Download: Complete List of Literary Dev...,\nGet all the literary devices on this page (p...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,﻿90+ Metaphor Examples in Literature That You ...,21,0,,2019-12-26,Perfecting your Craft
1773,Upgrade | Character Profile | 2020-03,https://blog.bookly.com/book-title-generator/f...,"[B, 2020-06-05, generator]",Free Download: Character Profile Template,Develop your characters like a bestselling aut...,https://blog-cdn.bookly.com/directories/admin/...,﻿﻿﻿Fantasy Book Title Generator • The Ultimate...,21,0,B,2020-06-05,generator
1775,Upgrade | Character Profile | 2020-03,https://blog.bookly.com/book-title-generator/f...,"[B, 2020-06-05, generator]",Free Download: Character Profile Template,Develop your characters like a bestselling aut...,https://blog-cdn.bookly.com/directories/admin/...,﻿﻿﻿Fantasy Book Title Generator • The Ultimate...,21,0,B,2020-06-05,generator


# Expand concatenated column

In [10]:
## Check concatenated column
print(reedsy_df["popup_version|start_date|popup_category"][0])
print(reedsy_df["popup_version|start_date|popup_category"][3])

['A', '2020-05-01', 'Perfecting your Craft']
['', '2020-05-27', 'Book Marketing, Understanding Publishing']


In [11]:
## Split column:
reedsy_df[["popup_version", "start_date" , "popup_category"]] = \
pd.DataFrame(reedsy_df["popup_version|start_date|popup_category"].tolist(), index = reedsy_df.index)

In [12]:
reedsy_df.shape

(2063, 12)

# Combination Checks

In [86]:
## Filter onto rows running ab experiments
reedsy_df_ab = reedsy_df.loc[reedsy_df.popup_version != ""].copy()

In [89]:
# count popup versions per popup name
reedsy_df_ab.groupby('popup_name', as_index = False).agg({'popup_version':'nunique'}).\
sort_values('popup_version', ascending = False)

Unnamed: 0,popup_name,popup_version
0,Author Bio Template 2 (New Style),2
99,Learning Story Structure,2
131,Literary Devices 1,2
130,Learning | Writing a Novel | 2020-03,2
127,Learning | Short Story | 2020-03,2
...,...,...
111,Learning | Amazon Algorithms | 2020-03,1
110,Learning | Amazon Ads | 2020-03,1
109,Learning | Agents | 2020-05,1
45,Discovery - Generic Beta #2,1


In [110]:
# count dates per popup name
reedsy_df_ab.groupby('popup_name', as_index = False).agg({'start_date':'nunique'}).\
sort_values('start_date', ascending = False)

Unnamed: 0,popup_name,start_date
25,Default – Discovery,13
195,bookly Book Editor Foxtrot,13
4,Book Launch Checklist 4 (New Style),13
156,Marketplace Professionals Alpha,12
14,Character Profile Checklist 3,11
...,...,...
89,Learning Marketing 101,1
41,Discovery - Fantasy #2,1
138,Marketplace Editing Charlie,1
137,Marketplace Designer Echo,1


In [111]:
## Multiple experiments were ran on some popups due to multiple start dates
## Concatenate the popup_name and start date to give us each distinct experiment
reedsy_df_ab["popup_name_dt"] = reedsy_df_ab.popup_name + '_' + reedsy_df_ab.start_date


In [114]:
## Here we'll discount any popups that have not shown both verions as those experiments are incomplete
completed_exps = reedsy_df_ab.groupby('popup_name_dt', as_index = False).agg({'popup_version':'nunique'})
completed_exps = completed_exps.rename({'popup_version':'popup_versions'}, axis = 1)
completed_exps = completed_exps.loc[completed_exps.popup_versions == 2].copy()

reedsy_df_ab_completed = reedsy_df_ab.merge(completed_exps, how = 'inner', on = 'popup_name_dt')

In [115]:
## check the merge:
reedsy_df_ab_completed.groupby('popup_name_dt', as_index = False).agg({'popup_version':'nunique'}).\
sort_values('popup_version', ascending = False)

Unnamed: 0,popup_name_dt,popup_version
0,Book Launch Checklist 4 (New Style)_2020-02-27,2
1,Book Launch Checklist 4 (New Style)_2020-05-05,2
32,Learning | Marketing 101 | 2020-05_2020-06-12,2
33,Learning | Self vs Trad | 2020-05_2020-05-06,2
34,Learning | Self-Editing | 2020-03_2020-05-01,2
35,Learning | Self-Editing | 2020-03_2020-06-05,2
36,Learning | Writing a Novel | 2020-03_2020-05-05,2
37,Learning | Writing a Novel | 2020-03_2020-06-05,2
38,Literary Devices 3 (New Style)_2020-02-29,2
39,Manuscript Template_2020-02-17,2


In [108]:
# check distinct start dates
reedsy_df_ab_completed.groupby(['popup_name'], as_index = False).agg({'start_date':'nunique'}).\
sort_values('start_date', ascending = False)

Unnamed: 0,popup_name,start_date
4,Book Launch Checklist 4 (New Style),13
15,Default – Discovery,13
89,bookly Book Editor Foxtrot,13
68,Marketplace Professionals Alpha,12
73,Story Structure Template 1,11
...,...,...
70,Prompts | Contest | 2020-04-b,2
30,Freelancer Blog Exit Modal – Money,1
54,Learning | Self vs Trad | 2020-05,1
88,bookly Book Editor Echo (Photo 1),1


In [54]:
## add a column that concatenates header, title, desc and image:
reedsy_df["popup_concat"] = reedsy_df.popup_header + reedsy_df.popup_description + reedsy_df.popup_image_url + reedsy_df.popup_title

In [71]:
## count unqiue entries of concat per popup_name and blog
reedsy_df.groupby(['blog_post_url', 'popup_name'], as_index = False).agg({
    'popup_concat':'nunique', 'views':'count', 'popup_version':'nunique'}).\
sort_values('popup_version', ascending = False)

Unnamed: 0,blog_post_url,popup_name,popup_concat,views,popup_version
579,https://blog.bookly.com/fantasy-tropes/,Worldbuilding Template,3,4,2
95,https://blog.bookly.com/book-cover-ideas/,Cover Design Checklist 3,2,3,2
552,https://blog.bookly.com/ebook-distribution/,Book Launch Checklist,1,2,2
375,https://blog.bookly.com/choosing-ghostwriter-s...,Marketplace Ghostwriter,1,3,2
1150,https://blog.bookly.com/romance-tropes/,Learning Romance 1,3,4,2
...,...,...,...,...,...
515,https://blog.bookly.com/creative-writing-promp...,Learning | Short Story | 2020-03,1,1,1
514,https://blog.bookly.com/creative-writing-promp...,Learning | Writing a Novel | 2020-03,1,1,1
513,https://blog.bookly.com/creative-writing-promp...,Learning | Short Story | 2020-03,1,1,1
512,https://blog.bookly.com/creative-writing-promp...,Learning | Writing a Novel | 2020-03,1,1,1


In [69]:
reedsy_df.loc[(reedsy_df.popup_name == 'Worldbuilding Template') & (reedsy_df.blog_post_url == 'https://blog.bookly.com/fantasy-tropes/')] 

Unnamed: 0,popup_name,blog_post_url,popup_version|start_date|popup_category,popup_header,popup_description,popup_image_url,popup_title,views,registrations,popup_version,start_date,popup_category,popup_concat
1902,Worldbuilding Template,https://blog.bookly.com/fantasy-tropes/,"[, 2020-06-05, Perfecting your Craft]",\nDownload: Worldbuilding Template,Enter your email to receive bookly's #1 worldb...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,14 Popular Fantasy Tropes (and How to Revitali...,7490,70,,2020-06-05,Perfecting your Craft,\nDownload: Worldbuilding TemplateEnter your e...
1903,Worldbuilding Template,https://blog.bookly.com/fantasy-tropes/,"[, 2020-06-05, Perfecting your Craft]",\nDownload: Worldbuilding Template,Enter your email to receive bookly's #1 worldb...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,14 popularnych tropów fantasy (i jak je ożywić),21,0,,2020-06-05,Perfecting your Craft,\nDownload: Worldbuilding TemplateEnter your e...
1904,Worldbuilding Template,https://blog.bookly.com/fantasy-tropes/,"[B, 2020-05-01, Perfecting your Craft]",\nDownload: Worldbuilding Template,Enter your email to receive bookly's #1 worldb...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,14 Popular Fantasy Tropes (and How to Revitali...,1645,0,B,2020-05-01,Perfecting your Craft,\nDownload: Worldbuilding TemplateEnter your e...
1905,Worldbuilding Template,https://blog.bookly.com/fantasy-tropes/,"[B, 2020-05-01, Perfecting your Craft]",\nتنزيل: قالب Worldbuilding,أدخل بريدك الإلكتروني لتلقي مورد bookly # 1 لب...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,14 مجازفة خيالية شائعة (وكيفية تنشيطها),21,0,B,2020-05-01,Perfecting your Craft,\nتنزيل: قالب Worldbuildingأدخل بريدك الإلكترو...


In [73]:
## count unqiue entries of category per popup name
reedsy_df.groupby(['popup_name'], as_index = False).agg({'popup_category':'nunique', 'popup_version':'nunique'}).\
sort_values('popup_version', ascending = False)

Unnamed: 0,popup_name,popup_category,popup_version
0,Author Bio Template 2 (New Style),3,3
15,Character Profile Checklist 4,2,3
135,Literary Devices 3 (New Style),1,3
133,Literary Devices 1,1,3
166,Query Letter Checklist,1,3
...,...,...,...
55,Discovery - Reading Log #1,1,1
124,Learning | Reviews | 2020-03,1,1
123,Learning | Query Letters | 2020-04,1,1
122,Learning | Picturebook Publishing | 2020-03,2,1


In [79]:
## count unqiue start dates
reedsy_df.groupby(['popup_name'], as_index = False).agg({\
    'start_date':'nunique'}).\
sort_values('start_date', ascending = False)

Unnamed: 0,popup_name,start_date
198,bookly Book Editor Foxtrot,13
4,Book Launch Checklist 4 (New Style),13
25,Default – Discovery,13
170,Story Structure Template 1,13
14,Character Profile Checklist 3,13
...,...,...
37,Discovery - Books To Read Before You Die Check...,1
115,Learning | BookBub Ads | 2020-06,1
114,Learning | Big 5 | 2020-06,1
113,Learning | Amazon Algorithms | 2020-03,1


In [84]:
## count unqiue start dates
reedsy_df.groupby(['blog_post_url', 'popup_name', 'popup_category'], as_index = False).agg({
    'start_date':'nunique'}).\
sort_values('start_date', ascending = False)

Unnamed: 0,blog_post_url,popup_name,popup_category,start_date
375,https://blog.bookly.com/choosing-ghostwriter-s...,Marketplace Ghostwriter,Understanding Publishing,3
272,https://blog.bookly.com/character-name-generat...,Learning | Writing a Novel | 2020-03,generator,3
730,https://blog.bookly.com/how-to-get-your-book-p...,Query Letter Checklist 3 (New Style),Understanding Publishing,2
306,https://blog.bookly.com/character-name-generat...,Learning | Writing a Novel | 2020-03,generator,2
1090,https://blog.bookly.com/plot-generator/drama/,Writing Prompts #3,generator,2
...,...,...,...,...
546,https://blog.bookly.com/david-fugate-literary-...,Learning Trad Pub,Understanding Publishing,1
545,https://blog.bookly.com/dan-harmon-story-circle//,Story Structure Template 1,Perfecting your Craft,1
544,https://blog.bookly.com/dan-harmon-story-circle//,Learning | Structure | 2020-05,Perfecting your Craft,1
543,https://blog.bookly.com/dan-harmon-story-circle/,Story Structure Template 1,Perfecting your Craft,1


In [85]:
reedsy_df.loc[(reedsy_df.popup_name == 'Marketplace Ghostwriter') & (reedsy_df.popup_category == 'Understanding Publishing')] 

Unnamed: 0,popup_name,blog_post_url,popup_version|start_date|popup_category,popup_header,popup_description,popup_image_url,popup_title,views,registrations,popup_version,start_date,popup_category,popup_concat
1405,Marketplace Ghostwriter,https://blog.bookly.com/choosing-ghostwriter-s...,"[, , Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://blog.bookly.com/wp-content/uploads/201...,How to Hire a Ghostwriter You Actually Trust i...,21,0,,,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1406,Marketplace Ghostwriter,https://blog.bookly.com/choosing-ghostwriter-s...,"[, 2020-05-27, Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://blog.bookly.com/wp-content/uploads/201...,How to Hire a Ghostwriter You Actually Trust i...,1932,21,,2020-05-27,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1407,Marketplace Ghostwriter,https://blog.bookly.com/choosing-ghostwriter-s...,"[A, 2020-06-12, Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://blog.bookly.com/wp-content/uploads/201...,How to Hire a Ghostwriter You Actually Trust i...,196,0,A,2020-06-12,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1408,Marketplace Ghostwriter,https://blog.bookly.com/choosing-ghostwriter-s...,"[, 2020-05-27, Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://blog.bookly.com/wp-content/uploads/201...,How to Hire a Ghostwriter You Actually Trust i...,35,0,,2020-05-27,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1409,Marketplace Ghostwriter,https://blog.bookly.com/ghostwritten-books/,"[, , Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,7 Surprisingly Ghostwritten Books,1225,0,,,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1410,Marketplace Ghostwriter,https://blog.bookly.com/hire-ghostwriter-andre...,"[A, 2020-03-02, Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,"How Much Does a Ghostwriter Cost, According to...",959,0,A,2020-03-02,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...
1413,Marketplace Ghostwriter,https://blog.bookly.com/work-with-a-ghostwriter/,"[A, , Understanding Publishing]",\nLooking for a ghostwriter?,\nThe best publishing professionals are on boo...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,What's it like to work with a ghostwriter on y...,56,0,A,,Understanding Publishing,\nLooking for a ghostwriter?\nThe best publish...


In [27]:
## Calculate conversion rates for popup versions

## sum views and registrations
reedsy_popup = reedsy_df.groupby('popup_version', as_index = False).agg({'views':sum, 'registrations':sum})

## calculate conversion rate
reedsy_popup["conversion_rate"] = reedsy_popup.registrations / reedsy_popup.views

In [22]:
## remove the null popup versions
reedsy_popup = reedsy_popup.loc[reedsy_popup.popup_version != ''].copy()

In [28]:
reedsy_popup.style.format({
    "views": "{:,}",
    "registrations": "{:,}",
    "conversion_rate": "{:.2%}"
}).hide(axis="index")

popup_version,views,registrations,conversion_rate
,333606,16709,5.01%
A,1117711,15834,1.42%
B,1200549,14889,1.24%


Popup A was more performant

In [25]:
## Calculate conversion rates by start date

## sum views and registrations
reedsy_dt = reedsy_df.groupby('start_date', as_index = False).agg({'views':sum, 'registrations':sum})

## calculate conversion rate
reedsy_dt["conversion_rate"] = reedsy_dt.registrations / reedsy_dt.views

In [26]:
reedsy_dt.head()

Unnamed: 0,start_date,views,registrations,conversion_rate
0,,48083,812,0.016887
1,2018-06-28,7462,56,0.007505
2,2018-11-14,35,0,0.0
3,2018-12-03,3857,308,0.079855
4,2018-12-31,21,0,0.0


In [34]:
# blank dates with popup version
reedsy_df.loc[reedsy_df.start_date == ""].head()

Unnamed: 0,popup_name,blog_post_url,popup_version|start_date|popup_category,popup_header,popup_description,popup_image_url,popup_title,views,registrations,popup_version,start_date,popup_category
9,Book Launch Checklist,https://blog.bookly.com/ebook-distribution/,"[, , Understanding Publishing]",\nDownload The Book Launch Checklist,\nEnter your email address and download the co...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,Ebook Distribution: The Complete Guide for New...,973,56,,,Understanding Publishing
11,Book Launch Checklist,https://blog.bookly.com/ebook-distribution//,"[, , Understanding Publishing]",\nDownload The Book Launch Checklist,\nEnter your email address and download the co...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,Ebook Distribution: The Complete Guide for New...,21,0,,,Understanding Publishing
26,Book Launch Checklist 3,https://blog.bookly.com/best-self-publishing-r...,"[, , Book Marketing, Perfecting your Craft, Un...",\nDownload: Book Launch Checklist,Enter your email to download bookly's #1 publi...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,The 21 Best News Roundups for Self-Publishing ...,35,0,,,"Book Marketing, Perfecting your Craft, Underst..."
29,Book Launch Checklist 3,https://blog.bookly.com/getting-your-book-cove...,"[, , Book Marketing, Understanding Publishing]",\nDownload: Book Launch Checklist,Enter your email to download bookly's #1 publi...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,Getting Your Book Coverage in the National Press,35,0,,,"Book Marketing, Understanding Publishing"
42,Book Launch Checklist 3,https://blog.bookly.com/swearing-book-title/,"[, , Book Marketing, From our Authors]",\nDownload: Book Launch Checklist,Enter your email to download bookly's #1 publi...,https://d2ybmm5cpznb3i.cloudfront.net/wp-conte...,Was Swearing in my Book Title a Sh*tty Idea?,91,0,,,"Book Marketing, From our Authors"
