# Export Combined Recommendations for Interactive Chart

## Load libraries and data

In [23]:
import sys

import pandas as pd

# Local application/library specific imports
sys.path.append("..")
from utils.missing_data_processing import get_preprocessed_data  # For loading missing data

In [24]:
# Load the book dataset with basic preprocessing
# NOTE(review): the loader's result is indexed by dataset name below; its exact
# structure is defined in utils.missing_data_processing — confirm there.
data = get_preprocessed_data("books")
# Keep the "books" entry; later cells look up item display details in it
books_df = data["books"]

## Load Top Recommendations

Load the previously generated top recommendations for both methods: the LensKit model and memory-based collaborative filtering (memorycf).

In [59]:

# LensKit scores: drop the columns the export does not need (so the exported
# file is as minimal as possible) and strip surrounding whitespace from the
# period labels.
top_results_lenskit = (
    pd.read_csv('./data/top_scores_lenskit_model100.csv')
    .drop(columns=[
        'member_id',
        'subscription_start',
        'subscription_end',
        'model_loops',
        'member_period',
        'formatted_title',
    ])
    .assign(period=lambda scores: scores['period'].str.strip())
)

# Memory-based CF scores: drop the unneeded columns and rename the item key
# to item_id, the column name the LensKit frame uses.
top_scores_memorycf = (
    pd.read_csv('./data/memorycf_top_results.csv')
    .drop(columns=[
        'formatted_chart_title',
        'formatted_table_title',
        'subscription_start',
        'subscription_end',
    ])
    .rename(columns={'item_uri': 'item_id'})
)

In [13]:
# Preview the first rows of the LensKit results
top_results_lenskit.head()

Unnamed: 0,item_id,median,skew,std,var,kurtosis,score,model_run,period
0,burney-evelina-history-young,0.320107,-0.048081,0.18916,0.035781,0.023806,0.809843,0,1921-12-28/1922-11-08
1,burney-evelina-history-young,0.320107,-0.048081,0.18916,0.035781,0.023806,0.148679,1,1921-12-28/1922-11-08
2,burney-evelina-history-young,0.320107,-0.048081,0.18916,0.035781,0.023806,0.60695,2,1921-12-28/1922-11-08
3,burney-evelina-history-young,0.320107,-0.048081,0.18916,0.035781,0.023806,0.21908,3,1921-12-28/1922-11-08
4,burney-evelina-history-young,0.320107,-0.048081,0.18916,0.035781,0.023806,0.172441,4,1921-12-28/1922-11-08


In [30]:
# Preview the first rows of the memory-based CF results
top_scores_memorycf.head()

Unnamed: 0,item_id,member_id,period,metric,score
0,dickens-david-copperfield,hemingway-ernest,1924-03-28/1925-03-28,pearson,1.0
1,thompson-francis-thompsons-works,hemingway-ernest,1924-03-28/1925-03-28,euclidean,1.0
2,carritt-theory-beauty,hemingway-ernest,1924-03-28/1925-03-28,euclidean,0.642857
3,thompson-francis-thompsons-works,hemingway-ernest,1924-03-28/1925-03-28,cosine,0.569721
4,shakespeare-shakespeare,hemingway-ernest,1921-12-28/1922-11-08,euclidean,0.428571


In [17]:
# (rows, columns) of the LensKit results
top_results_lenskit.shape

(6300, 9)

In [18]:
# (rows, columns) of the memory-based CF results
top_scores_memorycf.shape

(231, 5)

In [60]:
# save minimal top results for use with interactive figure
# (index=False leaves the DataFrame index out of the exported CSV files)

top_results_lenskit.to_csv('../figures/interactive/lenskit_top_scores.csv', index=False)
top_scores_memorycf.to_csv('../figures/interactive/memorycf_top_scores.csv', index=False)


## Get book display details for items in the top recommendations


In [35]:
# Distinct item ids that appear in the LensKit top results
lenskit_items = top_results_lenskit.item_id.unique()
print(f"{len(lenskit_items)} unique items")
# Show the first ten ids as a sample
lenskit_items[:10]

52 unique items


array(['burney-evelina-history-young', 'cather-antonia',
       'chekhov-black-monk', 'chekhov-letters-tchehov-family',
       'chesterton-appreciations-criticisms-works', 'chesterton-manalive',
       'clouston-lunatic-large-novel', 'conrad-shadow-line-confession',
       'dell-moon-calf', 'dowden-shakespeare'], dtype=object)

In [36]:
# Distinct item ids that appear in the memory-based CF top results
memorycf_items = top_scores_memorycf.item_id.unique()
print(f"{len(memorycf_items)} unique items")
# Show the first ten ids as a sample
memorycf_items[:10]

47 unique items


array(['dickens-david-copperfield', 'thompson-francis-thompsons-works',
       'carritt-theory-beauty', 'shakespeare-shakespeare',
       'swinnerton-george-gissing-critical',
       'byron-lord-byrons-correspondence', 'wright-life-walter-pater',
       'white-gold', 'lindsay-daniel-jazz-poems',
       'corkery-threshold-quiet'], dtype=object)

In [40]:
# Merge both id lists into a single de-duplicated set of recommended items
combined_items = set(lenskit_items).union(memorycf_items)
print(f"{len(combined_items)} unique items combined")

98 unique items combined


In [62]:
# Look up display details for every recommended item, keeping only the
# columns needed for the interactive chart. The explicit .copy() makes this an
# independent frame, so the column edits in later cells do not operate on a
# slice of books_df (avoids pandas' SettingWithCopyWarning).
recommended_books = books_df.loc[
    books_df.id.isin(combined_items), ["id", "title", "author", "year"]
].copy()

# Every recommended item must resolve to exactly one book record. The expected
# count is derived from combined_items rather than hard-coded, so the check
# stays valid when the input recommendation files change; a failure reports
# which ids could not be found.
missing_ids = set(combined_items) - set(recommended_books.id)
assert not missing_ids, f"items without a book record: {sorted(missing_ids)}"
assert len(recommended_books) == len(combined_items), "duplicate book ids in books_df"

recommended_books.head(10)

Unnamed: 0,id,title,author,year
37,smollett-adventures-roderick-random,The Adventures of Roderick Random,"Smollett, Tobias",1748.0
39,fielding-history-tom-jones,"The History of Tom Jones, a Foundling","Fielding, Henry",1749.0
41,smollett-adventures-peregrine-pickle,The Adventures of Peregrine Pickle,"Smollett, Tobias",1751.0
55,burney-evelina-history-young,"Evelina: Or, the History of a Young Lady's Ent...","Burney, Fanny",1778.0
118,gogol-dead-souls,Dead Souls,"Gogol, Nikolai",1842.0
150,dickens-david-copperfield,David Copperfield,"Dickens, Charles",1850.0
266,dostoyevsky-possessed,The Possessed,"Dostoyevsky, Fyodor",1872.0
276,green-short-history-english,A Short History of the English People,"Green, John Richard",1874.0
300,dowden-shakespeare,Shakespeare,"Dowden, Edward",1877.0
340,stevenson-virginibus-puerisque-papers,Virginibus Puerisque and Other Papers,"Stevenson, Robert Louis",1881.0


In [66]:
# known title that is problematic for display
# (select its row by a substring match on the title column to inspect it)
recommended_books[recommended_books.title.str.contains("Instigations of Ezra Pound")]

Unnamed: 0,id,title,author,year
1620,pound-instigations-ezra-pound,Instigations of Ezra Pound: Together with an E...,Ezra Pound and Ernest Fenollosa,1920.0


In [64]:
# author is lastname, first; reorder for display

def reverse_name(lastname_first):
    """Convert a "Lastname, Firstname" string to "Firstname Lastname".

    The name is split at the first comma only, so anything after it
    (e.g. "John, Jr.") stays together in front of the last name. Whitespace
    around each part is stripped so that padded input such as
    " Fenollosa, Ernest" does not produce a double space in the result.
    A name without a comma is returned as is (minus surrounding whitespace).

    Args:
        lastname_first: a single name string, normally "Lastname, Firstname".

    Returns:
        The reordered name with its parts joined by a single space.
    """
    last, _, first = lastname_first.partition(",")
    # Skip empty parts so a missing half never leaves a stray space behind.
    return " ".join(part for part in (first.strip(), last.strip()) if part)
    
def author_names(names):
    """Format a semicolon-separated list of authors for display.

    Each author is passed through reverse_name and the results are joined
    with " and ". Segments are stripped of surrounding whitespace, and empty
    segments (from a trailing or doubled ';') are skipped so the result
    never ends in a dangling " and ".

    Args:
        names: one or more author names separated by ';'.

    Returns:
        The display string, e.g. "Ezra Pound and Ernest Fenollosa".
    """
    authors = [segment.strip() for segment in names.split(";")]
    return " and ".join(reverse_name(author) for author in authors if author)

# Overwrite the author column with the display form of each name.
# NOTE(review): because the column is replaced in place, re-running this cell
# feeds already-converted names through author_names again.
recommended_books["author"] = recommended_books.author.apply(author_names)
recommended_books.head(10)

Unnamed: 0,id,title,author,year
37,smollett-adventures-roderick-random,The Adventures of Roderick Random,Tobias Smollett,1748.0
39,fielding-history-tom-jones,"The History of Tom Jones, a Foundling",Henry Fielding,1749.0
41,smollett-adventures-peregrine-pickle,The Adventures of Peregrine Pickle,Tobias Smollett,1751.0
55,burney-evelina-history-young,"Evelina: Or, the History of a Young Lady's Ent...",Fanny Burney,1778.0
118,gogol-dead-souls,Dead Souls,Nikolai Gogol,1842.0
150,dickens-david-copperfield,David Copperfield,Charles Dickens,1850.0
266,dostoyevsky-possessed,The Possessed,Fyodor Dostoyevsky,1872.0
276,green-short-history-english,A Short History of the English People,John Richard Green,1874.0
300,dowden-shakespeare,Shakespeare,Edward Dowden,1877.0
340,stevenson-virginibus-puerisque-papers,Virginibus Puerisque and Other Papers,Robert Louis Stevenson,1881.0


In [65]:
# Look at the Ezra Pound row again (it lists two authors)
recommended_books[recommended_books.title.str.contains("Instigations of Ezra Pound")]

Unnamed: 0,id,title,author,year
1620,pound-instigations-ezra-pound,Instigations of Ezra Pound: Together with an E...,Ezra Pound and Ernest Fenollosa,1920.0


In [73]:
# Replace the overly long title with a short form for display
instigations_index = recommended_books.index[
    recommended_books["title"].str.contains("Instigations of Ezra Pound")
]
recommended_books.loc[instigations_index, "title"] = "Instigations of Ezra Pound"
# Show the affected row(s) to confirm the new title
recommended_books.loc[instigations_index]

Unnamed: 0,id,title,author,year
1620,pound-instigations-ezra-pound,Instigations of Ezra Pound,Ezra Pound and Ernest Fenollosa,1920.0


In [74]:
# export for use in interactive figure
# (index=False leaves the DataFrame index out of the exported CSV file)
recommended_books.to_csv('../figures/interactive/titles.csv', index=False)