# 23-Naive-hybrid-recommendation

In the previous notebook, we built two recommender systems: one based on content-based filtering and one based on collaborative filtering. In this notebook, we combine the top 5 recommendations from each system and see how the accuracy changes.

In [None]:
# Mount Google Drive into the Colab filesystem so that files under
# /content/drive can be read by later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the saved per-customer recommendations and discard the 'Unnamed: 0'
# column (presumably the index written out when the CSV was saved -- confirm).
df = (
    pd.read_csv('/content/drive/MyDrive/social-networks-project/combined_recommendations.csv')
    .drop(columns='Unnamed: 0')
)
df.head()

Unnamed: 0,customer_id,purchases,cf_recs,recs_con
0,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,[666448006],"[673677002, 507910001, 372860001, 537116001, 5...","[710899003, 712216001, 615141002, 697054002, 6..."
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,"[671502001, 681376001, 685687001]","[355072002, 615141002, 355569001, 589222001, 3...","[685689001, 244267032, 708352001, 708379004, 6..."
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"[573085010, 636455003, 684210001]","[706016001, 706016002, 539723005, 712587003, 5...","[695545002, 673799003, 562245001, 712587004, 6..."
3,0000f1c71aafe5963c3d195cf273f7bfd50bbf17761c91...,[632982036],"[673677002, 372860001, 537116001, 673396002, 6...","[557248001, 685814001, 685813001, 557247001, 5..."
4,0000f2ea26b7f0a9175f428c8cf7743e9e10e193465ecd...,[562245059],"[615141002, 673396002, 708352001, 692454002, 6...","[448509017, 635957001, 673901001, 677809002, 7..."


## Data Processing

The values in the 'purchases', 'cf_recs', and 'recs_con' columns are stored as strings, so I will convert them into regular Python lists.

In [None]:
import ast

# The list-valued columns arrive from the CSV as strings such as
# "[673677002, 507910001]"; parse them back into Python lists.
# Note that Series.apply calls the parser once per row -- it is not vectorized.
# Values that are already lists are passed through unchanged, so re-running this
# cell on an already-converted frame is a no-op instead of a ValueError from
# ast.literal_eval. Anything else (e.g. NaN) still raises, as before.
def _parse_list(value):
    """Return `value` if it is already a list, otherwise parse it with ast.literal_eval."""
    return value if isinstance(value, list) else ast.literal_eval(value)

for col in ('cf_recs', 'recs_con', 'purchases'):
    df[col] = df[col].apply(_parse_list)

In [None]:
# Concatenate each row's cf_recs list and recs_con list into one list,
# stored in a new 'combined' column (cf_recs items first).
df['combined'] = df['cf_recs'].add(df['recs_con'])

In [None]:
# Preview the frame to confirm the 'combined' column.
# NOTE(review): the saved output lists 'num_purchase' before 'combined', so it was
# captured after the cell that adds 'num_purchase' had already run; use
# Restart & Run All to refresh the outputs in document order.
df.head()

Unnamed: 0,customer_id,purchases,cf_recs,recs_con,num_purchase,combined
0,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,[666448006],"[673677002, 507910001, 372860001, 537116001, 5...","[710899003, 712216001, 615141002, 697054002, 6...",1,"[673677002, 507910001, 372860001, 537116001, 5..."
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,"[671502001, 681376001, 685687001]","[355072002, 615141002, 355569001, 589222001, 3...","[685689001, 244267032, 708352001, 708379004, 6...",3,"[355072002, 615141002, 355569001, 589222001, 3..."
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"[573085010, 636455003, 684210001]","[706016001, 706016002, 539723005, 712587003, 5...","[695545002, 673799003, 562245001, 712587004, 6...",3,"[706016001, 706016002, 539723005, 712587003, 5..."
3,0000f1c71aafe5963c3d195cf273f7bfd50bbf17761c91...,[632982036],"[673677002, 372860001, 537116001, 673396002, 6...","[557248001, 685814001, 685813001, 557247001, 5...",1,"[673677002, 372860001, 537116001, 673396002, 6..."
4,0000f2ea26b7f0a9175f428c8cf7743e9e10e193465ecd...,[562245059],"[615141002, 673396002, 708352001, 692454002, 6...","[448509017, 635957001, 673901001, 677809002, 7...",1,"[615141002, 673396002, 708352001, 692454002, 6..."


In [None]:
# Number of purchased items per row: the length of each 'purchases' list
df['num_purchase'] = df['purchases'].map(len)

In [None]:
# Preview the frame to confirm the 'num_purchase' column was added.
df.head()

Unnamed: 0,customer_id,purchases,cf_recs,recs_con,num_purchase
0,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,[666448006],"[673677002, 507910001, 372860001, 537116001, 5...","[710899003, 712216001, 615141002, 697054002, 6...",1
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,"[671502001, 681376001, 685687001]","[355072002, 615141002, 355569001, 589222001, 3...","[685689001, 244267032, 708352001, 708379004, 6...",3
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"[573085010, 636455003, 684210001]","[706016001, 706016002, 539723005, 712587003, 5...","[695545002, 673799003, 562245001, 712587004, 6...",3
3,0000f1c71aafe5963c3d195cf273f7bfd50bbf17761c91...,[632982036],"[673677002, 372860001, 537116001, 673396002, 6...","[557248001, 685814001, 685813001, 557247001, 5...",1
4,0000f2ea26b7f0a9175f428c8cf7743e9e10e193465ecd...,[562245059],"[615141002, 673396002, 708352001, 692454002, 6...","[448509017, 635957001, 673901001, 677809002, 7...",1


In [None]:
# Total number of purchased items, summed over all rows.
df['num_purchase'].sum()

430815

## Evaluation

In [None]:
# For each row, count how many purchased items appear anywhere in the combined
# recommendation list (an item repeated in 'purchases' is counted each time).
# Iterating the two columns directly avoids the per-row Series construction of
# df.apply(axis=1), and converting the recommendations to a set makes every
# membership test O(1). The resulting counts are the same as before, and an
# empty frame simply yields an empty column.
df['correct'] = [
    sum(item in recommended for item in bought)
    for bought, recommended in zip(df['purchases'], map(set, df['combined']))
]

In [None]:
# Preview the frame with the per-row 'correct' hit counts.
df.head()

Unnamed: 0,customer_id,purchases,cf_recs,recs_con,num_purchase,combined,correct
0,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,[666448006],"[673677002, 507910001, 372860001, 537116001, 5...","[710899003, 712216001, 615141002, 697054002, 6...",1,"[673677002, 507910001, 372860001, 537116001, 5...",0
1,00007d2de826758b65a93dd24ce629ed66842531df6699...,"[671502001, 681376001, 685687001]","[355072002, 615141002, 355569001, 589222001, 3...","[685689001, 244267032, 708352001, 708379004, 6...",3,"[355072002, 615141002, 355569001, 589222001, 3...",0
2,00009d946eec3ea54add5ba56d5210ea898def4b46c685...,"[573085010, 636455003, 684210001]","[706016001, 706016002, 539723005, 712587003, 5...","[695545002, 673799003, 562245001, 712587004, 6...",3,"[706016001, 706016002, 539723005, 712587003, 5...",0
3,0000f1c71aafe5963c3d195cf273f7bfd50bbf17761c91...,[632982036],"[673677002, 372860001, 537116001, 673396002, 6...","[557248001, 685814001, 685813001, 557247001, 5...",1,"[673677002, 372860001, 537116001, 673396002, 6...",0
4,0000f2ea26b7f0a9175f428c8cf7743e9e10e193465ecd...,[562245059],"[615141002, 673396002, 708352001, 692454002, 6...","[448509017, 635957001, 673901001, 677809002, 7...",1,"[615141002, 673396002, 708352001, 692454002, 6...",0


In [None]:
# Mean of 'correct' over all rows: the average number of purchased items per
# customer that also appear in that customer's combined recommendation list.
df['correct'].mean()

0.2630243507456696

In [None]:
# Share of customers for whom at least one purchased item was recommended
df['correct'].gt(0).mean()

0.24584585423057995