In [0]:
import numpy as np 
import pandas as pd 
from mlxtend.frequent_patterns import apriori, association_rules
from sklearn.metrics.pairwise import cosine_similarity

Predicting the next item a customer will buy, based on the products they have purchased previously, can be an important factor in customer behavior analysis.

Here we try to predict the nth product that a customer may purchase, based on the (n-1) items they have already purchased.

We first evaluate the cosine similarity between all the unique products and generate a matrix of similarity scores. Next, we look up the most similar items for each of the n-1 purchased items and merge them into a single dictionary. If the same product appears more than once, we keep only the entry with the higher similarity score. Finally, we sort the products by similarity score and display the top 10 products.


###  Next item prediction for Online_Retail Data 



In [0]:
# Location of the pre-filtered Online Retail export (filtered_data.csv).
DATA_PATH = "/content/filtered_data.csv"
data = pd.read_csv(DATA_PATH)
data

Unnamed: 0.1,Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.10,13085.0,United Kingdom
4,4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom
...,...,...,...,...,...,...,...,...,...
400911,525456,538171,22271,FELTCRAFT DOLL ROSIE,2,2010-12-09 20:01:00,2.95,17530.0,United Kingdom
400912,525457,538171,22750,FELTCRAFT PRINCESS LOLA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
400913,525458,538171,22751,FELTCRAFT PRINCESS OLIVIA DOLL,1,2010-12-09 20:01:00,3.75,17530.0,United Kingdom
400914,525459,538171,20970,PINK FLORAL FELTCRAFT SHOULDER BAG,2,2010-12-09 20:01:00,3.75,17530.0,United Kingdom


In [0]:
# Trim leading and trailing whitespace from every product description.
data['Description'] = data['Description'].str.strip()

In [0]:
# Total quantity bought per (customer, product) pair.
quantity_per_pair = data.groupby(['Customer ID', 'Description'])['Quantity'].sum()

# Pivot products into columns (one row per customer); pairs that never
# occurred become 0 instead of NaN.
transformed_data = quantity_per_pair.unstack().reset_index().fillna(0)

In [0]:
# Display the customer-by-product quantity matrix.
transformed_data

Description,Customer ID,10 COLOUR SPACEBOY PEN,11 PC CERAMIC TEA SET POLKADOT,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE RED SPOTTY,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE RED SPOTTY,12 PENCILS TALL TUBE SKULLS,12 PENCILS TALL TUBE WOODLAND,12 PINK HEN+CHICKS IN BASKET,12 PINK ROSE PEG PLACE SETTINGS,12 RED ROSE PEG PLACE SETTINGS,15 PINK FLUFFY CHICKS IN BOX,15CM CHRISTMAS GLASS BALL 20 LIGHTS,2 DAISIES HAIR COMB,2 PICTURE BOOK EGGS EASTER BUNNY,2 PICTURE BOOK EGGS EASTER CHICKS,2 PICTURE BOOK EGGS EASTER DUCKS,20 DOLLY PEGS RETROSPOT,200 BENDY SKULL STRAWS,200 RED + WHITE BENDY STRAWS,24 HANGING EASTER EGGS FLORAL TUB,3 BIRDS CANVAS SCREEN,3 BLACK CATS W HEARTS BLANK CARD,3 GARDENIA MORRIS BOXED CANDLES,3 HEARTS HANGING DECORATION RUSTIC,3 HOOK HANGER MAGIC GARDEN,3 HOOK PHOTO SHELF ANTIQUE WHITE,3 PIECE JIGSAW TOADSTOOLS,...,"WRAP,SUKI AND FRIENDS","WRAPPING PAPER, XMAS VILLAGE",YELLOW + BROWN BEAR FELT PURSE KIT,YELLOW BIRDS FELT DES FOODCOVER,YELLOW BREAKFAST CUP AND SAUCER,YELLOW BROWN BEAR FELT PURSE KIT,YELLOW COAT RACK PARIS FASHION,YELLOW DRAGONFLY HELICOPTER,YELLOW EASTER EGG HUNT START POST,YELLOW FELT HANGING HEART W FLOWER,YELLOW FLOWERS FELT HANDBAG KIT,YELLOW GIANT GARDEN THERMOMETER,YELLOW KNITTED KIDS RUCKSACK,YELLOW METAL CHICKEN HEART,YELLOW PINK FLOWER DESIGN BIG BOWL,YELLOW POT PLANT CANDLE,YELLOW PURPLE DAISY FELT PURSE KIT,YELLOW RABBIT SAT EASTER,YELLOW RED FLOWER PIGGY BANK,YELLOW SHARK HELICOPTER,YELLOW VW BEETLE CERAMIC MONEY BOX,YELLOW/BLUE RETRO RADIO,YELLOW/PINK CERAMIC CANDLE HOLDER,YELLOW/PINK FLOWER DESIGN BIG MUG,YOU'RE CONFUSING ME METAL SIGN,YULETIDE IMAGES GIFT WRAP SET,YULETIDE IMAGES S/6 
PAPER BOXES,ZINC FINISH 15CM PLANTER POTS,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE 2 WALL PLANTER,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
0,12346.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,12347.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,12348.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,12349.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,12351.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4307,18283.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4308,18284.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4309,18285.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4310,18286.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
# Keep the customer IDs and the full column index of the pivot table
# (the 'Customer ID' label followed by every product name).
custumers = transformed_data['Customer ID']
items = transformed_data.columns


In [0]:
# Product names only: every pivot column except the 'Customer ID' label.
# Derived from transformed_data (not from the previous value of `items`) so
# that re-running this cell does not raise KeyError on a label that has
# already been dropped.
items = transformed_data.columns.drop('Customer ID')


In [0]:
# Use the customer ID as the row index so that only product columns remain.
new_data = transformed_data.set_index(keys='Customer ID')


In [0]:
# Transpose for item-item collaborative filtering: rows become products and
# columns become customers.
# Built from transformed_data rather than from new_data itself so that the
# cell is idempotent; `new_data = new_data.T` flips the matrix back to
# customers x products every second time the cell is executed.
new_data = transformed_data.set_index('Customer ID').T

In [0]:
# Item-item similarity matrix: cosine similarity between the rows (products)
# of new_data, labelled with the product names on both axes.
# cosine_similarity is already imported in the notebook's import cell, and
# calling it with a single argument compares the rows of that matrix with
# each other.
product_similarity = pd.DataFrame(
    cosine_similarity(new_data),
    index=new_data.index,
    columns=new_data.index,
)
product_similarity.head()

Description,10 COLOUR SPACEBOY PEN,11 PC CERAMIC TEA SET POLKADOT,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,12 PENCILS SMALL TUBE RED RETROSPOT,12 PENCILS SMALL TUBE RED SPOTTY,12 PENCILS SMALL TUBE SKULL,12 PENCILS TALL TUBE POSY,12 PENCILS TALL TUBE RED RETROSPOT,12 PENCILS TALL TUBE RED SPOTTY,12 PENCILS TALL TUBE SKULLS,12 PENCILS TALL TUBE WOODLAND,12 PINK HEN+CHICKS IN BASKET,12 PINK ROSE PEG PLACE SETTINGS,12 RED ROSE PEG PLACE SETTINGS,15 PINK FLUFFY CHICKS IN BOX,15CM CHRISTMAS GLASS BALL 20 LIGHTS,2 DAISIES HAIR COMB,2 PICTURE BOOK EGGS EASTER BUNNY,2 PICTURE BOOK EGGS EASTER CHICKS,2 PICTURE BOOK EGGS EASTER DUCKS,20 DOLLY PEGS RETROSPOT,200 BENDY SKULL STRAWS,200 RED + WHITE BENDY STRAWS,24 HANGING EASTER EGGS FLORAL TUB,3 BIRDS CANVAS SCREEN,3 BLACK CATS W HEARTS BLANK CARD,3 GARDENIA MORRIS BOXED CANDLES,3 HEARTS HANGING DECORATION RUSTIC,3 HOOK HANGER MAGIC GARDEN,3 HOOK PHOTO SHELF ANTIQUE WHITE,3 PIECE JIGSAW TOADSTOOLS,3 PIECE SPACEBOY COOKIE CUTTER SET,...,"WRAP,SUKI AND FRIENDS","WRAPPING PAPER, XMAS VILLAGE",YELLOW + BROWN BEAR FELT PURSE KIT,YELLOW BIRDS FELT DES FOODCOVER,YELLOW BREAKFAST CUP AND SAUCER,YELLOW BROWN BEAR FELT PURSE KIT,YELLOW COAT RACK PARIS FASHION,YELLOW DRAGONFLY HELICOPTER,YELLOW EASTER EGG HUNT START POST,YELLOW FELT HANGING HEART W FLOWER,YELLOW FLOWERS FELT HANDBAG KIT,YELLOW GIANT GARDEN THERMOMETER,YELLOW KNITTED KIDS RUCKSACK,YELLOW METAL CHICKEN HEART,YELLOW PINK FLOWER DESIGN BIG BOWL,YELLOW POT PLANT CANDLE,YELLOW PURPLE DAISY FELT PURSE KIT,YELLOW RABBIT SAT EASTER,YELLOW RED FLOWER PIGGY BANK,YELLOW SHARK HELICOPTER,YELLOW VW BEETLE CERAMIC MONEY BOX,YELLOW/BLUE RETRO RADIO,YELLOW/PINK CERAMIC CANDLE HOLDER,YELLOW/PINK FLOWER DESIGN BIG MUG,YOU'RE CONFUSING ME METAL SIGN,YULETIDE IMAGES GIFT WRAP 
SET,YULETIDE IMAGES S/6 PAPER BOXES,ZINC FINISH 15CM PLANTER POTS,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE 2 WALL PLANTER,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
Description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
10 COLOUR SPACEBOY PEN,1.0,0.034789,0.000141,0.042964,0.051271,0.023564,0.031349,0.021563,0.010483,0.082959,0.107617,0.035456,0.028703,0.094731,0.011508,0.011174,0.078188,0.042403,0.057215,0.015351,0.016214,0.112985,0.0,0.07182,0.0,0.002114,0.023156,0.002114,0.087627,0.037718,0.147841,0.497732,0.0,0.014379,0.0,0.014919,0.002536,0.01857,0.0,0.100315,...,0.062421,0.034789,0.050895,0.007592,0.0,0.050688,0.003971,0.029597,0.013441,0.0,0.013684,0.0,0.0,0.122922,0.0,0.0,0.018987,0.014203,0.000236,0.038498,0.007438,0.005436,0.0,0.0,0.506489,0.023032,0.024566,0.029313,0.076151,0.0,0.0,0.0,0.0,0.0,0.01917,0.0,0.022676,0.000372,0.114003,0.0866
11 PC CERAMIC TEA SET POLKADOT,0.034789,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12 ASS ZINC CHRISTMAS DECORATIONS,0.000141,0.0,1.0,0.007517,0.038666,0.00035,0.000592,0.007871,0.002009,0.000636,0.000424,0.005867,0.00023,0.002074,5.8e-05,0.0004,0.000363,0.000284,0.00051,0.005272,0.000535,0.002371,0.011327,0.014179,0.0,0.005198,0.004941,0.004782,0.005524,0.002167,0.001583,0.004885,0.0,0.0,0.0,0.011604,0.0,0.000244,0.0,0.019016,...,0.012192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001018,0.0,0.0,0.010282,0.0,0.000215,0.001135,0.0,0.0,0.012329,0.0,0.0,0.0,0.028251,0.005246,0.008628,0.0,0.005618,0.024359,0.0,0.002795,0.0,1.7e-05,2.8e-05,0.025222,0.000101,0.014314,0.0,0.021735,0.040112
12 COLOURED PARTY BALLOONS,0.042964,0.0,0.007517,1.0,0.035432,0.011466,0.02571,0.020847,0.017931,0.011434,0.035159,0.006554,0.003947,0.012145,0.004195,0.006365,0.017108,0.022299,0.01541,0.002146,0.011078,0.07875,0.002305,0.028054,0.0,0.000753,0.001444,7.8e-05,0.028264,0.007315,0.028963,0.035347,0.0,0.0,0.008216,0.006922,0.002572,0.003437,0.0,0.024246,...,0.037287,0.0,0.001364,0.013794,3.4e-05,0.0,0.000902,0.026829,0.002442,0.0,0.001658,0.0,0.0,0.03611,0.0,0.002455,0.002649,0.0,0.001562,0.037591,0.0,0.0,0.0,0.0,0.040681,0.008816,0.002115,0.003484,0.007022,0.0,0.00455,0.0,0.0,0.000317,0.010195,0.00011,0.014176,0.0,0.016412,0.014034
12 DAISY PEGS IN WOOD BOX,0.051271,0.0,0.038666,0.035432,1.0,0.028414,0.040079,0.060255,0.143587,0.02842,0.077665,0.010584,0.007872,0.021197,0.010405,0.009146,0.033746,0.033106,0.039307,0.005285,0.026152,0.120053,0.011356,0.081308,0.0,0.003623,0.007027,0.014317,0.059885,0.009861,0.044961,0.014757,0.0,0.004469,0.001323,0.010912,8.8e-05,0.009393,0.0,0.017762,...,0.083227,0.1557,0.0,0.0,0.0,0.0,0.0,0.060526,0.015541,0.0,0.010434,0.0,0.0,0.059196,0.0,0.000329,0.017198,0.063564,0.0,0.056932,0.0,0.0,0.0,0.0,0.020841,0.020998,0.025162,0.025032,0.019968,0.005464,0.014943,0.002985,0.0,0.000223,0.055568,0.00036,0.006632,0.000208,0.028776,0.073654


In [0]:

def get_similar_products(product_name, similarity=None):
    """Rank every other product by its similarity to ``product_name``.

    Parameters
    ----------
    product_name : label
        Product to look up; must be a column of the similarity matrix.
    similarity : pandas.DataFrame, optional
        Square product-by-product similarity matrix. Defaults to the
        notebook-level ``product_similarity``.

    Returns
    -------
    (pandas.Index, list) or (None, None)
        Product names ordered from most to least similar and the matching
        scores. ``(None, None)`` when ``product_name`` is unknown.
    """
    if similarity is None:
        similarity = product_similarity
    if product_name not in similarity.columns:
        return None, None

    # Remove the query product by label instead of assuming it sorts first:
    # a product with an identical purchase vector ties at 1.0, and an
    # all-zero vector has similarity 0 with itself, so positional [1:]
    # slicing could drop a real neighbour and keep the query product.
    # Only the requested column is sorted (once), with a stable sort so that
    # ties keep the matrix order.
    ranked = (similarity[product_name]
              .drop(labels=product_name)
              .sort_values(ascending=False, kind='mergesort'))
    return ranked.index, ranked.tolist()

In [0]:
# Sanity check: look up the neighbours of a single product.
products, score = get_similar_products('12 ASS ZINC CHRISTMAS DECORATIONS')

# Print the ten most similar products together with their scores.
for name_and_score in zip(products[:10], score[:10]):
    print("{} --- {}".format(*name_and_score))

CUT GLASS T-LIGHT HOLDER OCTAGON --- 0.39671036373016016
TRIANGULAR RIDGED GLASS POSY HOLDER --- 0.21030027720879824
BLACK RECORD COVER FRAME --- 0.20500499248478785
CERAMIC STRAWBERRY TRINKET TRAY --- 0.2036915471144584
FAIRY CAKE CERAMIC BUTTER DISH --- 0.1985346818355613
ETCHED GLASS STAR TREE DECORATION --- 0.19002751663206288
RED REINDEER STRING OF 20 LIGHTS --- 0.168629933697079
MOROCCAN BEATEN METAL DISH --- 0.16676735251841912
ANGEL DECORATION STARS ON DRESS --- 0.15355641707584874
SMALL HANGING IVORY/RED WOOD BIRD --- 0.15219783038968182


In [0]:
# Helper that merges the nearest neighbours of several purchased items.
def get_recommendation(item_array, top_n=10):
    """Collect candidate next products for a basket of purchased items.

    For every item in ``item_array`` the ``top_n`` most similar products
    (from ``get_similar_products``) are gathered into one dictionary. When a
    candidate is suggested by several basket items, its highest score is kept.

    Parameters
    ----------
    item_array : iterable
        Products the customer has already bought.
    top_n : int, optional
        Number of neighbours taken per basket item (default 10).

    Returns
    -------
    dict
        Maps candidate product name to its best similarity score. Items that
        are already in the basket are never returned, and basket items
        unknown to ``get_similar_products`` are skipped.
    """
    basket = list(item_array)
    already_bought = set(basket)
    res = {}
    for item in basket:
        neighbours, scores = get_similar_products(item)
        if neighbours is None:
            # get_similar_products returns (None, None) for unknown products;
            # slicing None would raise TypeError, so skip the item instead.
            continue
        for candidate, sim in zip(neighbours[:top_n], scores[:top_n]):
            if candidate in already_bought:
                # The "next" item should not be one the customer already has.
                continue
            if candidate not in res or sim > res[candidate]:
                res[candidate] = sim

    return res
    


In [0]:
# Predict the next item for a customer from the items already bought.
# Three items are known here (n - 1 = 3), so the n-th item is being predicted.
items = [
    '12 ASS ZINC CHRISTMAS DECORATIONS',
    '12 COLOURED PARTY BALLOONS',
    '12 DAISY PEGS IN WOOD BOX',
]
resp = get_recommendation(items)
# Rank the candidates from most to least similar.
res = sorted(resp.items(), reverse=True, key=lambda x: x[1])

# Slice instead of indexing 0..9 so that fewer than ten candidates does not
# raise IndexError.
for recommendation in res[:10]:
    print(recommendation)

('SAVE THE PLANET COTTON TOTE BAG', 0.9104783035976797)
('MAGNETS PACK OF 4 VINTAGE COLLAGE', 0.8900696841088993)
('MAGIC DRAWING SLATE DINOSAUR', 0.8812436242995388)
('EASTER CRAFT IVY WREATH WITH CHICK', 0.8572010949657745)
('WOODLAND PARTY BAG + STICKER SET', 0.8337442805462084)
('MINI HIGHLIGHTER PENS', 0.8244774418426081)
('MAGIC DRAWING SLATE SPACEBOY', 0.8129907558665912)
('MAGIC DRAWING SLATE DOLLY GIRL', 0.8127932554909505)
('MAGNETS PACK OF 4 CHILDHOOD MEMORY', 0.770718701402017)
('LUNCH BAG SUKI  DESIGN', 0.7628788346557364)


For a customer who initially bought these 3 items, the next item is likely to be 'SAVE THE PLANET COTTON TOTE BAG', 'MAGNETS PACK OF 4 VINTAGE COLLAGE' or 'MAGIC DRAWING SLATE DINOSAUR', as these have similarity scores of around 0.9.

## Next item prediction of CES_Hybrid Data


In [0]:
# Location of the pre-processed CES hybrid export (ces/preprocessed_ces_hybrid.csv).
DATA_PATH = '/content/preprocessed_ces_hybrid.csv'
ces_data = pd.read_csv(DATA_PATH)

In [0]:
# Display the loaded CES records.
ces_data

Unnamed: 0,id,city,income,members,description
0,10001,Belem,25_to_43,4,beef_liver
1,10001,Belem,25_to_43,4,beer_snacks
2,10001,Belem,25_to_43,4,beetroot
3,10001,Belem,25_to_43,4,black_grapes
4,10001,Belem,25_to_43,4,borlotti_beans
...,...,...,...,...,...
57855,90200,Recife,2.5_to_5,4,soy_oil
57856,90200,Recife,2.5_to_5,4,sun_dried_silverside
57857,90200,Recife,2.5_to_5,4,tagliatelle_pasta
57858,90200,Recife,2.5_to_5,4,tomato_pulp


In [0]:
# Every row is one purchased item, so give each row a quantity of 1; the
# column can then be summed per (id, description) pair.
ces_data['quantity'] = 1

In [0]:
# Number of rows per (id, description) pair.
purchase_counts = ces_data.groupby(['id', 'description'])['quantity'].sum()

# Pivot descriptions into columns, replace missing pairs with 0 and index the
# result by id.
mat_data = purchase_counts.unstack().reset_index().fillna(0).set_index('id')

In [0]:
# Display the id-by-product matrix.
mat_data

description,acai_berry,acai_berry_pulp,ajinomoto_sauce,alphabet_pasta,amazon_papaya,anchovy,anjou_pear,annatto,argentinian_apple,arrowroot_powder,asparagus,assorted_candies,atlantic_seabob_shrimp,aubergine,avocado,baby_potato,bacon,bahia_beans,banana,banana_other_kind,basella,basil,battered_beef,battered_chicken_breast,battered_chicken_drumstick,battered_chicken_fillets,battered_chicken_tapas,battered_chicken_wings,battered_whiting_fish_fillets,bay_leaf,bbq_sauce,bean_sprouts,beans_other_kind,beef_breast,beef_brisket,beef_broth,beef_burger,beef_chunk,beef_flank,beef_foot,...,vinegar,vitamin,vitamin_powder,vodka,vodka_imported,waffer_biscuit,watercress,watermelon,west_indian_cherry,west_indian_cherry_pulp,wheat_flour_special,wheat_flour_with_yeast,wheat_for_kibbeh,whipped_cream,whipped_cream_reduced_fat,whisky,whisky_imported,white_beans,white_rice,white_rolls_bread,white_sauce,white_split_tin_bread,whiting_fish,whiting_fish_fillets,whole_oats,whole_wheat_flour,wholegrain_mustard,wholegrain_rice,wholemeal_bread,wholemeal_toast,wholemeal_wheat_bread,wine,wine_imported,yakult,yam,yeast,yellow_mustard,yellow_pepper,yogurt_with_fruit_pieces,yogurt_with_honey
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
10001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10002,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10003,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
10004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90196,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90197,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [0]:
# Fill any remaining gaps with the per-product column mean.
# NOTE: mat_data was already zero-filled when it was built, so no NaN is left
# and this step returns a copy with unchanged values.
column_means = mat_data.mean(axis=0)
final_data_ces = mat_data.fillna(column_means)

In [0]:
# Transpose so that rows are products and columns are ids (item-item layout).
# Recomputed from mat_data, using the same mean fill as the previous cell,
# instead of transposing final_data_ces in place: the self-referential
# `final_data_ces = final_data_ces.T` flips the matrix back every second time
# the cell is executed.
final_data_ces = mat_data.fillna(mat_data.mean(axis=0)).T

In [0]:
# Item-item cosine similarity for the CES products, labelled with the product
# names on both axes.
ces_labels = final_data_ces.index
product_sim_ces = pd.DataFrame(
    cosine_similarity(final_data_ces, final_data_ces),
    index=ces_labels,
    columns=ces_labels,
)
product_sim_ces.head()

description,acai_berry,acai_berry_pulp,ajinomoto_sauce,alphabet_pasta,amazon_papaya,anchovy,anjou_pear,annatto,argentinian_apple,arrowroot_powder,asparagus,assorted_candies,atlantic_seabob_shrimp,aubergine,avocado,baby_potato,bacon,bahia_beans,banana,banana_other_kind,basella,basil,battered_beef,battered_chicken_breast,battered_chicken_drumstick,battered_chicken_fillets,battered_chicken_tapas,battered_chicken_wings,battered_whiting_fish_fillets,bay_leaf,bbq_sauce,bean_sprouts,beans_other_kind,beef_breast,beef_brisket,beef_broth,beef_burger,beef_chunk,beef_flank,beef_foot,...,vinegar,vitamin,vitamin_powder,vodka,vodka_imported,waffer_biscuit,watercress,watermelon,west_indian_cherry,west_indian_cherry_pulp,wheat_flour_special,wheat_flour_with_yeast,wheat_for_kibbeh,whipped_cream,whipped_cream_reduced_fat,whisky,whisky_imported,white_beans,white_rice,white_rolls_bread,white_sauce,white_split_tin_bread,whiting_fish,whiting_fish_fillets,whole_oats,whole_wheat_flour,wholegrain_mustard,wholegrain_rice,wholemeal_bread,wholemeal_toast,wholemeal_wheat_bread,wine,wine_imported,yakult,yam,yeast,yellow_mustard,yellow_pepper,yogurt_with_fruit_pieces,yogurt_with_honey
description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
acai_berry,1.0,0.0,0.133112,0.0,0.044836,0.0,0.0,0.151976,0.056136,0.089087,0.0,0.060783,0.0,0.0,0.219943,0.0,0.033086,0.0,0.110507,0.0,0.0,0.0,0.0,0.081832,0.0,0.0,0.0,0.0,0.0,0.034943,0.0,0.0,0.01631,0.099834,0.127321,0.044544,0.029161,0.034503,0.0,0.119523,...,0.133987,0.0,0.0,0.0,0.0,0.0,0.0,0.030792,0.074848,0.157485,0.035635,0.018509,0.054554,0.141806,0.0,0.041239,0.0,0.043644,0.010817,0.049417,0.0,0.374737,0.06816,0.176006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.078386,0.087287,0.0,0.235159,0.136626,0.050063,0.0,0.01118,0.065795
acai_berry_pulp,0.0,1.0,0.038125,0.0,0.089891,0.0,0.0,0.019346,0.064312,0.0,0.0,0.046424,0.0,0.02665,0.031497,0.0,0.07581,0.0,0.058432,0.021678,0.0,0.0,0.0,0.03125,0.0,0.072169,0.078087,0.0,0.0,0.040032,0.188982,0.102062,0.0,0.0,0.0,0.076547,0.100223,0.0,0.043519,0.0,...,0.084426,0.0,0.0,0.0,0.0,0.068752,0.111803,0.105831,0.0,0.036084,0.112268,0.0,0.0625,0.054153,0.0,0.0,0.0,0.0,0.06196,0.028307,0.0,0.0,0.039043,0.036662,0.05,0.042258,0.0,0.0,0.048113,0.0,0.0,0.067352,0.05,0.088388,0.0,0.044721,0.114708,0.125,0.089655,0.0
ajinomoto_sauce,0.133112,0.038125,1.0,0.039375,0.109667,0.0,0.0,0.141608,0.026153,0.062257,0.0,0.056637,0.059815,0.097538,0.096065,0.029348,0.173413,0.0,0.11287,0.013223,0.0,0.0,0.0,0.038125,0.0,0.0,0.0,0.0,0.0,0.097677,0.0,0.0,0.011398,0.023256,0.022244,0.093386,0.071325,0.024112,0.053093,0.111369,...,0.159179,0.0,0.054779,0.056637,0.0,0.041939,0.068199,0.086075,0.052307,0.110056,0.080934,0.02587,0.038125,0.14039,0.0,0.02882,0.0,0.0,0.075591,0.086335,0.04598,0.215666,0.047633,0.178908,0.0,0.077331,0.076249,0.0,0.058697,0.053916,0.0,0.082169,0.060999,0.080875,0.054779,0.04092,0.0,0.0,0.109378,0.0
alphabet_pasta,0.0,0.0,0.039375,1.0,0.132628,0.0,0.062622,0.05994,0.110702,0.0,0.0,0.047946,0.0,0.027524,0.03253,0.0,0.058722,0.0,0.080464,0.022389,0.0,0.149071,0.0,0.0,0.0,0.0,0.080648,0.0,0.0,0.041345,0.0,0.105409,0.0,0.039375,0.0,0.131762,0.051755,0.0,0.044947,0.0,...,0.095121,0.0,0.0,0.0,0.0,0.142014,0.11547,0.109302,0.088561,0.0,0.094868,0.0219,0.06455,0.111858,0.0,0.0,0.0,0.0,0.051194,0.0,0.0,0.052164,0.040324,0.09466,0.05164,0.0,0.0,0.0,0.099381,0.0,0.0,0.023187,0.0,0.136931,0.046374,0.046188,0.0,0.129099,0.079368,0.0
amazon_papaya,0.044836,0.089891,0.109667,0.132628,1.0,0.088969,0.074749,0.115271,0.136544,0.02097,0.0,0.181232,0.090664,0.158795,0.135903,0.049427,0.210281,0.051367,0.302144,0.169254,0.05743,0.088969,0.038829,0.109154,0.07705,0.118626,0.168464,0.0,0.074141,0.106928,0.0,0.104852,0.115179,0.094,0.142359,0.20446,0.271134,0.089339,0.107301,0.046891,...,0.430512,0.045944,0.101483,0.047693,0.036322,0.230729,0.149317,0.30201,0.070474,0.12604,0.379563,0.148133,0.128416,0.280947,0.0,0.077659,0.048731,0.06164,0.280076,0.174483,0.06195,0.103776,0.080221,0.21845,0.071913,0.078143,0.025683,0.045944,0.10874,0.054482,0.074141,0.216804,0.06164,0.127126,0.12916,0.188369,0.070706,0.07705,0.26579,0.06195


In [0]:
def get_similar_product_ces(product_name, similarity=None):
    """Rank every other CES product by its similarity to ``product_name``.

    Parameters
    ----------
    product_name : label
        Product to look up; must be a column of the similarity matrix.
    similarity : pandas.DataFrame, optional
        Square product-by-product similarity matrix. Defaults to the
        notebook-level ``product_sim_ces``.

    Returns
    -------
    (pandas.Index, list) or (None, None)
        Product names ordered from most to least similar and the matching
        scores. ``(None, None)`` when ``product_name`` is unknown.
    """
    if similarity is None:
        similarity = product_sim_ces
    if product_name not in similarity.columns:
        return None, None

    # Remove the query product by label instead of assuming it sorts first:
    # a product with an identical purchase vector ties at 1.0, and an
    # all-zero vector has similarity 0 with itself, so positional [1:]
    # slicing could drop a real neighbour and keep the query product.
    # Only the requested column is sorted (once), with a stable sort so that
    # ties keep the matrix order.
    ranked = (similarity[product_name]
              .drop(labels=product_name)
              .sort_values(ascending=False, kind='mergesort'))
    return ranked.index, ranked.tolist()

In [0]:
# Sanity check: print the ten products most similar to a single CES product.
animes, score = get_similar_product_ces('amazon_papaya')
for name_and_score in zip(animes[:10], score[:10]):
    print("{} --  {}".format(*name_and_score))

french_bread --  0.4463091570732047
egg --  0.44041832481001403
chocolate_powder --  0.4310899208108515
vinegar --  0.4305120879429061
mayonnaise --  0.4163955911684075
garlic --  0.40961649233592456
canned_peas --  0.3987599228786232
cauliflower --  0.3947078178725582
soy_oil --  0.38967659810337474
cabbage --  0.3889812577077757


In [0]:
# Helper that merges the nearest neighbours of several purchased CES items.
def get_rc(arr, top_n=10):
    """Collect candidate next products for a basket of CES items.

    For every item in ``arr`` the ``top_n`` most similar products (from
    ``get_similar_product_ces``) are gathered into one dictionary. When a
    candidate is suggested by several basket items, its highest score is kept.

    Parameters
    ----------
    arr : iterable
        Products already bought.
    top_n : int, optional
        Number of neighbours taken per basket item (default 10).

    Returns
    -------
    dict
        Maps candidate product name to its best similarity score. Items that
        are already in the basket are never returned, and basket items
        unknown to ``get_similar_product_ces`` are skipped.
    """
    basket = list(arr)
    already_bought = set(basket)
    res = {}
    for item in basket:
        neighbours, scores = get_similar_product_ces(item)
        if neighbours is None:
            # get_similar_product_ces returns (None, None) for unknown
            # products; slicing None would raise TypeError, so skip the item.
            continue
        for candidate, sim in zip(neighbours[:top_n], scores[:top_n]):
            if candidate in already_bought:
                # The "next" item should not be one that was already bought.
                continue
            if candidate not in res or sim > res[candidate]:
                res[candidate] = sim

    return res
    


In [0]:
# Predict the next item from the items already bought (two known items here).
items = ['alphabet_pasta', 'amazon_papaya']
resp = get_rc(items)
# Rank the candidates from most to least similar.
res = sorted(resp.items(), reverse=True, key=lambda x: x[1])

# Slice instead of indexing 0..9 so that fewer than ten candidates does not
# raise IndexError.
for recommendation in res[:10]:
    print(recommendation)

('french_bread', 0.4463091570732047)
('egg', 0.44041832481001403)
('chocolate_powder', 0.4310899208108515)
('vinegar', 0.4305120879429061)
('mayonnaise', 0.4163955911684075)
('garlic', 0.40961649233592456)
('canned_peas', 0.3987599228786232)
('cauliflower', 0.3947078178725582)
('soy_oil', 0.38967659810337474)
('cabbage', 0.3889812577077757)


For a customer who initially bought these 2 items ('alphabet_pasta', 'amazon_papaya'), the next item is likely to be 'french_bread' or 'egg', as these have the highest similarity scores (approximately 0.44–0.45).