In [1]:
import pandas as pd
import random
from collections import defaultdict

In [2]:
from surprise import SVD
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import train_test_split

## Constants

In [83]:
# Tunable constants for the recommendation pipeline.
top_N = 30 # number of best-estimated foods kept per user from the SVD predictions
top_N_const = 10 # cut-off for the number of foods kept per (user, constraint) after filtering
nutr_error = 50 # accept -e% ~ +e% from target nutrients

In [4]:
# Sample sizes: how many randomly picked examples are shown per demo below
u_sample = 5  # users drawn with select_random
incl_ingr_sample = 3  # 'include' ingredients drawn per sampled user
excl_ingr_sample = 3  # presumably 'exclude' ingredients per sampled user -- not used in the cells shown here
target_nutr_sample = 3  # presumably target-nutrient rows to sample -- not used in the cells shown here

In [5]:
# Ratings file; it is read below as comma-separated "user item rating" lines
file_path = '../data/rating_data.csv'

In [6]:
# Each line is parsed as "user,item,rating" with ratings on a 0-5 scale,
# then the ratings file is loaded through that reader.
reader = Reader(line_format='user item rating', sep=',', rating_scale=(0, 5))
data = Dataset.load_from_file(file_path, reader = reader)

In [7]:
# train on the whole trainset
# train_set = data.build_full_trainset()
# test_set = train_set.build_anti_testset()

In [8]:
# split train set and test set (25% of the ratings are held out for testing)
# random_state pins the shuffle so Restart & Run All reproduces the same split
train_set, test_set = train_test_split(data, test_size=.25, random_state=42)

In [9]:
# use SVD algorithm
# random_state fixes the random factor initialisation so training is repeatable
algo = SVD(random_state=42)

In [10]:
# train: fit the SVD model on the training split
algo.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x25c11cc96a0>

In [11]:
# predict rating for test set; the result is what get_top_n consumes below
predictions = algo.test(test_set)

In [12]:
def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's n best-estimated items.

    Args:
        predictions(list of Prediction objects): Predictions as returned by
            the test method of an algorithm. Each one unpacks into
            (user id, item id, true rating, estimated rating, details).
        n(int): Maximum number of recommendations kept per user. Default
            is 10.
    Returns:
    A defaultdict(list) keyed by user (raw) id whose values are lists of
        (raw item id, rating estimation) tuples, ordered from highest to
        lowest estimate and holding at most n entries.
    """

    # Collect (item, estimate) pairs under their user.
    per_user = defaultdict(list)
    for user_id, item_id, _true_rating, estimate, _details in predictions:
        per_user[user_id].append((item_id, estimate))

    # Rank every user's pairs by estimate, best first, and cut to n.
    # Only existing keys are reassigned, so iterating a key snapshot is safe.
    for user_id in list(per_user):
        ranked = sorted(per_user[user_id], key=lambda pair: pair[1], reverse=True)
        per_user[user_id] = ranked[:n]

    return per_user

In [13]:
# Keep, for every user in the predictions, the top_N items with the highest estimate
top_N_recommend = get_top_n(predictions, top_N)

In [14]:
# Show each user's recommended item ids, highest estimate first
for uid in top_N_recommend:
    recommended_ids = [pair[0] for pair in top_N_recommend[uid]]
    print(uid, recommended_ids)

6316 ['33305', '73929', '119320', '51644', '130399', '119372', '156015', '120226', '103100', '27766', '28199', '89274', '175089', '48168', '15298', '158480', '106867', '50909', '146391', '49063', '42256']
846 ['146982', '85715', '151678', '34390', '46833', '172179', '21814', '89188', '150120', '175555', '76739', '175772', '171522', '67106', '43154', '77292', '90100', '113987', '83661', '178090', '151330', '51687', '84382', '165439', '37204', '3894', '140803', '79621', '135143', '33228']
483 ['130961', '56307', '87391', '116491', '55223', '112132', '63577', '148690', '109678', '103208', '38720', '69221', '2820', '48821', '52114', '52593', '89274', '48033', '84016', '115148', '159105', '155990', '57868', '50389', '152698', '61714', '8138', '63377', '60492', '122091']
1573 ['63482', '128596', '103574', '60133', '61522', '18266', '131506', '21302', '165956', '170569', '41204', '72064', '136139', '161588', '36471', '82283', '162916', '168119', '7134', '124836', '60160', '76645', '168732', '

6060 ['71879', '121777', '51263', '155665', '119837', '8157', '96302', '52334']
484 ['110289', '96966', '103033', '59348', '64725', '67020', '65062', '47778', '49070', '124214', '55223', '79954', '116705', '41740', '16700', '35802', '68743', '125548', '130672', '69677', '13252', '110941', '178244', '56660', '106489', '106444', '110693', '77591', '50409', '139473']
5253 ['154511', '52748', '152528', '79284', '128535', '80761', '158632', '167167', '27071', '2485', '38989', '49833', '178051', '36204', '155527']
918 ['127024', '68477', '155585', '116477', '4187', '178084', '171057', '127580']
5570 ['104729', '8582', '143474', '7004', '84216', '11636', '42418', '49111', '38123', '86415', '53049', '118462', '41100', '139633', '100414', '157508', '39936', '129505', '53765', '78281', '122337', '30629', '59002', '33664']
2912 ['16941', '85810', '10253', '1124', '6190', '97302', '130480', '39321', '14846', '76885', '167853', '88697', '114945', '109237', '125078', '121221', '118775', '116870', '9

387 ['168038', '139591', '143432', '27552', '154727', '109557']
1384 ['104723', '78624', '17876', '78333', '32282', '90385', '13730', '39619', '114402', '135961', '126649', '15215', '46184', '101713', '165260', '53713', '109766', '146306', '153165', '129505', '9325', '177568', '19440', '143701', '88221', '11747', '139304', '161963', '133180', '37027']
3097 ['1918', '23014', '19634', '2105', '57069', '157255', '41657', '30291', '132697', '27360', '149210', '174774', '93480', '113685', '62427', '124371', '177628', '65220', '92497', '176716', '42847', '33715', '3205', '96782', '37360', '68427', '17531', '5351', '168402', '49536']
3987 ['17258', '108642', '134885', '18373', '129336', '65776', '152403', '129174', '69121', '87450', '101739', '135875', '168476', '129719', '165826', '63487', '4373', '151451', '139548', '72873', '169420', '45740', '136212', '9499', '170814', '169505', '128281', '172114', '56103', '163279']
2598 ['149428', '136789', '84983', '131315', '26920', '64219', '11102', 

6625 ['133958', '103500', '173834', '19209', '112817', '95276', '12913', '75081', '81966', '20578', '121209', '118297', '3552']
2015 ['154511', '63552', '21854', '135961', '28597', '113468', '140819', '5099', '143956', '102567', '154689', '125818', '126870', '24525', '102218', '173750', '12582', '109558', '83882', '141756', '76265', '53690', '64489', '60909', '116064', '5210', '136938']
12066 ['139822', '79095', '84719', '114445', '84699', '65899', '46490', '56425']
2849 ['65200', '9863', '67666', '119308', '102888', '41559', '1088', '116310', '4784', '1481', '112127', '76192', '11295']
6282 ['3257', '52505', '68379', '34928']
4992 ['145805', '19086', '113620', '83393', '81284', '14509', '2280', '21461', '121494', '146121', '110941', '79316', '28099', '9578', '78333', '34629', '19166', '56425', '45824', '92697']
5575 ['176120', '158588', '21802', '140101', '10394', '150229', '119627', '176259', '10566', '31861', '63522', '30794', '34251']
3743 ['177446', '93480', '101831', '15949', '53

3017 ['48398', '1209', '118051', '155867', '34602', '102699', '13197', '148451', '7012', '41788', '65106', '111032', '74252', '46490', '61480']
21447 ['143345', '122683', '88440', '165558']
18254 ['148487', '36555', '38511']
6715 ['9779', '33296', '59235', '167070']
2366 ['131819', '75244', '90189', '143020', '61725', '37348', '30059', '39165', '79130', '74052', '94992', '56232', '36463', '174165', '62260', '15449', '4886', '103415', '39314', '153638', '65899', '140941', '109508', '108401', '113698', '105376', '68689', '171987', '21908', '37968']
14845 ['127080', '14838']
2604 ['165112', '5299', '166814', '93311', '125805', '31472']
9343 ['95452']
12672 ['89113']
3255 ['54495', '62469', '1319', '30756', '145305', '66613']
2608 ['994', '127377', '30000', '43080', '53395']
13680 ['12583', '154916']
8172 ['86420', '25999', '144458', '84120', '7012', '84437']
4139 ['54730', '105220', '32840', '125939', '155664']
3354 ['17181', '34410', '75017', '168476', '156967', '33305', '41372', '142542

6790 ['99184', '52211', '38511', '68665']
12135 ['142052', '118083', '156278', '165739', '73879', '17360']
1498 ['28705', '4708', '177851', '142979', '85251', '152670', '72291', '119998', '21781', '114570', '7480', '164095']
80 ['53545', '110941', '32715', '140087', '92542', '32310', '128767', '46490', '114480', '132951', '55659', '19068', '11944', '29638', '147765', '154689', '52100', '138635', '26', '109809', '95378', '162416', '40527', '168126', '33026', '23672', '155664', '127083']
2254 ['56425', '65163', '177020', '161202', '62636', '134610', '13779', '10394', '53408', '175410', '4839', '57758', '9054', '66416', '84796', '27164', '48614', '99511', '151770', '36005', '74663', '66711', '84077', '31343', '137597', '90183', '89113', '127080', '128131', '121494']
13137 ['136526', '26863', '147374', '54986', '19812', '177882']
10986 ['8177', '106973', '20172', '102501']
2948 ['93120', '3647', '31881', '105478', '117171', '60344', '113281', '29746', '85925', '29638', '23291', '106882', '

6615 ['162950', '5930', '66711', '38696', '172814', '146204', '126965', '33510', '156041', '86843', '137086', '79017', '161211', '85769', '43615', '114849', '44530', '113765', '72975', '130137', '56908', '9912', '39515']
11484 ['147180', '133520', '131055', '161675', '77155']
558 ['11466', '112978', '47837', '10394', '148461', '93685', '62553', '21213', '39308', '82873', '145538', '29858', '7634', '84321', '99762', '43182', '130573', '154036', '97372', '5939', '140485', '2856', '19019', '163671', '129768']
5147 ['14786', '2362', '73487']
10371 ['163130', '30339', '46229', '158696', '96152', '86440', '40315', '129980', '94004', '159578']
22269 ['147374', '55771']
10407 ['24611', '128131', '94559', '52789', '108786', '160477']
4052 ['127080', '41063', '147180', '142941', '63746', '67233', '130736', '3164', '101995', '91586', '55362', '89637', '92365', '16151', '155866', '2207']
2485 ['169158', '9190', '43057', '159554', '79130', '29746', '66593', '61749', '103923', '65584', '48421', '377

5358 ['108630', '10076', '110391', '66855', '172585']
20857 ['161211', '55971']
7615 ['147694', '4432', '71596', '30573', '121487', '52510', '46353', '14388', '54277', '46165', '33130', '6940', '99732', '108216', '108121', '157628', '111550', '6446', '114335', '164224', '158271', '12701', '141099', '10735']
21576 ['162789', '154040', '68209']
20038 ['120777']
6600 ['18693', '35410', '117469', '81554', '68479']
16509 ['21286', '39762', '123123']
1363 ['59880', '56425', '68483', '146997', '169162', '31741', '69783', '141094', '171320']
518 ['44860', '103088', '68351', '14048', '121513', '124371', '29365', '75425', '155513', '50437', '69998', '22627', '116338', '136931', '107214', '140206', '77852', '84558', '2848', '107940', '8807', '82187', '67155', '8832']
1014 ['37949', '53327', '324']
4045 ['152214', '169682', '13516', '16493', '55598', '19297', '155736', '118451', '172714', '113961', '138055', '65109', '37217', '175525', '46841', '165154', '142774', '98186', '49817', '79968', '51843

12526 ['161029', '22115', '52334', '138882', '130433']
19029 ['106975']
18611 ['82605']
17462 ['70150']
9909 ['144585', '15929', '162644', '78333', '14711', '162866', '57427', '36670', '131890', '71488', '89665', '168561', '134665', '142012', '129979', '112978', '56336', '113698', '154872', '142799', '83716', '50712', '88706']
4332 ['13104', '13730', '109176', '89409', '86519', '144458', '38033', '86066', '153207', '35645']
669 ['743', '97959', '6397', '44232']
8733 ['97380', '139035', '5502']
10355 ['164141', '74633', '8170']
7037 ['48743', '34717']
16766 ['64526', '90709', '80978', '148810']
6405 ['147383', '19297']
5861 ['134826', '95921', '124786', '16977', '168584', '58795', '138555', '79130', '176663', '42421', '148686', '65535', '108355', '111119', '17613', '54775', '77268', '114163', '125148', '124188', '67379', '25319']
19445 ['52050']
3045 ['71530', '15735', '131887', '11466', '166155', '161936', '156832', '118496', '125501', '64814', '122379']
23538 ['56425']
1461 ['159693',

8448 ['55559', '82370', '118001', '113525', '96175', '20691', '22193', '57203']
17110 ['88097', '20898', '146214', '31965', '43748']
22638 ['106969', '79996', '164830']
14446 ['168167', '97469']
5397 ['37259', '5289', '116139', '138481']
4568 ['128647', '117899', '172616', '42406', '165828', '25687', '43958', '138207', '17479']
14629 ['96505', '128607', '162416']
4439 ['139267', '96077', '85475', '72204', '89906', '94887', '85352', '176215', '74037', '170359', '170297', '29048']
11701 ['124891', '49686', '27724', '126156']
4297 ['113698', '163298', '173688', '171559', '3165', '54149']
1222 ['147016', '91777', '125205', '150120', '28650', '171646', '124383', '23791', '129004', '38803', '45309', '1741']
4387 ['24611', '100133', '19777', '60911', '35790', '122228', '42197', '20510', '166102', '130577', '131496', '67911', '131215', '119077', '153531', '64804', '53595', '7948', '46487', '26686', '98000', '138875', '71885']
22102 ['160369']
7321 ['30125', '116630', '152192', '129027', '23804

11690 ['117899']
1089 ['7938', '93642', '118515', '365', '31823']
1677 ['647', '154851', '42396', '129518', '22260', '25410']
8520 ['72253', '166812', '168605']
19877 ['117899', '56402', '42528', '81220']
8685 ['171454', '5450', '134838', '101819', '48069', '101192', '32780', '84747', '92292', '8859']
9586 ['25450']
23074 ['61725', '171717', '106975', '135961']
7801 ['98698']
8002 ['5511', '131506', '102598', '87620', '147559', '12160', '145753', '102523', '86859']
3519 ['42095', '168971', '20255', '77718', '178051', '57551', '130853', '75029', '41171', '10955', '77785', '64305', '1427']
1063 ['159123', '159419', '26733', '155510']
7983 ['4790', '117356', '106975', '31149', '166438', '131023', '123123', '9651', '43057', '46490', '7926', '62999', '104832', '105000', '64311']
3414 ['149428', '138873', '97466', '1334', '168258', '55636', '144312', '111487', '110420', '93847', '39867', '154852', '149703', '160237', '92697', '160189', '109383', '121790', '96404', '101819', '128114']
18926 [

2682 ['161985', '11324', '12095', '99787', '22259', '15180', '176551', '19634', '47715', '21416']
7762 ['4561', '163625', '9480', '58317', '20994', '65776', '59584']
16630 ['95316', '77478']
4199 ['1783', '35079']
14048 ['44177']
24768 ['103175', '95880']
7466 ['99914', '128607', '162031', '113704', '87138', '119877', '89084', '84914', '112658', '14968', '85733', '30821', '88713', '157308', '160270', '115087', '62259', '148382', '8792']
5707 ['168906', '84405', '128787']
19084 ['117899']
7562 ['71592', '142052', '79691', '68250', '71170', '57106', '34186']
6400 ['15307', '115861', '130354', '106926', '20816', '3332', '14896', '112535']
13204 ['171132']
4163 ['114070', '145326', '91978']
19849 ['126870', '89218', '141503']
1405 ['118496', '52257', '60626', '23090']
2342 ['92597', '116296', '96410', '111345', '120059', '10241', '106010', '101579', '104182', '1310']
6659 ['129214', '84437', '114478']
23962 ['134451']
8675 ['93847', '62365', '97466', '97819', '151058', '55003', '156529']
8

9979 ['145641', '42900', '95880', '8886', '144648', '177884']
2104 ['18499', '59986', '52727', '65822', '53728', '90038', '30125', '147057', '130650', '124230']
15524 ['144277', '169900']
18401 ['64969']
6296 ['113824', '145683', '145305', '3856', '62659', '72415', '9002', '37047']
3086 ['35164', '166633', '130972', '155731', '125523', '101456', '90129', '69752', '29274', '102695', '101060', '141558']
3322 ['52334', '52602', '81021']
14361 ['36359', '13779', '152329', '172747']
14486 ['173085', '121186']
1691 ['654', '73622', '67158', '4471']
4899 ['78412', '113069']
7721 ['147427', '10076']
14891 ['34236', '67738']
1948 ['68803', '108342', '19085', '35746']
7976 ['4790', '99801', '41668', '153288']
16560 ['18909']
1152 ['11115', '82257', '5077', '37609', '154481', '126557', '100764', '140823', '76449', '144555', '138921', '10866', '1092', '49465', '46957', '156353', '111810', '129430', '106840']
14979 ['150197', '91044', '25176', '59790', '166233', '108289', '49310', '118814', '15063'

19306 ['130507', '170097', '152455', '117899', '78570', '68803', '97465']
5680 ['46779', '55221']
13692 ['80978', '102527', '115910', '59185']
8567 ['113346', '27013', '122729', '26928']
7468 ['161856', '16248', '75579']
2351 ['1224', '55559', '114028', '148527', '92689', '8026', '43853', '156437', '147729', '903', '30507', '171925']
20760 ['153988']
8484 ['156041']
2913 ['5976', '147344', '93952', '13908', '37144', '20821', '176746', '90157', '97948', '19849', '156649', '166586', '12198']
22920 ['154916', '54749', '99787']
6205 ['130786']
9053 ['170560', '33148', '101371', '135961', '21155', '90280', '84705']
17600 ['119351', '74367']
13552 ['40304', '177839']
10748 ['138991', '95378']
17122 ['117899', '102209']
18380 ['28552', '66500']
5495 ['12178', '163923', '42718', '177550', '103767', '106975']
13651 ['135961', '33864', '112411', '127068']
23957 ['135961']
10858 ['78333', '84276', '72481', '143818', '8027', '143310']
4049 ['97948', '161211', '1711', '86781', '14789', '54613']
129

16902 ['48090', '105721']
4033 ['112024', '16267', '14901', '132812', '2302', '30914', '8392']
23611 ['172585']
23177 ['128479', '174245', '127083']
22364 ['46741']
17529 ['19770', '131983']
18710 ['37359']
4501 ['133103']
9003 ['5838', '154054', '89084', '117899', '79316', '35598', '84225', '144257', '8533', '120706', '56425']
21705 ['135961']
6574 ['101819', '13950', '54421']
23512 ['129589']
11021 ['24513', '152159', '50009', '116390', '8240', '56470', '178119', '71394', '71487']
18805 ['19297']
10111 ['172785', '154688']
6515 ['84705', '39905']
22414 ['47292']
20735 ['37210']
8394 ['149629', '113698', '55066', '52727']
16825 ['27952', '63425', '19813', '83725', '52176']
21298 ['38923', '149405']
10673 ['142941', '121494', '11895', '146382', '31827', '94179', '31430']
6771 ['15173', '110916', '29362', '42443', '136935', '33338', '3690']
13785 ['85409', '166646']
16042 ['35986', '88629']
23091 ['86639']
21335 ['52183']
13275 ['145875', '11814', '162352']
1979 ['21755', '35164', '1411

23557 ['80977']
12078 ['149164', '166212']
17910 ['62886']
6947 ['73888']
17561 ['19770']
17345 ['49279', '154068']
22516 ['97975', '99787']
17518 ['55226', '29746', '124041', '115898']
3274 ['49522', '137851', '154490', '43748']
14073 ['13425', '95486']
1050 ['145984', '35182', '127038', '82766', '31938', '56058']
1747 ['85490', '62355', '115884', '165217', '73498', '57086']
3532 ['62794', '111708', '100399', '21202', '138680', '138928']
6758 ['3676']
12532 ['27953', '93480']
3183 ['138882', '46132', '1274', '100152']
9914 ['152904', '150412', '97261', '52300', '102632']
12419 ['145727']
15103 ['56388', '149428']
23065 ['171717']
22292 ['136329', '99937']
12110 ['135781', '22945', '82712']
17744 ['84833', '138918', '23003']
22078 ['149988']
20509 ['159297', '113963']
23546 ['56425']
24900 ['103800']
20563 ['162032', '161471']
20962 ['37059']
15522 ['19349', '157097']
1082 ['121851', '46234', '106802', '93728']
11287 ['127852', '103088', '45267', '45649', '103369', '123808']
21735 ['56

4948 ['2262', '170188']
5750 ['27226', '68803']
1905 ['165343', '128192', '135675', '103183', '160193', '23291', '129305']
21634 ['40959', '77286']
18359 ['46949']
10961 ['153429', '110773', '8177', '124836']
9049 ['160815', '38746', '14792']
24248 ['74467']
9969 ['142634', '11807']
4603 ['86439']
7833 ['132583', '82100', '163647']
1788 ['99787']
6234 ['3231', '106926', '100436']
9221 ['100387', '39493', '61264']
24700 ['147383']
10807 ['112006', '17368']
15116 ['15173', '136962']
8925 ['80859']
10089 ['73729', '7012']
20238 ['48265']
14608 ['66897', '29409']
24324 ['108289', '151959']
10948 ['110312']
12345 ['154279', '159939', '75576', '108522', '71694']
11240 ['17766']
2148 ['143019', '806']
18116 ['165856', '147273']
17847 ['158404']
20809 ['87820']
16625 ['131968', '58352', '172562', '147507', '70556', '91852']
8549 ['89143', '145152']
23073 ['53870']
15695 ['16456']
24838 ['149783']
6069 ['43824', '3946', '155954']
23335 ['80262', '163255']
18881 ['101229', '113644', '70289']
184

4418 ['85490', '168319']
9396 ['101904']
22612 ['136244']
18411 ['36005', '92082']
3035 ['157857', '99939', '23933', '81109', '32449', '32208', '111119']
24807 ['97967']
17471 ['176259', '117521', '177392']
20228 ['33196']
4208 ['38413', '155736', '117899', '41436', '135512', '28005', '89113', '63760', '11324', '34981', '22967']
19653 ['40395']
5834 ['38328', '146286', '78057', '91386', '88702']
22203 ['174298', '73804']
3008 ['31022', '161249', '138978']
19083 ['81116', '96883']
7471 ['125296', '4287', '72211', '162018', '119749']
5510 ['105392']
22732 ['117111', '128958']
3899 ['10538', '96770', '113937', '109775', '112260']
10388 ['112723', '98203', '66392', '136655']
14861 ['50909', '92341', '14873']
23782 ['122679']
20430 ['125296']
8264 ['112889', '106926', '123336']
8395 ['40062']
1501 ['43748']
22748 ['50909', '134610']
9174 ['174331', '123686']
13741 ['157528', '109205', '92970']
23549 ['89888']
9634 ['40017']
2621 ['19297', '37047']
20783 ['147350']
13339 ['72387']
7557 ['111

21770 ['75037']
5683 ['90112']
8295 ['5077', '29576', '135781', '59949']
18187 ['134665', '125637']
7430 ['18593', '155800', '105889', '151024', '128596', '70152', '67884']
2165 ['115726', '163127', '169992']
20506 ['154916', '30125']
4710 ['95770', '78435']
17233 ['91255']
5050 ['106697', '62427', '87980', '100151', '94187']
21928 ['64847']
22454 ['96830']
15531 ['105272']
8842 ['135110']
18182 ['19634', '72831', '141200']
7869 ['60504']
12774 ['36155', '53408', '25394', '10975']
19589 ['30590', '45812']
14441 ['19297', '28552']
23569 ['163647']
24071 ['83092']
24430 ['149456', '143117']
21829 ['37359']
5022 ['25290']
8764 ['5529', '65630', '73888']
14670 ['14451', '114447', '98209']
30 ['160999', '33524']
14888 ['146982']
20472 ['34296', '69399']
15480 ['121513', '16052']
9175 ['174023', '176215']
6816 ['3763', '39011']
15631 ['134610', '57320', '117899']
2670 ['154054', '46641']
12848 ['48823', '139571', '131006']
5540 ['143777', '113961']
24827 ['98825']
4401 ['32511', '67752']
886

20044 ['41819']
9121 ['21453', '27211']
10038 ['90536', '99787', '21497', '150728', '85648', '150321', '60148', '6987', '98734', '47668', '13175', '161791', '77974', '28201', '134610', '173131', '59076']
2048 ['175416', '61519', '49391', '30763']
24442 ['136364']
8385 ['116421', '92697']
6127 ['82712', '109747', '30048', '8934', '29338', '168342', '69640']
10540 ['69585']
9761 ['44502', '58146']
20143 ['32747']
21948 ['105232']
20371 ['33864', '41603', '174442', '134510', '71812']
21514 ['75857', '158734']
1110 ['128461', '106785']
10103 ['97948', '103033', '98825', '46510', '125960']
613 ['99511', '37217', '81057', '104686']
4725 ['78505', '58653', '34154', '55422']
16877 ['145613']
15135 ['115278', '15173', '45546', '102078']
15209 ['15405', '38394']
6224 ['47546', '139505', '3222', '71362', '11233']
24393 ['79095']
14104 ['161312', '89113']
8339 ['96558']
19651 ['166767', '171717']
6988 ['174765', '101200', '51836', '106975', '69390']
6145 ['6486', '57713', '166358']
8461 ['20822']


21839 ['143710', '104345']
12409 ['91485', '56952', '143510']
17992 ['122384']
17930 ['79999']
10045 ['54421']
14128 ['13476', '92581']
13387 ['166441']
20163 ['142851', '115790']
8200 ['109110', '103427', '14827']
5211 ['125954', '94904', '63569']
6905 ['17336', '158292', '3814', '138882', '104686']
5514 ['97201']
16031 ['17258', '150229', '46490']
1465 ['15059']
18579 ['135961']
23469 ['102513']
13833 ['160230', '12967']
3074 ['15173', '125637']
22020 ['73956']
2243 ['89947', '15295', '9545']
9911 ['62058']
4751 ['129711']
16960 ['87938']
21883 ['48416']
18036 ['69658', '43096', '60487']
12328 ['130108', '63193', '83948', '32308']
11735 ['101792']
18049 ['60418', '54149']
22801 ['55596']
5562 ['29283', '63625', '88428', '114152']
21920 ['42988']
16161 ['146575', '121145']
16202 ['165944', '32200', '46798', '54545', '43692']
13695 ['9638', '103845', '145443', '140371']
23255 ['67106', '134610']
19408 ['124592']
7701 ['41692', '54626']
556 ['118543', '92071', '108330', '166860', '65626

14942 ['15059']
18595 ['52334', '139669', '65992', '67106', '106092', '115127']
21846 ['37359', '163923']
15922 ['92105']
4192 ['29165']
6110 ['51957', '49994', '3165', '72380', '147507']
8192 ['79095']
18308 ['107864', '151258']
9153 ['98499', '67461']
18689 ['28839', '106885']
4859 ['38195', '105732', '162529', '97305', '8003']
15933 ['89623']
15553 ['172070']
4737 ['43103', '34828']
7731 ['125296', '112683', '32977']
6800 ['12260']
12947 ['72253']
21740 ['41722']
12020 ['155950', '120253']
11502 ['106484', '36503', '62176']
6055 ['101004']
8805 ['38031']
11238 ['118253', '82904']
15198 ['170975', '40599', '15381']
11052 ['107185', '52334']
821 ['142559', '43959', '14846', '97738', '64261', '143522', '19812']
17083 ['24639']
19838 ['31762']
9951 ['46234', '65543']
4080 ['167993']
17978 ['23850']
23868 ['157605']
24871 ['103556', '101819']
23015 ['109709']
13591 ['30304', '52580', '172449']
23596 ['135400', '97372', '60720']
4201 ['22380', '163671']
19355 ['120789']
21441 ['68409']
23

15572 ['89218', '86923', '97372']
4738 ['946', '121589', '121513', '52732']
8075 ['121494', '40634', '4827']
11402 ['134610', '129595']
22726 ['176033', '112245']
13893 ['71705', '50909', '82705', '46699']
9888 ['101121']
14653 ['159031']
12682 ['55237']
21353 ['43298']
15693 ['88941']
11073 ['171328', '78593']
19059 ['52114', '118418', '71578']
759 ['138873']
10125 ['20208', '7035']
10570 ['73953']
20686 ['107299']
20162 ['144965', '134610']
14596 ['145240', '16763']
18507 ['56425', '174552']
8556 ['5289', '137409']
2968 ['35095']
18555 ['135071', '82665', '19812']
11195 ['158973', '29880', '14774']
20144 ['32720', '57943', '70129', '112494']
23528 ['166809']
15838 ['87272']
13720 ['12693', '52294']
472 ['193', '158', '68712']
25008 ['129193']
839 ['171132', '120292', '279', '131118']
10724 ['33228']
1448 ['123732', '128926', '62822', '80262']
15088 ['15173', '89113']
19052 ['86859']
16696 ['160849']
15123 ['15173']
17280 ['52334']
9205 ['164498', '80859']
7406 ['160596', '99301']
985

19383 ['29433']
9405 ['64169', '13433', '72659', '35278', '23212', '90751']
14702 ['50710', '129420', '73953']
14341 ['13778', '126374', '129495', '108289', '135961', '164131']
15616 ['16362']
13500 ['19634']
4815 ['61725', '59726']
23937 ['173354', '137499']
25000 ['135781']
1658 ['37359']
3687 ['1571']
7157 ['135961']
18647 ['100528']
21557 ['55809', '177287']
4889 ['47877', '84944', '81116']
15305 ['107768', '76391']
19141 ['55226']
24531 ['152154']
9454 ['168258']
20451 ['74840']
11184 ['92292']
20276 ['125637', '130650']
20063 ['129505']
6104 ['3165']
7090 ['51699', '134610']
21788 ['42154', '176819']
17321 ['21400']
14237 ['17926']
20425 ['54986', '77495']
6004 ['125637']
5008 ['45968', '75291', '19297']
20777 ['142634']
8234 ['37047']
9900 ['71578', '22132']
14971 ['172479', '15059']
1894 ['708']
20067 ['33994', '87061', '77807']
24392 ['79091', '80962']
14373 ['105272', '92623']
12410 ['74637']
17564 ['106975', '19770']
11929 ['78570', '109840']
8216 ['50810']
24175 ['150781']


23051 ['75990']
7865 ['154054', '24869', '156373']
17119 ['176732', '34283']
5896 ['90496', '2964', '52334']
18820 ['118496']
6311 ['106975']
11703 ['142345']
15242 ['127080', '92547']
14469 ['149428']
18164 ['58858']
14475 ['74633', '153514']
22047 ['43875']
15475 ['19316']
13701 ['12647']
11582 ['104261']
12399 ['101200']
12360 ['166665']
18976 ['19297', '114273']
8481 ['60638']
8471 ['74633', '135961']
23301 ['56077']
22108 ['154815']
16808 ['42272']
14554 ['101200']
18840 ['37657', '132334']
8880 ['151162']
18915 ['32114']
22702 ['61982']
21638 ['40976', '143067', '53200']
14935 ['72193', '68869']
21464 ['174245']
20334 ['162629']
20293 ['33459']
22812 ['47877']
15890 ['16912']
24472 ['115281']
11621 ['87041']
19970 ['145114']
17970 ['97240']
17789 ['173068']
9298 ['140095']
10087 ['119956', '62660', '164261', '7012']
256 ['105']
16982 ['141665', '131010']
22123 ['124579']
15388 ['21326']
19966 ['146117']
4733 ['108274', '45833']
4825 ['164837']
14967 ['40699']
13346 ['172959', '77

24847 ['134610']
24009 ['134610']
17764 ['163625']
23325 ['58218', '91243']
16799 ['140898']
23400 ['147447']
18608 ['118657']
8046 ['90424']
15258 ['118227']
1407 ['95864']
20966 ['66298']
10742 ['81302', '49271']
15320 ['109709']
12411 ['136562']
11588 ['110568', '25946', '40733']
19841 ['144201']
16689 ['19316']
14240 ['117899', '13565']
20617 ['139382']
9580 ['73493']
18826 ['67898', '77471']
23929 ['164450']
11264 ['77082']
19185 ['52245']
24786 ['90709']
22056 ['114480', '119749']
1297 ['44557']
18969 ['37047']
15313 ['10112']
9917 ['6732', '89113']
21312 ['170672', '123953']
24815 ['98000']
22984 ['52779']
13824 ['96348']
19776 ['133220']
18460 ['145695']
21997 ['39353']
18513 ['164596']
20670 ['77620']
21654 ['41070']
20177 ['118498', '32888']
9940 ['6773']
10139 ['7067']
17035 ['81783']
16562 ['55324']
21465 ['154683']
17070 ['56106', '51574']
21122 ['106785']
6545 ['3459', '118496']
24616 ['133818']
18575 ['52727']
2714 ['148461']
13754 ['119444']
20418 ['161211']
19296 ['118

19958 ['137184']
14742 ['68803']
20536 ['51043']
10899 ['76859']
18448 ['61001']
19379 ['156212']
23223 ['117099']
19431 ['101819', '128088']
10413 ['46177']
5515 ['9796']
15434 ['126894']
15787 ['23653']
16985 ['144458']
5664 ['29010']
18485 ['126645']
23378 ['67342']
21342 ['42988', '86227']
24961 ['160358']
16310 ['111822', '28768', '114359']
11708 ['9204']
18315 ['165388']
4098 ['50174', '26770']
12323 ['84796', '170771']
17396 ['55559']
12590 ['150120']
21065 ['117899']
7105 ['4025']
835 ['176565']
10114 ['6999']
18190 ['149428']
20040 ['142100']
1739 ['670']
18823 ['51263']
22446 ['171244']
3390 ['27243']
22730 ['50965']
7978 ['4790', '145792']
20484 ['143353']
19092 ['90158']
17456 ['25524']
7582 ['45217']
8129 ['143864']
19117 ['83404']
18309 ['46718']
6536 ['56425']
13502 ['28273']
23323 ['124831']
10161 ['7085']
12187 ['31894']
20243 ['167419']
21053 ['101040']
15114 ['15173']
11796 ['147180']
436 ['79996']
16263 ['56871']
10477 ['28552']
12371 ['49283']
19857 ['133911']
1381

## Apply Constraint

In [15]:
# Per-user ingredient constraints, indexed by the user column 'u'
ingr_const = pd.read_csv('../data/ingr_const.csv').set_index('u')

In [16]:
# The CSV stores each list as text such as "[1833, 1257, 335]"; convert it to
# a list of id strings. An empty "[]" has to become [] -- a bare split(',')
# on the empty interior returns [''], i.e. one bogus empty id.
for col in ('include', 'exclude'):
    compact = ingr_const[col].str.replace(" ", "")
    ingr_const[col] = compact.apply(lambda s: s[1:-1].split(',') if len(s) > 2 else [])
ingr_const.head()

Unnamed: 0_level_0,include,exclude
u,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"[1833, 1257, 335, 5695, 6335]","[63, 1168, 335, 7557, 6696]"
1,"[590, 2832, 6324, 1910, 298]","[800, 4253, 7449, 7557, 4623]"
2,"[2683, 3497, 7470, 2131, 1329]","[2499, 5006, 7655, 4717, 5319]"
3,"[7367, 1257, 4096, 3440, 4623]",[]
4,"[5319, 3440, 5825, 800, 5298]",[5648]


In [17]:
# Food constraint table; the preview below shows a single 'food' column of item ids
food_const = pd.read_csv('../data/food_const.csv')
food_const.head()

Unnamed: 0,food
0,99787
1,134610
2,135961
3,117899
4,147374


In [18]:
# Nutrient constraint table (calories, total fat, sugar, sodium, protein, ...);
# presumably the target nutrient values that nutr_error is applied to -- confirm
nutr_const = pd.read_csv('../data/nutr_const.csv')
nutr_const.head()

Unnamed: 0,calories,total fat,sugar,sodium,protein,saturated fat,carbohydrates
0,1333.333333,66.666667,66.666667,66.666667,66.666667,66.666667,66.666667
1,1600.0,66.666667,66.666667,66.666667,66.666667,66.666667,66.666667
2,1866.666667,66.666667,66.666667,66.666667,66.666667,66.666667,66.666667


In [19]:
# Recipe table keyed by the 'fid' column
recipe_data = pd.read_csv('../data/recipe_data.csv').set_index('fid')
recipe_data.head()

Unnamed: 0_level_0,name,full_id,nutrition,ingredient_ids
fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,white bean green chile pepper soup,40893,"[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]","[3384, 7979, 2127, 3502, 3217, 1257, 2778, 500..."
1,devilicious cookie cake delights,44394,"[132.3, 11.0, 39.0, 5.0, 4.0, 11.0, 5.0]","[912, 7557, 2499, 5382]"
2,baked potato toppings,85009,"[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]","[4623, 6265, 1168, 6016, 3597, 3440, 7213, 169..."
3,kfc honey bbq strips,134728,"[316.0, 4.0, 40.0, 37.0, 78.0, 4.0, 10.0]","[1304, 2683, 3217, 6270, 3532, 869, 7557, 3698..."
4,lamb stew with tomatoes chickpeas and spices,200236,"[606.5, 65.0, 12.0, 34.0, 65.0, 83.0, 7.0]","[4130, 6270, 3486, 7557, 5010, 3203, 2683, 125..."


In [20]:
# Both columns hold lists serialised as text, e.g. "[912, 7557, 2499]".
# Convert them to lists of strings; an empty "[]" becomes [] rather than the
# [''] that a bare split(',') on the empty interior would produce.
for col in ('ingredient_ids', 'nutrition'):
    compact = recipe_data[col].str.replace(" ", "")
    recipe_data[col] = compact.apply(lambda s: s[1:-1].split(',') if len(s) > 2 else [])

In [21]:
def select_random(x, k):
    """Return up to ``k`` distinct random integers drawn from ``[0, x)``.

    Args:
        x (int): Exclusive upper bound of the range to draw from.
        k (int): Number of distinct values requested.

    Returns:
        list[int]: Distinct values in random order. The length is ``k`` when
        ``0 <= k <= x``; if ``k`` exceeds ``x`` only ``x`` values are returned,
        and an empty list is returned when ``x`` or ``k`` is not positive.
    """
    # Clamp the request: the range only holds x distinct values, so asking for
    # more could never be satisfied (the previous retry loop never terminated).
    k = max(0, min(k, x))
    return random.sample(range(x), k)

### Constraint 1: include ingredient

In [22]:
# Both fid and iid are passed in as strings.
def include_ingr(fid, iid):
    """Return True when ingredient ``iid`` is listed for recipe ``fid``.

    ``fid`` is converted with ``int()`` to look up the row in ``recipe_data``;
    ``iid`` is compared as-is against that row's ``ingredient_ids``.
    """
    recipe_row = recipe_data.loc[int(fid)]
    return iid in recipe_row['ingredient_ids']

In [23]:
user_list = select_random(len(ingr_const), u_sample)
user_list

[16705, 19465, 21233, 14354, 21936]

In [24]:
# For every sampled user, pick random entries from that user's "include" list.
ingr_list = {}
for u in user_list:
    include_ids = ingr_const.loc[u].include
    # Never request more distinct positions than the list holds; otherwise
    # select_random cannot find enough unique indices for this user.
    n_pick = min(incl_ingr_sample, len(include_ids))
    ingr_pick = select_random(len(include_ids), n_pick)
    ingr_list[u] = [int(include_ids[p]) for p in ingr_pick]

In [25]:
ingr_list

{16705: [3184, 1168, 342],
 19465: [7655, 3668, 3723],
 21233: [4964, 1124, 4987],
 14354: [7557, 6270, 4623],
 21936: [4836, 3502, 5966]}

In [26]:
# For each (user, required ingredient) pair, walk the user's top-N list in
# ranked order and keep the recipes that contain the ingredient.
recommend_list = {}
for u in user_list:
    for i in ingr_list[u]:
        # top_N_recommend is looked up by the string form of the user id.
        top_k = top_N_recommend[str(u)]
        matches = []
        for f in top_k:
            # Stop at exactly top_N_const matches; the earlier
            # `count > top_N_const` check let one extra recipe through.
            if len(matches) >= top_N_const:
                break
            # f[0] is the item id of the (item id, estimate) pair.
            if include_ingr(f[0], str(i)):
                matches.append(f[0])
        recommend_list[(u, i)] = matches

In [27]:
recommend_list

{(16705, 3184): ['106973', '55237'],
 (16705, 1168): [],
 (16705, 342): [],
 (19465, 7655): [],
 (19465, 3668): [],
 (19465, 3723): [],
 (21233, 4964): [],
 (21233, 1124): [],
 (21233, 4987): [],
 (14354, 7557): [],
 (14354, 6270): [],
 (14354, 4623): [],
 (21936, 4836): [],
 (21936, 3502): [],
 (21936, 5966): []}

In [28]:
# Lay the constrained picks out column-wise, next to each user's full
# (unconstrained) top-N item ids.
keys = list(recommend_list)
data1 = {
    'user': [key[0] for key in keys],
    'include': [key[1] for key in keys],
    'item': [recommend_list[key] for key in keys],
    'no constraint': [
        [entry[0] for entry in top_N_recommend[str(key[0])]] for key in keys
    ],
}

In [29]:
data1['item']

[['106973', '55237'], [], [], [], [], [], [], [], [], [], [], [], [], [], []]

In [30]:
data1['no constraint']

[['106973', '55237'],
 ['106973', '55237'],
 ['106973', '55237'],
 ['88620', '65499', '29917'],
 ['88620', '65499', '29917'],
 ['88620', '65499', '29917'],
 ['101065', '99787'],
 ['101065', '99787'],
 ['101065', '99787'],
 [],
 [],
 [],
 ['110902', '139776', '97535', '145534', '173080'],
 ['110902', '139776', '97535', '145534', '173080'],
 ['110902', '139776', '97535', '145534', '173080']]

In [31]:
sample1 = pd.DataFrame(data1)
sample1

Unnamed: 0,user,include,item,no constraint
0,16705,3184,"[106973, 55237]","[106973, 55237]"
1,16705,1168,[],"[106973, 55237]"
2,16705,342,[],"[106973, 55237]"
3,19465,7655,[],"[88620, 65499, 29917]"
4,19465,3668,[],"[88620, 65499, 29917]"
5,19465,3723,[],"[88620, 65499, 29917]"
6,21233,4964,[],"[101065, 99787]"
7,21233,1124,[],"[101065, 99787]"
8,21233,4987,[],"[101065, 99787]"
9,14354,7557,[],[]


### Constraint 2: exclude ingredient

In [32]:
# Both fid and iid are passed in as strings.
def exclude_ingr(fid, iid):
    """Return True when ingredient ``iid`` is absent from recipe ``fid``.

    ``fid`` is converted with ``int()`` to look up the row in ``recipe_data``;
    ``iid`` is compared as-is against that row's ``ingredient_ids``.
    """
    recipe_row = recipe_data.loc[int(fid)]
    return iid not in recipe_row['ingredient_ids']

In [33]:
user_list = select_random(len(ingr_const), u_sample)

In [61]:
user_list

[588, 3657, 2419, 5795, 23157]

In [34]:
# For every sampled user, pick random entries from that user's "exclude" list.
ingr_list = {}
for u in user_list:
    exclude_ids = ingr_const.loc[u].exclude
    # Use the exclude-specific sample size (this cell previously reused
    # incl_ingr_sample) and never request more positions than the list holds:
    # exclude lists can be empty or hold a single entry.
    n_pick = min(excl_ingr_sample, len(exclude_ids))
    ingr_pick = select_random(len(exclude_ids), n_pick)
    ingr_list[u] = [int(exclude_ids[p]) for p in ingr_pick]

In [35]:
# For each (user, banned ingredient) pair, walk the user's top-N list in
# ranked order and keep the recipes that do not contain the ingredient.
recommend_list = {}
for u in user_list:
    for i in ingr_list[u]:
        # top_N_recommend is looked up by the string form of the user id.
        top_k = top_N_recommend[str(u)]
        matches = []
        for f in top_k:
            # Stop at exactly top_N_const matches; the earlier
            # `count > top_N_const` check let one extra recipe through.
            if len(matches) >= top_N_const:
                break
            # f[0] is the item id of the (item id, estimate) pair.
            if exclude_ingr(f[0], str(i)):
                matches.append(f[0])
        recommend_list[(u, i)] = matches

In [36]:
# Lay the constrained picks out column-wise, next to each user's full
# (unconstrained) top-N item ids.
keys = list(recommend_list)
data2 = {
    'user': [key[0] for key in keys],
    'exclude': [key[1] for key in keys],
    'item': [recommend_list[key] for key in keys],
    'no constraint': [
        [entry[0] for entry in top_N_recommend[str(key[0])]] for key in keys
    ],
}

In [37]:
sample2 = pd.DataFrame(data2)
sample2

Unnamed: 0,user,exclude,item,no constraint
0,1155,800,"[109667, 64428, 154611, 72482]","[109667, 64428, 154611, 72482]"
1,1155,840,"[109667, 64428, 154611, 72482]","[109667, 64428, 154611, 72482]"
2,1155,4253,"[109667, 64428, 154611, 72482]","[109667, 64428, 154611, 72482]"
3,16203,7655,"[130417, 135961, 149428]","[130417, 135961, 149428]"
4,16203,332,"[130417, 135961, 149428]","[130417, 135961, 149428]"
5,16203,3184,"[130417, 135961, 149428]","[130417, 135961, 149428]"
6,1598,7470,"[66593, 103986, 109508, 166377, 156352, 78551,...","[66593, 103986, 109508, 166377, 156352, 78551,..."
7,1598,1511,"[66593, 103986, 109508, 166377, 156352, 78551,...","[66593, 103986, 109508, 166377, 156352, 78551,..."
8,1598,1093,"[66593, 103986, 109508, 166377, 156352, 78551,...","[66593, 103986, 109508, 166377, 156352, 78551,..."
9,22480,7655,[],[]


### Constraint 3: satisfy nutrient targets

In [57]:
def satisfy_nutr(fid, target_nutr):
    """Check whether a recipe's nutrition stays within tolerance of a target.

    Args:
        fid: Recipe id (string or int); converted with ``int()`` to look up
            the row in the global ``recipe_data`` frame.
        target_nutr: Target values matched by position against the recipe's
            ``nutrition`` list. May be a list or a pandas Series.

    Returns:
        bool: True if every nutrient value lies between
        ``target * (1 - nutr_error/100)`` and ``target * (1 + nutr_error/100)``
        inclusive, False as soon as one value falls outside that band.
    """
    nutr = recipe_data.loc[int(fid)].nutrition
    # Read the targets positionally. Indexing a label-indexed Series with an
    # integer (target_nutr[i]) depends on a deprecated positional fallback.
    targets = list(target_nutr)
    tolerance = nutr_error / 100
    for i, raw_value in enumerate(nutr):
        target = targets[i]
        value = float(raw_value)  # nutrition entries are stored as strings
        if value < target * (1 - tolerance):
            return False
        if target * (1 + tolerance) < value:
            return False
    return True

In [59]:
user_list = select_random(len(ingr_const), u_sample)

In [62]:
len(nutr_const)

3

In [63]:
nutr_const.loc[0]

calories         1333.333333
total fat          66.666667
sugar              66.666667
sodium             66.666667
protein            66.666667
saturated fat      66.666667
carbohydrates      66.666667
Name: 0, dtype: float64

In [66]:
# Give every sampled user the complete set of nutrition target rows.
nutr_list = {
    u: [nutr_const.loc[row] for row in range(len(nutr_const))]
    for u in user_list
}

In [67]:
nutr_list

{588: [calories         1333.333333
  total fat          66.666667
  sugar              66.666667
  sodium             66.666667
  protein            66.666667
  saturated fat      66.666667
  carbohydrates      66.666667
  Name: 0, dtype: float64,
  calories         1600.000000
  total fat          66.666667
  sugar              66.666667
  sodium             66.666667
  protein            66.666667
  saturated fat      66.666667
  carbohydrates      66.666667
  Name: 1, dtype: float64,
  calories         1866.666667
  total fat          66.666667
  sugar              66.666667
  sodium             66.666667
  protein            66.666667
  saturated fat      66.666667
  carbohydrates      66.666667
  Name: 2, dtype: float64],
 3657: [calories         1333.333333
  total fat          66.666667
  sugar              66.666667
  sodium             66.666667
  protein            66.666667
  saturated fat      66.666667
  carbohydrates      66.666667
  Name: 0, dtype: float64,
  calories  

In [78]:
# For each (user, nutrition target index) pair, walk the user's top-N list in
# ranked order and keep the recipes whose nutrition fits the target.
recommend_list = {}
for u in user_list:
    for i, target in enumerate(nutr_list[u]):
        # top_N_recommend is looked up by the string form of the user id.
        top_k = top_N_recommend[str(u)]
        matches = []
        for f in top_k:
            # Stop at exactly top_N_const matches; the earlier
            # `count > top_N_const` check let one extra recipe through.
            if len(matches) >= top_N_const:
                break
            # f[0] is the item id of the (item id, estimate) pair.
            if satisfy_nutr(f[0], target):
                matches.append(f[0])
        recommend_list[(u, i)] = matches

In [76]:
recommend_list

{(588, 0): [],
 (588, 1): [],
 (588, 2): [],
 (3657, 0): [],
 (3657, 1): [],
 (3657, 2): [],
 (2419, 0): [],
 (2419, 1): [],
 (2419, 2): [],
 (5795, 0): [],
 (5795, 1): [],
 (5795, 2): [],
 (23157, 0): [],
 (23157, 1): [],
 (23157, 2): []}

In [84]:
# Lay the constrained picks out column-wise, next to each user's full
# (unconstrained) top-N item ids.
keys = list(recommend_list)
data3 = {
    'user': [key[0] for key in keys],
    'nutrition target': [key[1] for key in keys],
    'item': [recommend_list[key] for key in keys],
    'no constraint': [
        [entry[0] for entry in top_N_recommend[str(key[0])]] for key in keys
    ],
}

In [85]:
sample3 = pd.DataFrame(data3)
sample3

Unnamed: 0,user,nutrition target,item,no constraint
0,588,0,[],[203]
1,588,1,[],[203]
2,588,2,[],[203]
3,3657,0,[],[86955]
4,3657,1,[],[86955]
5,3657,2,[],[86955]
6,2419,0,[],"[17320, 44924, 60896, 170377, 37758, 12685, 66..."
7,2419,1,[],"[17320, 44924, 60896, 170377, 37758, 12685, 66..."
8,2419,2,[],"[17320, 44924, 60896, 170377, 37758, 12685, 66..."
9,5795,0,[],"[124557, 92525, 12947]"


### Modularize

In [8]:
import load_data as ld
from surprise.model_selection import train_test_split

In [9]:
data = ld.load_rating_data()

In [10]:
train_set, test_set = train_test_split(data, test_size=.25)