In [2]:
import pandas as pd
from surprise import Reader
from surprise import Dataset

In [5]:
# load rating data with surprise
def load_rating_data(file_path='../data/rating_data.csv'):
    """Read a ratings file into a Surprise ``Dataset``.

    Each line of the file is parsed as comma-separated ``user item rating``
    fields, with ratings declared to lie on a 0-5 scale.

    Args:
        file_path: Path of the ratings file to load.

    Returns:
        The object produced by ``Dataset.load_from_file`` for that file.
    """
    # Describe the file layout once, then hand it to the Reader.
    reader_options = dict(
        line_format='user item rating',
        sep=',',
        rating_scale=(0, 5),
    )
    rating_reader = Reader(**reader_options)
    return Dataset.load_from_file(file_path, reader=rating_reader)

In [6]:
# Load the ratings from the default path; `data` is the Surprise dataset
# that the train/test split cell consumes.
data = load_rating_data()

In [7]:
from surprise.model_selection import train_test_split

# Hold out 25% of the ratings as the test set. The seed is fixed so that a
# fresh "Restart & Run All" reproduces exactly the same train/test partition;
# without it every run evaluates on a different random split.
train_set, test_set = train_test_split(data, test_size=0.25, random_state=42)

In [95]:
# cast list of string to list of integer
def int_cast(str_list):
    """Convert a list of numeric strings into a new list of ints.

    Empty-string entries are dropped instead of being passed through, so the
    result only ever contains ints. In particular ``['']`` (what
    ``"".split(',')`` yields for an empty bracketed list) becomes ``[]``.
    The input list is left unmodified.

    Args:
        str_list: Sequence of values accepted by ``int()``; ``''`` entries
            are skipped.

    Returns:
        A new list holding ``int(value)`` for every non-empty entry, in the
        original order.

    Raises:
        ValueError: If a non-empty entry cannot be parsed as an integer.
    """
    return [int(token) for token in str_list if token != '']

In [96]:
# cast list of string to list of float
def float_cast(str_list):
    """Convert a list of numeric strings into a new list of floats.

    Empty-string entries are dropped instead of being passed through, so the
    result only ever contains floats. In particular ``['']`` (what
    ``"".split(',')`` yields for an empty bracketed list) becomes ``[]``.
    The input list is left unmodified.

    Args:
        str_list: Sequence of values accepted by ``float()``; ``''`` entries
            are skipped.

    Returns:
        A new list holding ``float(value)`` for every non-empty entry, in the
        original order.

    Raises:
        ValueError: If a non-empty entry cannot be parsed as a float.
    """
    return [float(token) for token in str_list if token != '']

In [97]:
# load recipe data
def load_recipe_data(file_path="../data/recipe_data.csv"):
    """Load the recipe CSV, indexed by ``fid``, with its list columns parsed.

    The ``ingredient_ids`` and ``nutrition`` columns are read as text. For
    each cell, spaces are removed, the first and last characters are stripped
    (presumably the surrounding brackets -- confirm against the CSV), and the
    remainder is split on commas. The pieces are then converted with
    ``int_cast`` (ingredients) or ``float_cast`` (nutrition).

    Args:
        file_path: Path of the recipe CSV file.

    Returns:
        A DataFrame indexed by ``fid`` whose ``ingredient_ids`` and
        ``nutrition`` cells hold Python lists.
    """
    recipes = pd.read_csv(file_path).set_index('fid')

    # parse the list-valued columns, each with its own element converter
    list_columns = (('ingredient_ids', int_cast), ('nutrition', float_cast))
    for column, cast in list_columns:
        compact = recipes[column].str.replace(" ", "")
        tokens = compact.apply(lambda text: text[1:-1].split(','))
        recipes[column] = tokens.apply(lambda parts: cast(parts))

    return recipes

In [99]:
# Load the recipe table and preview its first rows.
# NOTE(review): the name `df` is rebound to other tables by the constraint
# cells further down, so the lookups on `df` depend on cell execution order.
df = load_recipe_data()
df.head()

Unnamed: 0_level_0,name,full_id,nutrition,ingredient_ids
fid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,white bean green chile pepper soup,40893,"[204.8, 5.0, 9.0, 26.0, 24.0, 2.0, 10.0]","[3384, 7979, 2127, 3502, 3217, 1257, 2778, 500..."
1,devilicious cookie cake delights,44394,"[132.3, 11.0, 39.0, 5.0, 4.0, 11.0, 5.0]","[912, 7557, 2499, 5382]"
2,baked potato toppings,85009,"[2786.2, 342.0, 134.0, 290.0, 161.0, 301.0, 42.0]","[4623, 6265, 1168, 6016, 3597, 3440, 7213, 169..."
3,kfc honey bbq strips,134728,"[316.0, 4.0, 40.0, 37.0, 78.0, 4.0, 10.0]","[1304, 2683, 3217, 6270, 3532, 869, 7557, 3698..."
4,lamb stew with tomatoes chickpeas and spices,200236,"[606.5, 65.0, 12.0, 34.0, 65.0, 83.0, 7.0]","[4130, 6270, 3486, 7557, 5010, 3203, 2683, 125..."


In [101]:
# Spot-check parsing: first nutrition value of the recipe with fid 0.
df.loc[0, 'nutrition'][0]

204.8

In [102]:
# Spot-check parsing: first ingredient id of the recipe with fid 0.
df.loc[0, 'ingredient_ids'][0]

3384

In [84]:
# load ingredient related constraint data
def load_ingr_const(file_path="../data/ingr_const.csv"):
    """Load the ingredient-constraint CSV, indexed by ``u``, with parsed lists.

    The ``include`` and ``exclude`` columns are read as text. For each cell,
    spaces are removed, the first and last characters are stripped
    (presumably the surrounding brackets -- confirm against the CSV), and the
    remainder is split on commas. The pieces are then converted with
    ``int_cast``.

    Args:
        file_path: Path of the ingredient-constraint CSV file.

    Returns:
        A DataFrame indexed by ``u`` whose ``include`` and ``exclude`` cells
        hold Python lists.
    """
    constraints = pd.read_csv(file_path).set_index('u')

    # parse both ingredient-id list columns the same way
    for column in ('include', 'exclude'):
        compact = constraints[column].str.replace(" ", "")
        id_tokens = compact.apply(lambda text: text[1:-1].split(','))
        constraints[column] = id_tokens.apply(lambda parts: int_cast(parts))

    return constraints

In [103]:
# Load the per-user ingredient constraints and preview the first rows.
# NOTE(review): this rebinds `df`, replacing whatever table it held before.
df = load_ingr_const()
df.head()

Unnamed: 0_level_0,include,exclude
u,Unnamed: 1_level_1,Unnamed: 2_level_1
0,"[1833, 1257, 335, 5695, 6335]","[63, 1168, 335, 7557, 6696]"
1,"[590, 2832, 6324, 1910, 298]","[800, 4253, 7449, 7557, 4623]"
2,"[2683, 3497, 7470, 2131, 1329]","[2499, 5006, 7655, 4717, 5319]"
3,"[7367, 1257, 4096, 3440, 4623]",[]
4,"[5319, 3440, 5825, 800, 5298]",[5648]


In [88]:
# load food related constraint data
def load_food_const(file_path="../data/food_const.csv"):
    """Read the food-constraint CSV into a DataFrame without post-processing.

    Args:
        file_path: Path of the food-constraint CSV file.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    # NOTE(review): the notebook preview shows an 'Unnamed: 0' column, which
    # suggests the file was saved with its index -- confirm whether
    # index_col=0 is wanted before changing this.
    return pd.read_csv(file_path)

In [92]:
# Load the food constraints and preview the first rows.
# NOTE(review): this rebinds `df`, replacing whatever table it held before.
df = load_food_const()
df.head()

Unnamed: 0,food
0,99787
1,134610
2,135961
3,117899
4,147374
