In [1]:
from pymongo import MongoClient
import json
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
from prince import FAMD
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def get_database():
    CONNECTION_STRING = 'mongodb://localhost:27017/'

    client = MongoClient(CONNECTION_STRING)
    print(client.server_info())

    return client['4300project']

In [3]:
def expand_column(df, col):
    """Expand a simple column of lists into multiple discrete columns"""
    max_len = max(map(len, df[col].values))
    cols = [f'{col}_{x}' for x in range(max_len)]
    df[cols] = df[col].apply(lambda x: pd.Series(x))
    df = df.drop(columns=col)
    return df

In [4]:
def clean_columns(collection):
    # create dataframe, drop unnecessary columns
    df = pd.DataFrame(list(collection.find()))
    pca_drops = ['url', 'recipeType', 'steps']
    df = df.drop(columns=pca_drops)

    # expand columns that are simple lists into individual columns
    list_cols = ['keywords', 'dish', 'course', 'technique', 'cuisine']
    for col in list_cols:
        df = expand_column(df, col)

    # complex cols
    complex = ['ingredients','nutrition'] #'reviews']

    # expand ingredients column
    df['ingredients'] = df.apply(lambda row: [x['name'] for x in row['ingredients']], axis=1)
    df = expand_column(df, 'ingredients')

    # expand nutrition column
    df['nutrition'] = df.apply(lambda row: [f"{x['value']} {x['name']}" for x in row['nutrition']], axis=1)
    df = expand_column(df, 'nutrition')

    return df

In [5]:
def run_pca(collection):
    org_df = pd.DataFrame(list(collection.find()))
    df = clean_columns(collection)

    # drop empties
    nums = df.select_dtypes(include=[np.number]).columns
    strs = df.select_dtypes(exclude=[np.number]).columns
    df[nums] = df[nums].fillna(0)
    df[strs] = df[strs].fillna('N/A')

    famd = FAMD().fit(df)
    fit_df = famd.transform(df)

    df = pd.concat([org_df, fit_df], axis=1).reset_index(drop=True)

    return df

In [6]:
if __name__ == '__main__':
    dbname = get_database()
    collection = dbname['recipes']
    collection.delete_many({})

    with open('recipe_data.json') as data_file:
        data = json.load(data_file)

    collection.insert_many([item for item in data])
    
    df = run_pca(collection)

    uri = 'bolt://localhost:7687'
    user = 'neo4j'
    password = 'neo4ANJALI'
    driver = GraphDatabase.driver(uri, auth=(user, password))

    # query data from Mongodb
    data1 = collection.find()

    with driver.session() as session:
        tx = session.begin_transaction()
        query = 'MATCH (n) DETACH DELETE n'
        tx.run(query)
        for record in data1:
            ingredients = []
            for ingredient in record['ingredients']:
                ingredients.append(ingredient['name'])
            fields = {'field1': record['name'], 'field2': record['url'], 'field3': record['recipeType'], 
                      'field4': record['keywords'], 'field5': record['description'],
                      'field6': record['steps'], 'field7': record['dish'], 'field8': record['course'],
                      'field9': record['technique'], 'field10': record['cuisine'], 
                      'field11': record['avgRating'], 'field12': record['numReviews'],
                      'field13': list(filter(None,ingredients)), 'field14': record['id']}                     
                     
            query = 'CREATE (recipe:recipes {name: $field1, url: $field2, recipeType: $field3, keywords: \
            $field4, description: $field5, steps: $field6, dish: $field7, course: $field8, \
            technique: $field9, cuisine: $field10, avgRating: $field11, numReviews: $field12, \
            ingredients: $field13, recipeId: $field14})'
            

            tx.run(query, **fields)
        tx.commit()
       



In [7]:
df

Unnamed: 0,_id,name,id,url,recipeType,keywords,description,steps,dish,course,technique,cuisine,ingredients,avgRating,numReviews,nutrition,0,1
0,643c6ee983bed2b118322dc2,Grilled Swordfish with Chimichurri Sauce,d612b0ec-a51e-42cd-be72-ee81918fe457,https://www.yummly.com/recipe/Grilled-Swordfis...,YummlyOriginal,[],"The lively Latin American herb, lemon, and chi...",[Preheat a grill for medium heat (350° to 450°...,[],[Main Dishes],[Grilling],[Barbecue],"[{'name': 'swordfish steaks', 'category': 'Sea...",5.000000,1,"[{'name': 'FAT_KCAL', 'value': '260.0 kcal', '...",3.268091,-6.709241
1,643c6ee983bed2b118322dc3,Tamales,8082bcf8-efb9-48dc-aee8-1a134be12044,https://www.yummly.com/recipe/Tamales-2691200,BasicRecipe,"[tamales, chicken tamales, tamale, corn tamale...",,[],[Tamale],[],[],[],"[{'name': 'dried corn husks', 'category': 'Glo...",5.000000,1,"[{'name': 'FAT_KCAL', 'value': '120.0 kcal', '...",-31.606482,-16.247251
2,643c6ee983bed2b118322dc4,Homemade Hummus Dip,348069a1-39ef-4dc2-8c9d-8b34e4a7a97b,https://www.yummly.com/recipe/Homemade-Hummus-...,BasicRecipe,"[hummus dip, dip, dips, no bake dips, humus di...",,[],[Dips and Spreads],[Appetizers],[Blending],[],"[{'name': 'garlic', 'category': 'Produce', 'qu...",4.478261,23,"[{'name': 'FAT_KCAL', 'value': '130.0 kcal', '...",-2.573522,33.763098
3,643c6ee983bed2b118322dc5,Easy Green Bean Casserole,3bbc13c6-87bc-460d-b692-7b7ee7eea581,https://www.yummly.com/recipe/Easy-Green-Bean-...,YummlyOriginal,[],"For many families, no holiday meal is complete...","[If green beans are still frozen, spread them ...","[Green Bean Casserole, Casserole]",[Side Dishes],[Baking],[],"[{'name': 'frozen green beans', 'category': 'F...",,0,"[{'name': 'FAT_KCAL', 'value': '110.0 kcal', '...",-1.033918,-4.799344
4,643c6ee983bed2b118322dc6,Perfect New York Cheesecake,bfe10384-676b-49bc-b239-fa6a2655e99b,https://www.yummly.com/recipe/Perfect-New-York...,BasicRecipe,"[cheesecake, new york cheesecake, cheese cake,...",,[],[Cheesecake],[Desserts],[Baking],[],"[{'name': 'crust', 'category': 'Bakery', 'quan...",3.000000,2,"[{'name': 'FAT_KCAL', 'value': '310.0 kcal', '...",53.852178,30.201939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
488,643c6ee983bed2b118322faa,Stovetop Chicken Parm with Herby Angel Hair,24a7b66b-4a53-4eda-82e7-9b41f7eb2b7d,https://www.yummly.com/recipe/Stovetop-Chicken...,BasicRecipe,"[chicken breast and canned tomatoes, chicken p...","You need a flour bowl, a bowl for milk and egg...","[Place a large, non-stick pan over high heat a...",[],[Main Dishes],[Boiling],[],"[{'name': 'chicken cutlets', 'category': 'Meat...",4.656250,32,[],36.893116,-19.543162
489,643c6ee983bed2b118322fab,Easy Shredded Chicken Breast Hack,158e4c48-330f-49ef-94a6-6ac16e5ec340,https://www.yummly.com/recipe/Easy-Shredded-Ch...,YummlyOriginal,[],"To plan for busy weeknights, prepping ahead is...",[Preheat the oven to 350°F. Line a sheet pan w...,[],[Lunch],[Baking],[],"[{'name': 'boneless skinless chicken breasts',...",5.000000,6,"[{'name': 'FAT_KCAL', 'value': '25.0 kcal', 'p...",-1.576992,-2.489161
490,643c6ee983bed2b118322fac,Italian Cupboard Soup,d93fc24e-f7d7-438a-9d88-c595115b2bb3,https://www.yummly.com/recipe/Italian-Cupboard...,BasicRecipe,"[soup, soups, pork soup, all soups, italian so...",Complete this soup with warmed Italian bread a...,"[In a deep saucepan, brown the pork in a littl...",[],[Soups],"[Browning, Boiling]",[],"[{'name': 'boneless pork chops', 'category': '...",4.562500,16,"[{'name': 'FAT_KCAL', 'value': '25.0 kcal', 'p...",-65.449569,52.917527
491,643c6ee983bed2b118322fad,American Pride Pork Chop,6f2c5e64-be60-45c4-b5fa-f4f196618bd3,https://www.yummly.com/recipe/American-Pride-P...,BasicRecipe,"[pork chops, pork chop, with pork with pork ch...",,"[Combine the brown sugar, salt, pepper, allspi...",[],[Main Dishes],[Grilling],[],"[{'name': 'bone-in ribeye (rib) pork chops', '...",4.833333,6,"[{'name': 'K', 'value': '5.0 g', 'pctDailyValu...",-8.469916,15.861702


In [8]:
edges = pd.read_csv('edges.csv')

In [9]:
edges

Unnamed: 0,id_1,id_2,similarity
0,d612b0ec-a51e-42cd-be72-ee81918fe457,b0d71782-172b-4c49-b3b4-e79b651335a5,0.035702
1,d612b0ec-a51e-42cd-be72-ee81918fe457,4c3b6639-4683-427c-8b00-012874f8feea,0.049634
2,d612b0ec-a51e-42cd-be72-ee81918fe457,c394cf4d-6d8d-4b0e-9f0d-25705c323b4e,0.068675
3,d612b0ec-a51e-42cd-be72-ee81918fe457,8bc36f92-5491-4efe-9168-0c196693312b,0.071386
4,d612b0ec-a51e-42cd-be72-ee81918fe457,60e23c19-b7e5-4ff4-85ee-a3dddb84ad98,0.090891
...,...,...,...
9855,1b1420c6-cd00-4c22-93f8-12087488cb98,23c946c8-12e9-44d0-a97f-a2848f4dde1f,0.134798
9856,1b1420c6-cd00-4c22-93f8-12087488cb98,968f804f-bf76-4b8e-83a2-d73bae4eecc3,0.137002
9857,1b1420c6-cd00-4c22-93f8-12087488cb98,e4db9472-3e11-4ae8-bdf3-25b55de76ab9,0.137737
9858,1b1420c6-cd00-4c22-93f8-12087488cb98,8f4fe136-a146-4be6-aab7-00fc9c0e8d75,0.139382


In [23]:
df[df['_id'] == 'd612b0ec-a51e-42cd-be72-ee81918fe457']

Unnamed: 0,_id,name,id,url,recipeType,keywords,description,steps,dish,course,technique,cuisine,ingredients,avgRating,numReviews,nutrition,0,1


In [11]:
edges.loc[edges.id_1 == '64388ae1c2ca552ddb657336']

Unnamed: 0,id_1,id_2,similarity


In [12]:
recipeTypes = ['YummlyOriginal', 'BasicRecipe', 'ProRecipe', 'GuidedRecipe']

In [13]:
courses = ['Main Dishes','Appetizers', 'Side Dishes', 'Desserts', 'Beverages',
 'Salads', 'Breakfast and Brunch', 'Soups', 'Condiments and Sauces', 'Breads',
 'Lunch', 'Cocktails']

In [14]:
technique = ['Grilling', 'Blending', 'Baking', 'Boiling', 'Browning', 'Glazing',
 'Roasting', 'Microwaving', 'Broiling', 'Frying', 'Drying', 'Sauteeing',
 'Slow Cooking', 'Marinating', 'Steaming', 'Frosting', 'Pressure Cooking',
 'Braising', 'Pickling', 'Stir Frying', 'Brining']

In [15]:
cuisine = ['Barbecue', 'Turkish', 'Kid-Friendly', 'American', 'Indian',
 'Southern & Soul Food', 'Italian', 'Chinese', 'Asian', 'Greek', 'Southwestern',
 'Mexican', 'Moroccan', 'Puerto rican' 'Filipino', 'Japanese', 'Thai' 'Korean',
 'English', 'French', 'Jewish', 'Cajun & Creole', 'Caribbean', 'Arab', 'Cuban',
 'Mediterranean', 'Spanish']

In [16]:
dish = df['dish']

In [17]:
dish = pd.DataFrame(dish)

In [18]:
dish

Unnamed: 0,dish
0,[]
1,[Tamale]
2,[Dips and Spreads]
3,"[Green Bean Casserole, Casserole]"
4,[Cheesecake]
...,...
488,[]
489,[]
490,[]
491,[]


In [19]:
dish_exploded = dish.explode('dish')

In [20]:
dishes = dish_exploded['dish'].unique()

In [21]:
print(dishes)

[nan 'Tamale' 'Dips and Spreads' 'Green Bean Casserole' 'Casserole'
 'Cheesecake' 'Cobbler' 'Granola Bar' 'Bars' 'Smoothie' 'Waffles' 'Nachos'
 'French Toast' 'Pancakes' 'Sugar Cookies' 'Cookies' 'Chicken Tenders'
 'Meatballs' 'Juice' 'Ribs' 'Pot Pie' 'Pie' 'Frittata' 'Pulled Pork'
 'Tortilla Soup' 'Tacos' 'Wings' 'Lasagna' 'Chili' 'Brownies'
 'Oatmeal Raisin Cookies' 'Marinade' 'Salsa' 'Fried Chicken'
 'Beef Stroganoff' 'Scones' 'Pie Crust' 'Fudge' 'Cake' 'Goulash' 'Chips'
 'Egg Roll' 'Cupcake' 'Apple Pie' 'Tuna Salad' 'Garlic Bread' 'Grits'
 'One Pot' 'Icing' 'Vegetable Soup' 'Creamy Soup' 'Banana Bread'
 'Stir Fry' 'Mashed Potatoes' 'Jellies and Jams' 'Wraps' 'Roast' 'Ceviche'
 "Shepherd's Pie" 'Muffins' 'Gazpacho' 'Cinnamon Rolls' 'Fritters' 'Pesto'
 'Enchiladas' 'Meatloaf' 'Broccoli Casserole' 'Quiche' 'Baked Ziti'
 'Pot Roast' 'Baked Potato' 'Scampi' 'Pizza' 'Alfredo' 'Fries'
 'Stuffed Peppers' 'Roast Chicken' 'Gratin' 'Curry' 'Potato Salad'
 'Fajitas' 'Piccata' 'Potato Skins' 'R