In [1]:
import json
import os
import warnings
from getpass import getpass

import numpy as np
import pandas as pd
from neo4j import GraphDatabase
from prince import FAMD
from pymongo import MongoClient

warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def get_database(connection_string='mongodb://localhost:27017/', db_name='4300project'):
    """Connect to MongoDB and return a handle to the project database.

    Args:
        connection_string: MongoDB URI passed to ``MongoClient``. Defaults to
            the local instance this notebook was written against.
        db_name: Name of the database to return. Defaults to '4300project'.

    Returns:
        The database object obtained by indexing the client with ``db_name``.

    Side effects:
        Prints the value returned by ``client.server_info()``.
    """
    client = MongoClient(connection_string)
    # Printed as a quick visual check of which server the notebook is using.
    print(client.server_info())

    return client[db_name]

In [3]:
def expand_column(df, col):
    """Expand a column of lists into one column per list position.

    The new columns are named ``{col}_0``, ``{col}_1``, ... up to the length
    of the longest list, are appended after the existing columns, and the
    original ``col`` is removed. Rows whose list is shorter than the longest
    one are padded with NaN.

    Args:
        df: Source DataFrame. It is not modified.
        col: Name of the column whose cells are lists (or tuples).

    Returns:
        A new DataFrame without ``col`` and with the expanded columns added.
        If the frame is empty or every list is empty, only ``col`` is dropped.
    """
    # Normalise every cell to a list. Anything that is not a list/tuple
    # (e.g. None/NaN for a missing field) is treated as an empty list so that
    # len() below cannot fail.
    rows = [list(cell) if isinstance(cell, (list, tuple)) else [] for cell in df[col]]
    width = max(map(len, rows), default=0)

    remaining = df.drop(columns=col)
    if width == 0:
        # Nothing to expand (no rows, or only empty lists).
        return remaining

    cols = [f'{col}_{x}' for x in range(width)]
    # Pad to a rectangle so the frame can be built in one constructor call
    # instead of creating one pd.Series per row.
    padded = [row + [np.nan] * (width - len(row)) for row in rows]
    expanded = pd.DataFrame(padded, index=df.index, columns=cols)

    return pd.concat([remaining, expanded], axis=1)

In [4]:
def clean_columns(collection):
    """Load a recipe collection into a flat DataFrame of scalar columns.

    Steps:
      1. Read every document returned by ``collection.find()`` into a frame.
      2. Drop the 'url', 'recipeType' and 'steps' columns.
      3. Reduce each 'ingredients' entry to its 'name', and each 'nutrition'
         entry to the string "<value> <name>".
      4. Expand every list-valued column into ``<col>_0``, ``<col>_1``, ...
         columns via ``expand_column``.

    Args:
        collection: Object exposing ``find()`` that yields recipe documents
            (here, a MongoDB collection).

    Returns:
        The flattened DataFrame.
    """
    # create dataframe, drop columns that are not used for the decomposition
    df = pd.DataFrame(list(collection.find()))
    pca_drops = ['url', 'recipeType', 'steps']
    df = df.drop(columns=pca_drops)

    # Columns holding lists of dicts are first reduced to lists of strings.
    # Only one column is needed per transform, so map over that column
    # instead of applying a function to every full row.
    df['ingredients'] = df['ingredients'].map(
        lambda items: [x['name'] for x in items]
    )
    df['nutrition'] = df['nutrition'].map(
        lambda items: [f"{x['value']} {x['name']}" for x in items]
    )

    # Expand in the same order as before so the resulting column order is
    # unchanged: the simple list columns first, then ingredients, nutrition.
    list_cols = ['keywords', 'dish', 'course', 'technique', 'cuisine',
                 'ingredients', 'nutrition']
    for col in list_cols:
        df = expand_column(df, col)

    return df

In [5]:
def run_pca(collection, n_components=2, random_state=None):
    """Fit FAMD on the cleaned recipe data and attach the components.

    Args:
        collection: Object exposing ``find()`` that yields recipe documents;
            it is read once here for the original frame and once more inside
            ``clean_columns``.
        n_components: Number of FAMD components to compute. The default of 2
            matches the previous ``FAMD()`` call.
        random_state: Seed forwarded to ``FAMD``. Pass an int for repeatable
            component values; ``None`` keeps the previous unseeded behaviour.

    Returns:
        The original (uncleaned) documents as a DataFrame with the FAMD
        component columns concatenated on the right, index reset.
    """
    org_df = pd.DataFrame(list(collection.find()))
    df = clean_columns(collection)

    # fill missing values: 0 for numeric columns, 'N/A' for everything else
    nums = df.select_dtypes(include=[np.number]).columns
    strs = df.select_dtypes(exclude=[np.number]).columns
    df[nums] = df[nums].fillna(0)
    df[strs] = df[strs].fillna('N/A')

    famd = FAMD(n_components=n_components, random_state=random_state).fit(df)
    fit_df = famd.transform(df)

    # NOTE(review): the side-by-side concat relies on both find() calls
    # returning the documents in the same order — confirm this holds.
    return pd.concat([org_df, fit_df], axis=1).reset_index(drop=True)

In [16]:
if __name__ == '__main__':
    # --- Load the scraped recipes into MongoDB -----------------------------
    dbname = get_database()
    collection = dbname['recipes']

    with open('recipe_data.json') as data_file:
        data = json.load(data_file)

    # NOTE: every run of this cell inserts the file's documents again.
    collection.insert_many(list(data))

    # --- Dimensionality reduction -----------------------------------------
    df = run_pca(collection)

    # --- Copy the recipes into Neo4j --------------------------------------
    # Connection settings come from the environment so that no credential is
    # stored in the notebook; the password is prompted for when it is not set.
    uri = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
    user = os.environ.get('NEO4J_USER', 'neo4j')
    password = os.environ.get('NEO4J_PASSWORD') or getpass('Neo4j password: ')

    # The statement is identical for every record, so build it once.
    query = 'CREATE (recipe:recipes {name: $field1, url: $field2, recipeType: $field3, keywords: \
            $field4, description: $field5, steps: $field6, dish: $field7, course: $field8, \
            technique: $field9, cuisine: $field10, avgRating: $field11, numReviews: $field12, \
            ingredients: $field13})'

    # query data from Mongodb
    data1 = collection.find()

    # Context managers close the driver, session and transaction even when a
    # record fails part-way through the loop.
    with GraphDatabase.driver(uri, auth=(user, password)) as driver:
        with driver.session() as session:
            with session.begin_transaction() as tx:
                for record in data1:
                    # Keep only the ingredient names and drop empty/None ones.
                    ingredients = [ingredient['name'] for ingredient in record['ingredients']]
                    fields = {'field1': record['name'], 'field2': record['url'],
                              'field3': record['recipeType'], 'field4': record['keywords'],
                              'field5': record['description'], 'field6': record['steps'],
                              'field7': record['dish'], 'field8': record['course'],
                              'field9': record['technique'], 'field10': record['cuisine'],
                              'field11': record['avgRating'], 'field12': record['numReviews'],
                              'field13': list(filter(None, ingredients))}

                    tx.run(query, **fields)

                # All nodes are created in one transaction; commit only after
                # every record has been sent.
                tx.commit()
       



In [17]:
# Display the frame returned by run_pca: the original recipe documents with
# the FAMD component columns (0 and 1) appended on the right.
df

Unnamed: 0,_id,name,url,recipeType,keywords,description,steps,dish,course,technique,cuisine,ingredients,avgRating,numReviews,nutrition,0,1
0,64388ae1c2ca552ddb657336,Grilled Swordfish with Chimichurri Sauce,https://www.yummly.com/recipe/Grilled-Swordfis...,YummlyOriginal,[],"The lively Latin American herb, lemon, and chi...",[Preheat a grill for medium heat (350° to 450°...,[],[Main Dishes],[Grilling],[Barbecue],"[{'name': 'swordfish steaks', 'category': 'Sea...",5.000000,1,"[{'name': 'FAT_KCAL', 'value': '260.0 kcal', '...",-5.032939,-3.749327
1,64388ae1c2ca552ddb657337,Tamales,https://www.yummly.com/recipe/Tamales-2691200,BasicRecipe,"[tamales, chicken tamales, tamale, corn tamale...",,[],[Tamale],[],[],[],"[{'name': 'dried corn husks', 'category': 'Glo...",5.000000,1,"[{'name': 'FAT_KCAL', 'value': '120.0 kcal', '...",14.213637,-11.867749
2,64388ae1c2ca552ddb657338,Homemade Hummus Dip,https://www.yummly.com/recipe/Homemade-Hummus-...,BasicRecipe,"[hummus dip, dip, dips, no bake dips, humus di...",,[],[Dips and Spreads],[Appetizers],[Blending],[],"[{'name': 'garlic', 'category': 'Produce', 'qu...",4.478261,23,"[{'name': 'FAT_KCAL', 'value': '130.0 kcal', '...",-52.474434,19.522786
3,64388ae1c2ca552ddb657339,Easy Green Bean Casserole,https://www.yummly.com/recipe/Easy-Green-Bean-...,YummlyOriginal,[],"For many families, no holiday meal is complete...","[If green beans are still frozen, spread them ...","[Green Bean Casserole, Casserole]",[Side Dishes],[Baking],[],"[{'name': 'frozen green beans', 'category': 'F...",,0,"[{'name': 'FAT_KCAL', 'value': '110.0 kcal', '...",-2.568933,6.288517
4,64388ae1c2ca552ddb65733a,Perfect New York Cheesecake,https://www.yummly.com/recipe/Perfect-New-York...,BasicRecipe,"[cheesecake, new york cheesecake, cheese cake,...",,[],[Cheesecake],[Desserts],[Baking],[],"[{'name': 'crust', 'category': 'Bakery', 'quan...",3.000000,2,"[{'name': 'FAT_KCAL', 'value': '310.0 kcal', '...",19.678367,-68.980116
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
981,64388c11c2ca552ddb65770c,Stovetop Chicken Parm with Herby Angel Hair,https://www.yummly.com/recipe/Stovetop-Chicken...,BasicRecipe,"[chicken breast and canned tomatoes, chicken p...","You need a flour bowl, a bowl for milk and egg...","[Place a large, non-stick pan over high heat a...",[],[Main Dishes],[Boiling],[],"[{'name': 'chicken cutlets', 'category': 'Meat...",4.656250,32,[],20.254345,16.603507
982,64388c11c2ca552ddb65770d,Easy Shredded Chicken Breast Hack,https://www.yummly.com/recipe/Easy-Shredded-Ch...,YummlyOriginal,[],"To plan for busy weeknights, prepping ahead is...",[Preheat the oven to 350°F. Line a sheet pan w...,[],[Lunch],[Baking],[],"[{'name': 'boneless skinless chicken breasts',...",5.000000,6,"[{'name': 'FAT_KCAL', 'value': '25.0 kcal', 'p...",-3.339066,5.192533
983,64388c11c2ca552ddb65770e,Italian Cupboard Soup,https://www.yummly.com/recipe/Italian-Cupboard...,BasicRecipe,"[soup, soups, pork soup, all soups, italian so...",Complete this soup with warmed Italian bread a...,"[In a deep saucepan, brown the pork in a littl...",[],[Soups],"[Browning, Boiling]",[],"[{'name': 'boneless pork chops', 'category': '...",4.562500,16,"[{'name': 'FAT_KCAL', 'value': '25.0 kcal', 'p...",21.008135,-11.177556
984,64388c11c2ca552ddb65770f,American Pride Pork Chop,https://www.yummly.com/recipe/American-Pride-P...,BasicRecipe,"[pork chops, pork chop, with pork with pork ch...",,"[Combine the brown sugar, salt, pepper, allspi...",[],[Main Dishes],[Grilling],[],"[{'name': 'bone-in ribeye (rib) pork chops', '...",4.833333,6,"[{'name': 'K', 'value': '5.0 g', 'pctDailyValu...",-0.960588,0.047394
