### Import pandas and NumPy libraries

In [116]:
import pandas as pd
import numpy as np

## Read Recipe data and write CSVs for Recipes, Ingredients, and Users

### Read recipe json and clean dataframe (populated from ChatGPT)

In [38]:
# Columns kept from each recipe record, in output order.
RECIPE_COLUMNS = ['name', 'author', 'meal', 'dish', 'cuisine', 'servings', 'time_required', 'ingredients', 'directions']

# The seed file is read into a frame whose 'recipes' column holds one record per recipe.
raw_recipes = pd.read_json('../seeds/recipes.json', orient='records')

# Flatten only the top level of each record (nested values such as the ingredient
# lists stay intact), then expose the positional index as the recipe_id key.
recipes = (
    pd.json_normalize(raw_recipes['recipes'], max_level=0)[RECIPE_COLUMNS]
    .reset_index()
    .rename(columns={'index': 'recipe_id'})
)

### Retrieve users from the recipe list to populate User table

In [39]:
# One row per distinct author, in order of first appearance in `recipes`.
unique_authors = recipes['author'].drop_duplicates().reset_index(drop=True)

# The fresh 0..n-1 index becomes user_id; the author string becomes the user's name.
users = unique_authors.reset_index().rename(columns={'index': 'user_id', 'author': 'name'})

### Retrieve ingredients from the recipe list to populate Ingredient table

In [40]:
# One row per (recipe, ingredient) pair. The recipe's own 'name' column is dropped
# so it cannot clash with the ingredient 'name' column created below.
exploded = recipes.explode('ingredients').reset_index().drop(columns='name')

# Each exploded entry is a sequence that is unpacked into name / amount / metric columns.
ingredient_fields = pd.DataFrame(
    exploded['ingredients'].to_list(),
    columns=['name', 'amount', 'metric'],
)

# Both frames share the same 0..n-1 index, so a column-wise concat lines the rows up.
ingredients = pd.concat([exploded, ingredient_fields], axis=1)
ingredients = ingredients[['recipe_id', 'name', 'amount', 'metric']]

# The positional index becomes the ingredient_id key.
ingredients = ingredients.reset_index().rename(columns={'index': 'ingredient_id'})

### Use IDs from Ingredient and User table to add references in Recipe table

In [41]:
def _ingredient_ids(recipe_id):
    """Return the ingredient_id values of the `ingredients` rows that belong to `recipe_id`."""
    belongs_to_recipe = ingredients['recipe_id'] == recipe_id
    return ingredients.loc[belongs_to_recipe, 'ingredient_id'].to_list()


# Replace each recipe's nested ingredient data with a list of ingredient_id references.
recipes['ingredients'] = recipes['recipe_id'].apply(_ingredient_ids)

In [42]:
# Build a name -> user_id lookup once. `users` holds one row per distinct author
# (it was built with drop_duplicates), so the name index is unique as Series.map requires.
user_id_by_name = users.set_index('name')['user_id']

# Vectorised lookup instead of scanning `users` once per recipe.
recipes['user_id'] = recipes['author'].map(user_id_by_name)

# Fail loudly if any recipe points at an author that is missing from `users`,
# rather than writing a recipe row without a valid user reference.
assert recipes['user_id'].notna().all(), 'every recipe author must exist in users'

# The author string is now redundant with the user_id reference.
recipes = recipes.drop(columns='author')

In [43]:
# Number of non-null recipe names. To check for duplicated names, compare with
# recipes['name'].drop_duplicates().count().
recipes['name'].count()

50

### Verify tables have been set up properly

In [46]:
# recipes.head()

In [47]:
# ingredients['name'].unique()

### QA/QC to make sure the numbers are isolated

In [48]:
# Distinct raw amount values. In the recorded output these are strings, including
# '' (no amount given) and fractions such as '1/4' and '1 1/2'.
ingredients['amount'].unique()

array(['2', '1', '3', '', '4', '6', '1/4', '1/2', '9', '28', '15', '14',
       '250', '200', '500', '8', '1 1/2', '400', '300', '100', '800',
       '20', '12', '5', '150'], dtype=object)

### Write tables to individual CSVs

In [49]:
# Written relative to the current working directory.
# NOTE(review): with pandas' default index=True the row index is emitted as an extra
# unnamed first column next to recipe_id — confirm the CSV consumer expects that.
recipes.to_csv('recipes.csv')

In [50]:
# Written relative to the current working directory.
# NOTE(review): with pandas' default index=True the row index is emitted as an extra
# unnamed first column next to ingredient_id — confirm the CSV consumer expects that.
ingredients.to_csv('ingredients.csv')

In [51]:
# Written relative to the current working directory.
# NOTE(review): with pandas' default index=True the row index is emitted as an extra
# unnamed first column next to user_id — confirm the CSV consumer expects that.
users.to_csv('users.csv')

## Read Comments JSON and write to Ratings CSV

### Read comments json and clean dataframe (populated from ChatGPT)

In [117]:
# With orient='index' the JSON's top-level keys become the frame's index.
comments_by_recipe = pd.read_json('../seeds/comments.json', orient='index')

# One row per individual comment; the original index is carried along as recipe_id.
ratings = (
    comments_by_recipe['comments']
    .explode()
    .reset_index()
    .rename(columns={'index': 'recipe_id', 'comments': 'comment'})
)

### Generate random ratings and user_ids

In [118]:
# Seeded generator so the synthetic ratings (and the CSV written from them) are
# reproducible on every Restart & Run All.
rng = np.random.default_rng(42)
n_ratings = ratings.shape[0]

# Whole-number ratings drawn uniformly from 3, 4 and 5 (the upper bound is exclusive).
ratings['rating'] = rng.integers(3, 6, n_ratings)

# Draw reviewers from the ids that actually exist in `users` instead of a hard-coded
# range, so every rating references a real user and no user id is silently excluded.
ratings['user_id'] = rng.choice(users['user_id'].to_numpy(), size=n_ratings)

# The positional index becomes the rating_id key.
ratings = ratings.reset_index().rename(columns={'index':'rating_id'})

### Verify data is formatted correctly

In [119]:
# Display the last 20 rows to eyeball the rating_id, recipe_id, comment, rating and user_id columns.
ratings.tail(20)

Unnamed: 0,rating_id,recipe_id,comment,rating,user_id
130,130,48,A taste of Korea! The Dak Galbi was flavorful ...,3,19
131,131,48,Delicious and satisfying. Dak Galbi is a must-...,3,16
132,132,49,Banchan - Spinach Namul is a Korean side dish ...,3,0
133,133,49,A taste of Korea! The Banchan - Spinach Namul ...,5,47
134,134,49,Authentic and delicious. Banchan - Spinach Nam...,5,21
135,135,5,"Spicy, savory, and the shrimp were cooked to p...",3,45
136,136,5,Absolutely loved it! The balance of flavors an...,4,36
137,137,5,"Thai red curry is my go-to, and this recipe di...",4,4
138,138,6,"Crispy on the outside, tender on the inside. T...",3,34
139,139,6,A delightful salmon dish. The lemon and garlic...,3,6


### QA/QC to make sure the recipe_ids are complete

In [120]:
# Recipe ids that should be present (0..49) versus those found in the ratings.
expected_ids = set(range(0, 50))
observed_ids = set(ratings.recipe_id.unique())

# First line: expected ids with no rating. Second line: rated ids outside the expected range.
# Both print an empty set when the ratings cover exactly the expected recipes.
print(expected_ids - observed_ids)
print(observed_ids - expected_ids)

set()
set()


### Write table to CSV

In [121]:
# Written relative to the current working directory.
# NOTE(review): with pandas' default index=True the row index is emitted as an extra
# unnamed first column next to rating_id — confirm the CSV consumer expects that.
ratings.to_csv('ratings.csv')