### Import pandas and NumPy

In [141]:
import pandas as pd
import numpy as np

## Read Recipe data and write CSVs for Recipes, Ingredients, and Users

### Read the recipe JSON and clean the DataFrame (the recipe data was generated with ChatGPT)

In [180]:
# Columns kept from each recipe record, in output order.
recipe_columns = [
    'name', 'author', 'meal', 'dish', 'cuisine',
    'servings', 'time_required', 'ingredients', 'directions',
]

# The seed file wraps the records under a top-level "recipes" key;
# max_level=0 keeps nested values (e.g. the ingredient lists) unflattened.
recipes = pd.json_normalize(
    pd.read_json('../seeds/recipes.json', orient='records')['recipes'],
    max_level=0,
)[recipe_columns]

# Shift the positional index so ids start at 1, then expose it as recipe_id.
recipes.index = recipes.index + 1
recipes = recipes.rename_axis('recipe_id').reset_index()

### Retrieve users from the recipe list to populate User table

In [181]:
# One row per distinct author (first occurrence wins), stored under "name".
users = (
    recipes['author']
    .drop_duplicates()
    .reset_index(drop=True)
    .rename('name')
)

# Number the authors from 1 and turn that numbering into the user_id column.
users.index = users.index + 1
users = users.rename_axis('user_id').reset_index()

### Retrieve ingredients from the recipe list to populate Ingredient table

In [182]:
# One row per (recipe, ingredient entry); only recipe_id is needed alongside
# the exploded entry, so the other recipe columns are left behind up front.
ingredients = (
    recipes[['recipe_id', 'ingredients']]
    .explode('ingredients')
    .reset_index(drop=True)
)

# Each entry is unpacked positionally into name / amount / metric.
split = pd.DataFrame(
    ingredients['ingredients'].to_list(),
    columns=['name', 'amount', 'metric'],
)

# Both frames share the same 0..n-1 index, so a column-wise concat lines up.
ingredients = pd.concat([ingredients[['recipe_id']], split], axis=1)

# Number the rows from 1 and expose that numbering as ingredient_id.
ingredients.index = ingredients.index + 1
ingredients = ingredients.rename_axis('ingredient_id').reset_index()

### Use IDs from Ingredient and User table to add references in Recipe table

In [183]:
# Replace each recipe's raw ingredient entries with the list of ingredient_ids
# that reference it. The ids are grouped once up front instead of re-filtering
# the whole ingredients table for every recipe.
ingredient_ids_by_recipe = {
    recipe_id: ids.to_list()
    for recipe_id, ids in ingredients.groupby('recipe_id')['ingredient_id']
}
# A recipe with no matching ingredient rows gets an empty list.
recipes['ingredients'] = recipes['recipe_id'].map(
    lambda recipe_id: ingredient_ids_by_recipe.get(recipe_id, [])
)

In [184]:
# Swap the author name for the matching user_id. A single vectorised lookup
# (name -> user_id) replaces the per-row scan of the users table.
user_id_by_name = users.set_index('name')['user_id']
author_ids = recipes['author'].map(user_id_by_name)

# Fail loudly rather than writing NaN ids if an author has no users row.
assert author_ids.notna().all(), 'every recipe author must exist in users'

recipes['user_id'] = author_ids
recipes = recipes.drop(columns='author')

In [185]:
# Number of non-null recipe names. Compare against
# recipes['name'].drop_duplicates().count() to spot duplicated names.
recipes['name'].count()

88

### Verify tables have been set up properly

In [186]:
# Show the first five rows to confirm the column layout of the table.
ingredients.head(n=5)

Unnamed: 0,ingredient_id,recipe_id,name,amount,metric
0,1,1,spinach,2,"cups, fresh"
1,2,1,kale,1,"cup, fresh"
2,3,1,banana,1,frozen
3,4,1,green_apple,1,cored and chopped
4,5,1,kiwi,2,peeled and sliced


In [187]:
# ingredients['name'].unique()

### QA/QC to make sure the numbers are isolated

In [188]:
# List the distinct raw amount values, in order of first appearance.
pd.unique(ingredients['amount'])

array(['2', '1', '3', '', '4', '6', '1/4', '1/2', '9', '28', '15', '14',
       '250', '200', '500', '8', '1 1/2', '400', '300', '100', '800',
       '20', '12', '5', '150', '30', '32', '1/3', '16', '1.5', '750'],
      dtype=object)

### Write tables to individual CSVs

In [191]:
# index=True is the pandas default: the positional index is written as a
# leading unnamed column, ahead of recipe_id.
recipes.to_csv('recipes.csv', index=True)

In [192]:
# index=True is the pandas default: the positional index is written as a
# leading unnamed column, ahead of ingredient_id.
ingredients.to_csv('ingredients.csv', index=True)

In [193]:
# index=True is the pandas default: the positional index is written as a
# leading unnamed column, ahead of user_id.
users.to_csv('users.csv', index=True)

## Read Comments JSON and write to Ratings CSV

### Read the comments JSON and clean the DataFrame (the comment data was generated with ChatGPT)

In [200]:
# The file is read with its top-level keys as the row index; each row's
# "comments" value is exploded so every comment gets its own row. The index
# becomes recipe_id and the exploded value becomes the comment column.
ratings = (
    pd.read_json('../seeds/comments.json', orient='index')['comments']
    .explode()
    .rename('comment')
    .rename_axis('recipe_id')
    .reset_index()
)

### Generate random ratings and user_ids

In [201]:
# Seeded generator so that Restart & Run All regenerates the same ratings.csv
# instead of a different random table on every run.
RATINGS_SEED = 42
ratings_rng = np.random.default_rng(RATINGS_SEED)
n_ratings = ratings.shape[0]

# Upper bounds are exclusive: ratings fall in 3..5.
ratings['rating'] = ratings_rng.integers(3, 6, n_ratings)
# user_ids fall in 1..85.
# NOTE(review): 86 presumably corresponds to len(users) + 1 — confirm against the users table.
ratings['user_id'] = ratings_rng.integers(1, 86, n_ratings)

# Expose the positional index (starting at 0) as rating_id.
ratings = ratings.reset_index().rename(columns={'index':'rating_id'})

### Verify data is formatted correctly

In [202]:
# Inspect the last twenty rows to confirm the final column layout.
ratings.tail(n=20)

Unnamed: 0,rating_id,recipe_id,comment,rating,user_id
244,244,83,"Risotto is a favorite, and this Creamy Mushroo...",5,77
245,245,83,Creamy Mushroom Risotto is a must-try for any ...,5,21
246,246,84,"Meat Pie (Tourtière) is a Canadian classic, an...",4,25
247,247,84,A taste of Canada! The Meat Pie (Tourtière) wa...,5,5
248,248,84,Meat Pie (Tourtière) is a must-try Canadian di...,4,59
249,249,85,"Coq au Vin is a classic French dish, and this ...",4,6
250,250,85,A taste of France! The Coq au Vin was flavorfu...,3,38
251,251,85,"Coq au Vin is a must-try French dish, and this...",3,11
252,252,86,"Ratatouille is a classic French dish, and this...",4,7
253,253,86,A taste of France! The Ratatouille was flavorf...,3,79


### QA/QC to make sure the recipe_ids are complete

In [203]:
# Compare the recipe_ids that received comments against the probe range 0..99.
probe_ids = set(range(0, 100))
commented_ids = set(ratings['recipe_id'].unique())

# Ids in the probe range that have no comments.
print(probe_ids - commented_ids)
# Ids with comments that fall outside the probe range (expected: empty set).
print(commented_ids - probe_ids)

{0, 96, 97, 98, 99, 89, 90, 91, 92, 93, 94, 95}
set()


### Write table to CSV

In [204]:
# index=True is the pandas default: the positional index is written as a
# leading unnamed column, ahead of rating_id.
ratings.to_csv('ratings.csv', index=True)