# Project: Cuisine Classified
Authors: 
* Ryan Chang - rchan123
* Ryan Chandler - rchan129
* Kunal Mittal - kmitt006
* Fiorello Estuar - festu001
* Kiet Lam - klam073

# Description

This project aims to analyze the relationships between the flavors found in different foods and the cuisines in which those flavors are prominent. 

# Reading/Cleaning Data
This section handles the cleaning and pre-processing of the data so that analysis can take place.

## Reading

In [1]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import json, os

In [2]:
# Reading in the data: every '*.json' recipe list in the folder contributes
# one row per recipe to food_df, indexed by the recipe id.
food_list_folder = 'data/lists/'
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and which row wins when a recipe id repeats) reproducible between runs.
food_list_jsons = sorted(
    food_json for food_json in os.listdir(food_list_folder) if food_json.endswith('.json')
)

food_columns = ['recipeName', 'ingredients', 'cuisine', 'flavors']

# Collect the rows keyed by recipe id and build the frame once at the end.
# Enlarging a DataFrame one `.loc[id] = row` at a time re-allocates on every
# insert; a dict gives the same result (a repeated id overwrites the earlier
# row while keeping its original position) without the repeated copies.
recipes_by_id = {}
for js in food_list_jsons:
    # Explicit encoding so parsing does not depend on the platform default.
    with open(os.path.join(food_list_folder, js), encoding='utf-8') as food_list_file:
        food_json = json.load(food_list_file)
    for f_obj in food_json['matches']:
        # A missing field raises KeyError here rather than producing a
        # partially filled row.
        recipes_by_id[f_obj['id']] = [
            f_obj['recipeName'],
            f_obj['ingredients'],
            f_obj['attributes']['cuisine'],
            f_obj['flavors'],
        ]

food_df = pd.DataFrame(
    list(recipes_by_id.values()),
    index=list(recipes_by_id.keys()),
    columns=food_columns,
)

## Cleaning

In [3]:
# keep only the recipes whose 'flavors' entry is present (non-null)
has_flavors = food_df['flavors'].notna()
food_df = food_df.loc[has_flavors]
food_df.head()

Unnamed: 0,recipeName,ingredients,cuisine,flavors
Revolutionary-Mac-_-Cheese-1048867,Revolutionary Mac & Cheese,"[dried pasta, milk, shredded cheddar cheese, s...","[Kid-Friendly, Italian, American]","{'piquant': 0.16666666666666666, 'meaty': 0.16..."
Chicago-Chicken-822419,Chicago Chicken,"[jalapeno chilies, lemon, dried oregano, olive...","[Barbecue, American]","{'piquant': 0.8333333333333334, 'meaty': 0.666..."
Chicken-Avocado-Burgers-1031197,Chicken Avocado Burgers,"[ground chicken, avocado, chopped garlic, pank...","[Barbecue, Mexican, American]","{'piquant': 0.6666666666666666, 'meaty': 1.0, ..."
Best-Basic-Burger-500667,Best Basic Burger,"[ground beef, eggs, salt, ground black pepper,...","[Barbecue, American]","{'piquant': 0.8333333333333334, 'meaty': 0.833..."
Easy-garlic-and-lemon-shrimp-309257,Easy Garlic and Lemon Shrimp,"[jumbo shrimp, olive oil, butter, minced garli...","[Barbecue, Italian, Asian, American]","{'piquant': 0.0, 'meaty': 0.8333333333333334, ..."


In [4]:
# clean the flavors: replace the per-recipe 'flavors' dict with one column
# per flavor.
flavor_names = ['piquant', 'meaty', 'bitter', 'sweet', 'sour', 'salty']

# One list per flavor, built row by row so every list has exactly one entry
# per recipe. A recipe whose dict lacks a flavor gets NaN for it, which keeps
# the lists aligned with the rows instead of coming up short.
flavor_dict = {
    flavor_name: [
        recipe_flavors.get(flavor_name, np.nan) for recipe_flavors in food_df['flavors']
    ]
    for flavor_name in flavor_names
}

# Drop the raw dict column and append the flavor columns in a single step.
# Any failure (e.g. a missing 'flavors' column when the cell is re-run) now
# surfaces as an error instead of being swallowed while 'flavors' is dropped.
food_df = food_df.drop(columns=['flavors']).assign(**flavor_dict)

# Last expression of the cell, so the notebook actually displays the result.
food_df.head()

In [5]:
# clean the cuisines: give each cuisine its own column, labelled 0 or 1 based on presence


# Elementary Data Analysis

In [6]:
# flavor
#   v1. MMM - using every recipe together at first (broken down by cuisine)
#       - flavors: broken down into the 6 categories
#   v2. which are the top n cuisines (frequency) in the dataset
#   v3. for each of the cuisines, visualize the flavors - spider graph
#   v4. ??? 

# ingredients
#   v1. MMM
#   v2. 
