# Project: Cuisine Classified
Authors: 
* Ryan Chang - rchan123
* Ryan Chandler - rchan129
* Kunal Mittal - kmitt006
* Fiorello Estuar - festu001
* Kiet Lam - klam073

# Description

This project aims to analyze the relationships between various flavors found in different foods and the cuisines that they are prominent in. 

# Reading/Cleaning Data
This section handles any cleaning/pre-processing of data such that analysis can take place.

## Reading

In [1]:
# importing libraries
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import plotly.express as px
import json, os

flavor_dict = {'piquant': [], 'meaty': [], 'bitter': [], 'sweet': [], 'sour': [], 'salty': []}
FLAVORS = list(flavor_dict.keys())
CUISINES = ['American','Chinese','Cuban','English','French','German','Greek','Hawaiian','Hungarian','Indian','Italian','Japanese','Mexican','Moroccan','Portuguese','Spanish','Swedish','Thai']

In [2]:
# Reading in the data
food_list_folder = 'data/lists/'
food_list_jsons = [food_json for food_json in os.listdir(food_list_folder) if food_json.endswith('.json')]

food_df = pd.DataFrame(columns=['recipeName','ingredients','cuisine','flavors'])

for index, js in enumerate(food_list_jsons):
    with open(os.path.join(food_list_folder, js)) as food_list_file:
        food_json = json.load(food_list_file)
        num_items = len(food_json['matches'])
        for idx in range(0,num_items):
            f_obj = food_json['matches'][idx]
            f_id = f_obj['id']
            f_recipeName = f_obj['recipeName']
            f_ingredients = f_obj['ingredients']
            f_cuisine = f_obj['attributes']['cuisine']
            f_flavors = f_obj['flavors']
            food_df.loc[f_id] =  np.array([f_recipeName,f_ingredients,f_cuisine,f_flavors],dtype=object)

## Cleaning

In [3]:
# dropping all rows that do not have labels
food_df = food_df[food_df['flavors'].notna()]
food_df.head()

Unnamed: 0,recipeName,ingredients,cuisine,flavors
Revolutionary-Mac-_-Cheese-1048867,Revolutionary Mac & Cheese,"[dried pasta, milk, shredded cheddar cheese, s...","[Kid-Friendly, Italian, American]","{'piquant': 0.16666666666666666, 'meaty': 0.16..."
Chicago-Chicken-822419,Chicago Chicken,"[jalapeno chilies, lemon, dried oregano, olive...","[Barbecue, American]","{'piquant': 0.8333333333333334, 'meaty': 0.666..."
Chicken-Avocado-Burgers-1031197,Chicken Avocado Burgers,"[ground chicken, avocado, chopped garlic, pank...","[Barbecue, Mexican, American]","{'piquant': 0.6666666666666666, 'meaty': 1.0, ..."
Best-Basic-Burger-500667,Best Basic Burger,"[ground beef, eggs, salt, ground black pepper,...","[Barbecue, American]","{'piquant': 0.8333333333333334, 'meaty': 0.833..."
Easy-garlic-and-lemon-shrimp-309257,Easy Garlic and Lemon Shrimp,"[jumbo shrimp, olive oil, butter, minced garli...","[Barbecue, Italian, Asian, American]","{'piquant': 0.0, 'meaty': 0.8333333333333334, ..."


In [4]:
# clean the flavors
flavor_dict = {'piquant': [], 'meaty': [], 'bitter': [], 'sweet': [], 'sour': [], 'salty': []}
for flavors in food_df.flavors:    
    for specificFlavor in flavors:
        flavor_dict[specificFlavor].append(flavors[specificFlavor])

for flavorKey in flavor_dict:
    try:
        food_df.insert(len(food_df.columns), flavorKey, flavor_dict[flavorKey])
    except:
        pass

food_df.drop(columns = ["flavors"], inplace=True)

In [11]:
# clean the cuisines, give each it's own column and label 0 or 1 based on presence
for cus in CUISINES:
    for key, cus_list in food_df.iterrows():
        food_df.loc[key,cus] = 1 if cus in cus_list['cuisine'] else 0
food_df.head()

Unnamed: 0,recipeName,ingredients,cuisine,piquant,meaty,bitter,sweet,sour,salty,American,...,Hungarian,Indian,Italian,Japanese,Mexican,Moroccan,Portuguese,Spanish,Swedish,Thai
Revolutionary-Mac-_-Cheese-1048867,Revolutionary Mac & Cheese,"[dried pasta, milk, shredded cheddar cheese, s...","[Kid-Friendly, Italian, American]",0.166667,0.166667,0.666667,0.166667,0.166667,0.833333,1,...,0,0,1,0,0,0,0,0,0,0
Chicago-Chicken-822419,Chicago Chicken,"[jalapeno chilies, lemon, dried oregano, olive...","[Barbecue, American]",0.833333,0.666667,0.166667,0.0,0.833333,0.166667,1,...,0,0,0,0,0,0,0,0,0,0
Chicken-Avocado-Burgers-1031197,Chicken Avocado Burgers,"[ground chicken, avocado, chopped garlic, pank...","[Barbecue, Mexican, American]",0.666667,1.0,1.0,1.0,1.0,1.0,1,...,0,0,0,0,1,0,0,0,0,0
Best-Basic-Burger-500667,Best Basic Burger,"[ground beef, eggs, salt, ground black pepper,...","[Barbecue, American]",0.833333,0.833333,0.666667,0.166667,0.166667,0.833333,1,...,0,0,0,0,0,0,0,0,0,0
Easy-garlic-and-lemon-shrimp-309257,Easy Garlic and Lemon Shrimp,"[jumbo shrimp, olive oil, butter, minced garli...","[Barbecue, Italian, Asian, American]",0.0,0.833333,0.333333,0.166667,0.5,0.666667,1,...,0,0,1,0,0,0,0,0,0,0


In [6]:
testing_df_kunal = food_df.copy()

def filterCuisines(df):
    newCol = []
    for cuisineList in df["cuisine"]:
        temp = []
        for cuisine in cuisineList:
            if cuisine in CUISINES:
                temp.append(cuisine)
        newCol.append(temp)
    return newCol

testing_df_kunal.assign(CleanedCuisine=filterCuisines, inplace=True)
testing_df_kunal.head()



Unnamed: 0,recipeName,ingredients,cuisine,piquant,meaty,bitter,sweet,sour,salty,American,...,Hungarian,Indian,Italian,Japanese,Mexican,Moroccan,Portuguese,Spanish,Swedish,Thai
Revolutionary-Mac-_-Cheese-1048867,Revolutionary Mac & Cheese,"[dried pasta, milk, shredded cheddar cheese, s...","[Kid-Friendly, Italian, American]",0.166667,0.166667,0.666667,0.166667,0.166667,0.833333,True,...,False,False,True,False,False,False,False,False,False,False
Chicago-Chicken-822419,Chicago Chicken,"[jalapeno chilies, lemon, dried oregano, olive...","[Barbecue, American]",0.833333,0.666667,0.166667,0.0,0.833333,0.166667,True,...,False,False,False,False,False,False,False,False,False,False
Chicken-Avocado-Burgers-1031197,Chicken Avocado Burgers,"[ground chicken, avocado, chopped garlic, pank...","[Barbecue, Mexican, American]",0.666667,1.0,1.0,1.0,1.0,1.0,True,...,False,False,False,False,True,False,False,False,False,False
Best-Basic-Burger-500667,Best Basic Burger,"[ground beef, eggs, salt, ground black pepper,...","[Barbecue, American]",0.833333,0.833333,0.666667,0.166667,0.166667,0.833333,True,...,False,False,False,False,False,False,False,False,False,False
Easy-garlic-and-lemon-shrimp-309257,Easy Garlic and Lemon Shrimp,"[jumbo shrimp, olive oil, butter, minced garli...","[Barbecue, Italian, Asian, American]",0.0,0.833333,0.333333,0.166667,0.5,0.666667,True,...,False,False,True,False,False,False,False,False,False,False


# Elementary Data Analysis

In [7]:
# flavor
#   v1. MMM - using every recipe together at first (broken down by cuisine)
#       - flavors: broken down into the 6 categories
#   v2. which are the top n cuisines (frequency) in the dataset
#   v3. for each of the cuisines, visualize the flavors - spider graph
#   v4. ??? 

# ingredients
#   v1. MMM
#   v2. 


## Cuisine vs. Flavor

In this section, we aim to analyze the relationship between a recipe's cuisine and the flavors that are associated with each respective cuisine.

In [16]:
cf_df_1 = food_df[CUISINES + FLAVORS]
cuisine_flavor_means = pd.pivot_table(cf_df_1,index=FLAVORS,columns=CUISINES,aggfunc=np.mean)
cuisine_flavor_means.head()

piquant,meaty,bitter,sweet,sour,salty
0.0,0.0,0.166667,0.0,0.0,0.166667
0.0,0.0,0.166667,0.0,0.166667,0.166667
0.0,0.0,0.166667,0.166667,0.0,0.166667
0.0,0.0,0.333333,0.166667,0.166667,0.833333
0.0,0.166667,0.166667,0.0,0.0,0.833333


# K-Nearest Neighbors Classifier

In [9]:
#ML STUFF
neighbordata = food_df.loc[:, FLAVORS].values.tolist()
def knn(unknown, data, neighbors: int):
    classifier = NearestNeighbors(n_neighbors=neighbors)
    classifier.fit(data)
    return classifier.kneighbors([unknown])

closestpts = knn(random, datastuff, 5)
finalCuisine = dict(zip(CUISINES, [0] * len(CUISINES)))
for food in closestpts:
    for listCuisines in food_df.iloc[food, food_df.columns.get_loc("cuisine")]:
        for cuisine in listCuisines:
             finalCuisine[cuisine] += 1/len(listCuisines)
        

NameError: name 'random' is not defined

{'American': 0,
 'Chinese': 0,
 'Cuban': 0,
 'English': 0,
 'French': 0,
 'German': 0,
 'Greek': 0,
 'Hawaiian': 0,
 'Hungarian': 0,
 'Indian': 0,
 'Italian': 0,
 'Japanese': 0,
 'Mexican': 0,
 'Moroccan': 0,
 'Portuguese': 0,
 'Spanish': 0,
 'Swedish': 0,
 'Thai': 0}