In [6]:
# Basic imports
import os
import time
import timeit
import numpy as np
import pandas as pd
import scipy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Machine Learning packages
from sklearn import ensemble
from sklearn.feature_selection import chi2, f_classif, SelectKBest 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.preprocessing import normalize

# Natural Language processing
import nltk
import re
from collections import Counter
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.datasets import fetch_rcv1
from sklearn.feature_extraction.text import TfidfVectorizer

# Clustering packages
import sklearn.cluster as cluster
from sklearn.cluster import KMeans, MeanShift, estimate_bandwidth, SpectralClustering, AffinityPropagation
from scipy.spatial.distance import cdist

# Plotly packages
import plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go
from plotly import tools
import cufflinks as cf
import ipywidgets as widgets
from scipy import special
py.offline.init_notebook_mode(connected=True)

In [9]:
# Import the dataframe
cuisine_train = pd.read_json('data/train.json')
cuisine_test = pd.read_json('data/test.json')

In [17]:
# Print size of the training and testing data sets
print('Training set size: ' + str(cuisine_train.shape))
print('Testing set size: ' + str(cuisine_test.shape))

Training set size: (39774, 3)
Testing set size: (9944, 2)


In [10]:
cuisine_train.head()

Unnamed: 0,cuisine,id,ingredients
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,22213,"[water, vegetable oil, wheat, salt]"
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe..."


In [22]:
cuisine_train['ingredients'].str.get_dummies(sep=',')

Unnamed: 0,"""BREAKSTONE'S Sour Cream""","""Best Food's Mayonnaise with Lime Juice""","""Campbell's Condensed Cheddar Cheese Soup""","""Campbell's Condensed Cream of Chicken Soup""","""Campbell's Condensed Cream of Chicken Soup""]","""Campbell's Condensed Cream of Mushroom Soup""","""Campbell's Condensed Tomato Soup""","""Colman's Mustard Powder""","""Cox's Orange Pippin""","""Eggland's BestÂ® eggs""",...,['yellow tomato',['yoghurt',['yogurt low fat',['yolk',['yucca root',['yukon gold potatoes',['yuzu juice',['zinfandel',['ziti',['zucchini'
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
cuisine_train

Unnamed: 0,cuisine,id,ingredients,concat_ingredients
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes...","romaine lettuce,black olives,grape tomatoes,ga..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g...","plain flour,ground pepper,salt,tomatoes,ground..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g...","eggs,pepper,salt,mayonaise,cooking oil,green c..."
3,indian,22213,"[water, vegetable oil, wheat, salt]","water,vegetable oil,wheat,salt"
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe...","black pepper,shallots,cornflour,cayenne pepper..."
5,jamaican,6602,"[plain flour, sugar, butter, eggs, fresh ginge...","plain flour,sugar,butter,eggs,fresh ginger roo..."
6,spanish,42779,"[olive oil, salt, medium shrimp, pepper, garli...","olive oil,salt,medium shrimp,pepper,garlic,cho..."
7,italian,3735,"[sugar, pistachio nuts, white almond bark, flo...","sugar,pistachio nuts,white almond bark,flour,v..."
8,mexican,16903,"[olive oil, purple onion, fresh pineapple, por...","olive oil,purple onion,fresh pineapple,pork,po..."
9,italian,12734,"[chopped tomatoes, fresh basil, garlic, extra-...","chopped tomatoes,fresh basil,garlic,extra-virg..."
