# What's_Cooking

<img src='whatscooking.png'>

In [1]:
import pandas as pd

In [2]:
df = pd.read_json(open("train.json", "r", encoding="utf8"))

In [4]:
df.head()

Unnamed: 0,id,cuisine,ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,22213,indian,"[water, vegetable oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe..."


In [5]:
df.shape

(39774, 3)

In [6]:
df['ingredients'].head(5)

0    [romaine lettuce, black olives, grape tomatoes...
1    [plain flour, ground pepper, salt, tomatoes, g...
2    [eggs, pepper, salt, mayonaise, cooking oil, g...
3                  [water, vegetable oil, wheat, salt]
4    [black pepper, shallots, cornflour, cayenne pe...
Name: ingredients, dtype: object

In [7]:
setim = set() # Burada set kullandım çünkü listede eğer aynı veriden varsa bunlardan tekini alacak ve bize 
              # farklı olanları verecek

for i in df['ingredients']: 
    for j in i:
        setim.add(j)
len(setim) 

6714

In [11]:
setim

{'cress',
 'poured fondant',
 'canned beef broth',
 'vermicelli noodles',
 'bulb',
 'duck breasts',
 'cardamom seeds',
 'white miso',
 'sliced meat',
 'fenugreek seeds',
 'ham',
 'refrigerated biscuits',
 'cereal',
 'spam',
 'quick oats',
 'low-fat turkey kielbasa',
 'canned coconut milk',
 'pastry tart shell',
 'white hominy',
 'besan (flour)',
 'black cumin seeds',
 'dark chocolate',
 'farofa',
 'mo hanh',
 'fresh onion',
 'onion slices',
 'light chicken stock',
 'boneless beef chuck roast',
 'gluten-free pasta',
 'fregola',
 'firm tofu',
 'Zatarain’s Jambalaya Mix',
 'fermented bean curd',
 'sesame seeds buns',
 'blanched almond flour',
 'mcintosh apples',
 'chocolate baking bar',
 'granulated garlic',
 'breast of lamb',
 'bacon salt',
 'sake',
 'gran marnier',
 'black olives',
 'verjus',
 'gravy mix mushroom',
 'pappardelle pasta',
 'jujube',
 'taco seasoning',
 'epazote',
 'Kroger Black Beans',
 'wine syrup',
 'baby leaf lettuce',
 'canned tomatoes',
 'Knorr® Pasta Sides™ - Chicke

# Data Cleansing

In [12]:
def clearingandconverting(text):
    
      
    text =" ".join(text)  # Virgüllerle ayrılmış listeyi join methodu ile bir cümle haline getirdim
    
    text=text.lower()                    # Buradan sonraki 4 satırd ise NLP methodlarını uygulayabilmek adına
                                         # bütün veriyi küçük harflere çevirdik ve içlerinden numerik 
                                         # verileri ve de sembolleri attık
    text=text.replace("[^\w\s]","") 
    text=text.replace("\d+","") 
    text=text.replace("\n"," ").replace("\r","") 
    
    return text

In [13]:
df['ingredients'] = df['ingredients'].apply(clearingandconverting)

# Modeling

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from textblob import TextBlob
from nltk.stem.snowball import SnowballStemmer

stemmer = SnowballStemmer('english') 

def split_into_lemmas(text):    # Stemma analiz methodunu tanımladık
    
    text = str(text).lower()   
    
    words = TextBlob(text).words
    
    return [stemmer.stem(word) for word in words]

In [15]:
x,y=df['ingredients'],df['cuisine']

In [16]:
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=80)

In [17]:
vect=CountVectorizer(lowercase=True, stop_words='english', ngram_range=(1,2), analyzer=split_into_lemmas)
x_train_dtm=vect.fit_transform(x_train,y_train)
x_test_dtm=vect.transform(x_test)



In [18]:
x_train_dtm

<29830x2619 sparse matrix of type '<class 'numpy.int64'>'
	with 557553 stored elements in Compressed Sparse Row format>

In [19]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [20]:
b=MultinomialNB()
model=b.fit(x_train_dtm,y_train)
b_predict=b.predict(x_test_dtm)

In [21]:

accuracy_score(y_test,b_predict)

0.7247586484312148

In [22]:
def vectorizing(text):
    
    return vect.transform([text])

In [23]:

df2 = pd.read_json(open("train.json", "r", encoding="utf8"))

In [24]:

model.predict(vectorizing(clearingandconverting(df2['ingredients'][12])))

array(['italian'], dtype='<U12')

# Modelin Test Edilmesi

In [26]:
df_test = pd.read_json('test.json')

In [27]:
df_test.head()

Unnamed: 0,id,ingredients
0,18009,"[baking powder, eggs, all-purpose flour, raisi..."
1,28583,"[sugar, egg yolks, corn starch, cream of tarta..."
2,41580,"[sausage links, fennel bulb, fronds, olive oil..."
3,29752,"[meat cuts, file powder, smoked sausage, okra,..."
4,35687,"[ground black pepper, salt, sausage casings, l..."


In [28]:
df_test['ingredients'] = test['ingredients'].apply(clearingandconverting)

In [29]:
vext_mod = test['ingredients'].apply(vectorizing)

In [30]:
list = []
for v in vext_mod:
    list.append(model.predict(v))

In [31]:
list

[array(['british'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['italian'], dtype='<U12'),
 array(['cajun_creole'], dtype='<U12'),
 array(['italian'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['french'], dtype='<U12'),
 array(['chinese'], dtype='<U12'),
 array(['mexican'], dtype='<U12'),
 array(['british'], dtype='<U12'),
 array(['italian'], dtype='<U12'),
 array(['greek'], dtype='<U12'),
 array(['indian'], dtype='<U12'),
 array(['italian'], dtype='<U12'),
 array(['british'], dtype='<U12'),
 array(['french'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['mexican'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['japanese'], dtype='<U12'),
 array(['indian'], dtype='<U12'),
 array(['french'], dtype='<U12'),
 array(['vietnamese'], dtype='<U12'),
 array(['italian'], dtype='<U12'),
 array(['southern_us'], dtype='<U12'),
 array(['vietnamese'], dtype='<U12'),
 array(['korean'], dtype='

In [32]:
df = pd.DataFrame(list)

In [33]:

df['cuisine']=df.iloc[:,0]

In [35]:
df['0'] =df_test['id']

In [36]:
df.columns = ['0', 'cuisine', 'id']

In [37]:
df.drop('0', axis=1, inplace=True)

In [38]:
df.head()

Unnamed: 0,cuisine,id
0,british,18009
1,southern_us,28583
2,italian,41580
3,cajun_creole,29752
4,italian,35687


In [39]:

df.set_index('id', inplace=True)

In [40]:
df.to_csv('submission.csv')

<img src='kaggle_whats_cooking_puna.png'>

In [44]:
##pip install PySimpleGUI

# Python Gui - Bence Nice Hareketler

In [45]:
import PySimpleGUI as sg
import time
from textblob import TextBlob


layout = [[sg.Text('Yemek Mutfağı Tahminleme', font=("Helvetica", 25))],
          [sg.Image(filename=r"C:\Users\PC11\Desktop\AI\projects\Kaggle_Projects\What's_Cooking\nltk.png", size=(75, 81))],
              [sg.Text('Yemek İçeriklerini Giriniz')],
              [sg.In(justification='center', key='-IN-', enable_events=True, size=(60, 60))],
              [],
              [sg.Button('Analiz et'), sg.Text(size=(25,1), key='-OUTPUT-')],
              [sg.Button('Exit')],
              [sg.Text('Created by Md', font=("Helvetica", 8))]]

window = sg.Window('', layout, element_justification='c')

while True:  # Event Loop
    
    event, values = window.read()
    
    if event == sg.WIN_CLOSED or event == 'Exit':
        
        break
        
    if event == 'Analiz et':
        
        sonuc = model.predict(vectorizing(clearingandconverting(values['-IN-'])))
        
        window['-OUTPUT-'].update(sonuc)