In [1]:
#### IMPORTS ####

# Visualisation et manipulation des données
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Fichiers
import json
import pickle
import os

# Apprentissage
from sklearn.manifold import TSNE as tsne
from sklearn.decomposition import NMF


In [2]:
#### Chargement des fichiers ####

# Fichier json d'apprentissage
with open('../input/whats-cooking/train.json', 'r') as f:
    datas = json.load(f) # id, cuisine, ingredients[]

# Fichier json dont on cherche les types de cuisines
with open('../input/whats-cooking/test.json', 'r') as f2:
    datas2 = json.load(f2) # id, ingredients[]


In [3]:
#### Extraction des données des fichiers ####

csv = []
csv2 = []

for data in datas:
    for ingredient in data['ingredients']:
        # Ajout de la ligne id-cuisine-ingredient
        tmp = dict()
        tmp['id'] = str(data['id'])
        tmp['cuisine'] = data['cuisine']
        tmp['ingredient'] = ingredient
        tmp['val'] = 1
        csv.append(tmp)

for data in datas:
    for ingredient in data['ingredients']:
        # Ajout de la ligne id-ingredient
        tmp = dict()
        tmp['id'] = str(data['id'])
        tmp['ingredient'] = ingredient
        tmp['val'] = 1
        csv2.append(tmp)

dfs = pd.DataFrame(csv) # DataFrame des données Source (d'apprentissage : train.json)
dft = pd.DataFrame(csv2) # DataFrame des données de Test (test.json)

In [4]:
#### Récupération des données extraites ####

cuisines = sorted(list(dfs['cuisine'].unique()))
ingredients = sorted(list(dfs['ingredient'].unique()))
id_plats_train = sorted(list(dfs['id'].unique()))
id_plats_test = sorted(list(dft['id'].unique()))

In [5]:
# Création des matrices

util_dfs = pd.pivot_table(data = dfs, values = 'val', index = 'id', columns = 'ingredient')
util_dft = pd.pivot_table(data = dfs, values = 'val', index = 'id', columns = 'ingredient')

In [6]:
util_dfs = util_dfs.fillna(0)
util_dft = util_dft.fillna(0)

In [7]:
X = util_dfs

# Création du model
model = NMF(n_components = 6, init='random', random_state=0)

# Apprentissage du model (décomposition en deux matrices à 6 dimensions)
W = model.fit_transform(X)

In [8]:
X2 = util_dft # données de test

# Décomposition en deux matrices en utilisant le model
W2 = model.transform(X2)

In [9]:
print(W, "\n\n", W2)

[[0.         0.         0.12876519 0.00161134 0.         0.        ]
 [0.11214663 0.         0.01343381 0.         0.07464309 0.02471402]
 [0.01455789 0.         0.         0.07010848 0.00127487 0.07695549]
 ...
 [0.         0.03656691 0.00041074 0.         0.         0.        ]
 [0.00018086 0.         0.06515019 0.0090832  0.07218028 0.00614986]
 [0.00298885 0.00145084 0.01462306 0.         0.00203041 0.0029329 ]] 

 [[0.         0.         0.12876468 0.00161225 0.         0.        ]
 [0.11214374 0.         0.01343495 0.         0.07464317 0.02471426]
 [0.01456487 0.         0.         0.07010135 0.00127458 0.07695603]
 ...
 [0.         0.03656674 0.00041077 0.         0.         0.        ]
 [0.00018158 0.         0.06514923 0.00908354 0.07218026 0.00614998]
 [0.0029888  0.0014508  0.01462297 0.         0.00203045 0.00293289]]


In [45]:
from sklearn.neighbors import NearestNeighbors
neigh = NearestNeighbors(n_neighbors=1)
neigh.fit(W)

NearestNeighbors(algorithm='auto', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=1, p=2,
                 radius=1.0)

In [50]:
print(W2)

[[0.         0.         0.12876468 0.00161225 0.         0.        ]
 [0.11214374 0.         0.01343495 0.         0.07464317 0.02471426]
 [0.01456487 0.         0.         0.07010135 0.00127458 0.07695603]
 ...
 [0.         0.03656674 0.00041077 0.         0.         0.        ]
 [0.00018158 0.         0.06514923 0.00908354 0.07218026 0.00614998]
 [0.0029888  0.0014508  0.01462297 0.         0.00203045 0.00293289]]


In [58]:
print(neigh.kneighbors([[0.11214577, 0.0002, 0.01343935, 0., 0.07464021, 0.02471822]], return_distance=False))

[[1]]


In [59]:
print(dfs.loc[dfs['id'] == '1']['cuisine'].unique())


['mexican']
