In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.impute import  SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
#from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression, Ridge
from src.data import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import TruncatedSVD
import os
import joblib

In [84]:
# Project directories, all expressed relative to the current working directory.
# Data locations first, then the directory holding persisted models.
INPUT_DATA_FILE_PATH = './../data/processed/'
INTERIM_FILE_PATH = './../data/interim'
MODELS_FILE_PATH = './../models'

In [85]:
# Object persisted with joblib under the models directory.
# joblib files are pickle-based: only load artefacts from a trusted source.
score_predictor = joblib.load(
    os.path.join(MODELS_FILE_PATH, 'score_predictor.joblib')
)

# CSV from the processed-data directory
# (presumably the column layout expected by the predictor -- TODO confirm).
canonical_df = pd.read_csv(
    os.path.join(INPUT_DATA_FILE_PATH, 'ingredients_data_format.csv')
)

# Per-ingredient min_EF / max_EF table from the interim-data directory.
statistics_df = pd.read_csv(
    os.path.join(INTERIM_FILE_PATH, 'Agribalyse_MinMax ingredient.csv')
)

In [77]:
# Inspect the frame read from 'ingredients_data_format.csv'.
canonical_df

Unnamed: 0,Abats de bœuf,Abats de porc,Abats de poulet,Abats de veau,Abricot,Ail,Amande,Amidon de maïs,Ananas,Anchois,...,max_EF_Viande de moutton sans os,max_EF_Viande de porc maigre,max_EF_Viande de poulet sans os,max_EF_Viande de veau sans os,max_EF_Vin blanc,max_EF_Vin rouge,max_EF_Yaourt,max_EF_citron,max_EF_Échalote,max_EF_Œuf de poule
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [114]:
# Keep the rows where min_EF or max_EF is exactly 0.
statistics_df.loc[statistics_df[['min_EF', 'max_EF']].eq(0).any(axis=1)]

Unnamed: 0,Ingredients,min_EF,max_EF
24,Bouillon,0.0,0.0
72,Fond de veau pour la sauce et la cuisson,0.0,0.0
132,Miel,0.0,0.0


In [90]:
def rename_pivoted_columns(pivot_ing_minmax: pd.DataFrame)->pd.DataFrame:
    """Flatten the column labels of a pivoted frame and replace NaN with 0.

    Each MultiIndex column label ``(first, second, ...)`` becomes the string
    ``"first_second_..."``. Empty-string levels after the first are skipped,
    so ``("min_EF", "Ail")`` -> ``"min_EF_Ail"`` while ``("count", "")`` stays
    ``"count"``. A frame whose columns are already flat keeps its labels
    unchanged instead of having every label indexed character by character.

    Parameters
    ----------
    pivot_ing_minmax : pd.DataFrame
        Frame to flatten, e.g. the result of ``DataFrame.pivot``. It is
        copied, never modified in place.

    Returns
    -------
    pd.DataFrame
        A copy with flat column labels and missing values filled with 0.
    """
    new_minmax_df = pivot_ing_minmax.copy()

    # Only tuple-valued (MultiIndex) labels need flattening.
    if isinstance(new_minmax_df.columns, pd.MultiIndex):
        flat_labels = []
        for first, *rest in new_minmax_df.columns:
            suffix = [str(level) for level in rest if level != '']
            # With no non-empty suffix the first level is kept as-is.
            flat_labels.append('_'.join([str(first), *suffix]) if suffix else first)
        new_minmax_df.columns = flat_labels

    # Cells with no value after pivoting are NaN; treat them as 0.
    return new_minmax_df.fillna(0)

In [97]:
# Pivot the statistics by ingredient, flatten the column labels, then total each
# row across all columns; only positions 20 through 99 of the result are shown.
(
    statistics_df
    .pivot(columns='Ingredients')
    .pipe(rename_pivoted_columns)
    .sum(axis=1)
    .to_numpy()[20:100]
)

array([0.27272785, 0.07171915, 0.0038529 , 0.04591784, 0.        ,
       0.02822683, 3.25866228, 0.54801075, 2.1577076 , 1.99751017,
       0.5354004 , 0.36005274, 0.22210512, 0.06122442, 0.06895868,
       0.65865529, 0.03083194, 0.26639115, 0.01213799, 0.81754966,
       0.6665032 , 0.10893669, 0.08299182, 0.13426097, 0.0730855 ,
       0.43107822, 0.00732528, 0.02215442, 0.09056605, 0.03807876,
       0.12041573, 0.33621501, 0.08776618, 0.00362528, 0.01818575,
       0.02458842, 0.25949921, 2.6933362 , 0.00438443, 0.04710964,
       0.0286297 , 0.24672387, 0.01719404, 0.02926277, 0.02299318,
       0.02021314, 0.01132331, 0.75424889, 0.38864866, 0.055584  ,
       0.05194356, 0.01209109, 0.        , 0.4889364 , 0.27753071,
       0.04117245, 0.40334876, 0.18662094, 0.1809714 , 0.34376655,
       0.18853155, 0.42066557, 0.02570179, 0.00415534, 0.04834971,
       0.01239114, 0.02262206, 0.05639067, 0.16589394, 0.04486887,
       0.03781389, 0.41623351, 0.01106621, 0.16554232, 0.11567

In [80]:
# Grand total of every column from position 213 onward
# (the first 213 columns are the ingredient columns).
columns_after_ingredients = canonical_df.iloc[:, 213:]
columns_after_ingredients.sum().sum()

0.0

In [75]:
# Labels of the first 213 columns of canonical_df.
canonical_df.columns[:213]

Index(['Abats de bœuf', 'Abats de porc', 'Abats de poulet', 'Abats de veau',
       'Abricot', 'Ail', 'Amande', 'Amidon de maïs', 'Ananas', 'Anchois',
       ...
       'Viande de moutton sans os', 'Viande de porc maigre',
       'Viande de poulet sans os', 'Viande de veau sans os', 'Vin blanc',
       'Vin rouge', 'Yaourt', 'citron', 'Échalote', 'Œuf de poule'],
      dtype='object', length=213)

In [26]:
def rename_pivot_names(pivot_ing_minmax: pd.DataFrame)->pd.DataFrame:
    """Return a copy of a pivoted frame with flat column labels and NaN set to 0.

    Same contract as ``rename_pivoted_columns`` defined earlier in this
    notebook; the two definitions are duplicates and should eventually be
    merged into one.

    A MultiIndex label ``(first, second, ...)`` is joined with ``"_"``,
    skipping empty-string levels after the first: ``("max_EF", "Miel")`` ->
    ``"max_EF_Miel"`` and ``("count", "")`` -> ``"count"``. Columns that are
    not a MultiIndex are left untouched rather than being indexed character
    by character.

    Parameters
    ----------
    pivot_ing_minmax : pd.DataFrame
        Pivoted frame; it is copied and never mutated.

    Returns
    -------
    pd.DataFrame
        Copy of the input with flat labels and missing values replaced by 0.
    """
    def _flatten(label):
        # Join the first level with every non-empty following level.
        first, *rest = label
        extras = [str(level) for level in rest if level != '']
        return '_'.join([str(first), *extras]) if extras else first

    flattened = pivot_ing_minmax.copy()
    if isinstance(flattened.columns, pd.MultiIndex):
        flattened.columns = [_flatten(label) for label in flattened.columns]

    # Pivoting leaves NaN wherever a row has no value for a column.
    return flattened.fillna(0)

In [27]:
# Build the flattened pivot from statistics_df so the cell runs on a fresh
# kernel and gives the same result when re-run (it no longer reads its own output).
# NOTE(review): `pivot_desc_ing` had no visible definition before this cell; the
# source is assumed to be statistics_df pivoted by 'Ingredients' -- confirm.
pivot_desc_ing = rename_pivot_names(statistics_df.pivot(columns='Ingredients'))


In [28]:
# Display the frame returned by rename_pivot_names.
pivot_desc_ing

Unnamed: 0,min_EF_Abats de bœuf,min_EF_Abats de porc,min_EF_Abats de poulet,min_EF_Abats de veau,min_EF_Abricot,min_EF_Ail,min_EF_Amande,min_EF_Amidon de maïs,min_EF_Ananas,min_EF_Anchois,...,max_EF_Viande de moutton sans os,max_EF_Viande de porc maigre,max_EF_Viande de poulet sans os,max_EF_Viande de veau sans os,max_EF_Vin blanc,max_EF_Vin rouge,max_EF_Yaourt,max_EF_citron,max_EF_Échalote,max_EF_Œuf de poule
0,0.365335,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.000000,0.008707,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.000000,0.000000,0.003403,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.000000,0.000000,0.398916,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.000000,0.000000,0.000000,0.000000,0.004451,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.137731,0.000000,0.000000,0.000000,0.000000
209,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.017149,0.000000,0.000000,0.000000
210,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.010579,0.000000,0.000000
211,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.009161,0.000000


In [34]:
# Read the detailed ingredient table from the processed-data directory, using the
# shared path constant instead of repeating the directory literal.
ingred_df = pd.read_csv(os.path.join(INPUT_DATA_FILE_PATH, 'Agribalyse_Detail ingredient.csv'))

In [None]:
def ingredient_to_dataframe(liste_ingredients, ingredient_for_recipe):
    """Build a two-column presence table for a recipe.

    Parameters
    ----------
    liste_ingredients
        Every known ingredient; one output row is produced per entry.
    ingredient_for_recipe
        List-like of the ingredients used by the recipe.

    Returns
    -------
    pd.DataFrame
        Columns ``Ingredients`` (the entries of ``liste_ingredients``) and
        ``Presence`` (1.0 when the ingredient is in ``ingredient_for_recipe``,
        otherwise 0.0).
    """
    catalogue = pd.DataFrame(data=liste_ingredients, columns=["Ingredients"])

    # Start from an all-zero flag vector, one slot per known ingredient.
    flags = np.zeros(len(liste_ingredients))

    # Switch on the slots whose ingredient appears in the recipe.
    used_mask = catalogue["Ingredients"].isin(ingredient_for_recipe)
    flags[used_mask.to_numpy()] = 1.0

    catalogue["Presence"] = flags
    return catalogue

In [None]:
# Five empty placeholder slots for the ingredients of the recipe to evaluate.
user_ingredients = [''] * 5