## Importing necessary Tools

In [16]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
import numpy as np
from sklearn.preprocessing import normalize
from tensorflow.keras.preprocessing import image
import ast

## Importing the Dataset

In [2]:
# Load the raw recipe table; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='dataset/raw-data_recipe.csv')

In [3]:
# Preview the first rows of the freshly loaded table.
data.head()

Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,ingredients,cooking_directions,nutritions,reviews
0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,pork belly^smoked paprika^kosher salt^ground b...,{'directions': u'Prep\n5 m\nCook\n2 h 45 m\nRe...,"{u'niacin': {u'hasCompleteData': False, u'name...","{8542392: {'rating': 5, 'followersCount': 11, ..."
1,240488,"Pork Loin, Apples, and Sauerkraut",4.764706,https://images.media-allrecipes.com/userphotos...,29,sauerkraut drained^Granny Smith apples sliced^...,{'directions': u'Prep\n15 m\nCook\n2 h 30 m\nR...,"{u'niacin': {u'hasCompleteData': False, u'name...","{3574785: {'rating': 5, 'followersCount': 0, '..."
2,218939,Foolproof Rosemary Chicken Wings,4.571429,https://images.media-allrecipes.com/userphotos...,12,chicken wings^sprigs rosemary^head garlic^oliv...,"{'directions': u""Prep\n20 m\nCook\n40 m\nReady...","{u'niacin': {u'hasCompleteData': True, u'name'...","{13774946: {'rating': 5, 'followersCount': 0, ..."
3,87211,Chicken Pesto Paninis,4.625,https://images.media-allrecipes.com/userphotos...,163,focaccia bread quartered^prepared basil pesto^...,{'directions': u'Prep\n15 m\nCook\n5 m\nReady ...,"{u'niacin': {u'hasCompleteData': True, u'name'...","{1563136: {'rating': 5, 'followersCount': 0, '..."
4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,red potatoes^strips bacon^Sauce:^heavy whippin...,{'directions': u'Prep\n20 m\nCook\n45 m\nReady...,"{u'niacin': {u'hasCompleteData': True, u'name'...","{2945555: {'rating': 5, 'followersCount': 6690..."


In [4]:
# (number of rows, number of columns) of the loaded table.
data.shape

(49698, 9)

In [5]:
# Count missing values per column.
data.isna().sum()

recipe_id             0
recipe_name           0
aver_rate             0
image_url             0
review_nums           0
ingredients           0
cooking_directions    0
nutritions            0
reviews               0
dtype: int64

In [6]:
# Show the raw cooking_directions string of the row labelled 0.
data.loc[0, 'cooking_directions']

"{'directions': u'Prep\\n5 m\\nCook\\n2 h 45 m\\nReady In\\n11 h 50 m\\nPreheat oven to 200 degrees F (95 degrees C).\\nSeason pork belly with paprika, salt, and pepper. Tightly wrap pork twice in heavy-duty aluminum foil. Place on a baking sheet and bake in the preheated oven for 2 1/2 hours. Turn off the oven; let pork rest in the oven for 1 hour. Remove meat from oven, leaving it wrapped in aluminum foil, and refrigerate at least 8 hours or overnight.\\nRemove pork from foil and slice across the grain in 1/4-inch thick slices. Working in batches, cook pork in a non-stick skillet over medium heat until golden and crisped, 6 to 8 minutes per slice.'}"

## Preprocessing the Data

###### We have no use for the cooking_directions and reviews columns in this dataset, so dropping them is the best option.

In [7]:
# Remove the two columns this notebook does not use.
# `data` is reassigned here, so without errors='ignore' a second run of this
# cell would raise KeyError (the columns are already gone). Ignoring missing
# labels makes the cell safe to re-run.
data = data.drop(columns=['cooking_directions', 'reviews'], errors='ignore')
data.head()

Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,ingredients,nutritions
0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,pork belly^smoked paprika^kosher salt^ground b...,"{u'niacin': {u'hasCompleteData': False, u'name..."
1,240488,"Pork Loin, Apples, and Sauerkraut",4.764706,https://images.media-allrecipes.com/userphotos...,29,sauerkraut drained^Granny Smith apples sliced^...,"{u'niacin': {u'hasCompleteData': False, u'name..."
2,218939,Foolproof Rosemary Chicken Wings,4.571429,https://images.media-allrecipes.com/userphotos...,12,chicken wings^sprigs rosemary^head garlic^oliv...,"{u'niacin': {u'hasCompleteData': True, u'name'..."
3,87211,Chicken Pesto Paninis,4.625,https://images.media-allrecipes.com/userphotos...,163,focaccia bread quartered^prepared basil pesto^...,"{u'niacin': {u'hasCompleteData': True, u'name'..."
4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,red potatoes^strips bacon^Sauce:^heavy whippin...,"{u'niacin': {u'hasCompleteData': True, u'name'..."


###### Now we want to round aver_rate to 2 decimal places.
###### For that we can create a function.

In [8]:
def avg_rating(col):
    """Render one rating value as text with exactly two decimal places.

    Parameters
    ----------
    col
        A single value that supports the ``.2f`` format specification
        (despite the name, a scalar rather than a whole column).

    Returns
    -------
    str
        The formatted value, e.g. ``'4.76'``. Note that this is a string,
        not a number.
    """
    return format(col, '.2f')

###### Apply the function to the aver_rate column in the dataset

In [9]:
# Run every rating through avg_rating; the column holds formatted strings afterwards.
data['aver_rate'] = data['aver_rate'].map(avg_rating)
data.head()

Unnamed: 0,recipe_id,recipe_name,aver_rate,image_url,review_nums,ingredients,nutritions
0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,pork belly^smoked paprika^kosher salt^ground b...,"{u'niacin': {u'hasCompleteData': False, u'name..."
1,240488,"Pork Loin, Apples, and Sauerkraut",4.76,https://images.media-allrecipes.com/userphotos...,29,sauerkraut drained^Granny Smith apples sliced^...,"{u'niacin': {u'hasCompleteData': False, u'name..."
2,218939,Foolproof Rosemary Chicken Wings,4.57,https://images.media-allrecipes.com/userphotos...,12,chicken wings^sprigs rosemary^head garlic^oliv...,"{u'niacin': {u'hasCompleteData': True, u'name'..."
3,87211,Chicken Pesto Paninis,4.62,https://images.media-allrecipes.com/userphotos...,163,focaccia bread quartered^prepared basil pesto^...,"{u'niacin': {u'hasCompleteData': True, u'name'..."
4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,red potatoes^strips bacon^Sauce:^heavy whippin...,"{u'niacin': {u'hasCompleteData': True, u'name'..."


In [13]:
# Inspect the column dtypes.
# NOTE(review): this cell's execution count (13) is later than the float
# conversion cell's (12), so the saved output presumably reflects the state
# after the conversion rather than right after formatting — confirm by re-running in order.
data.dtypes

recipe_id        int64
recipe_name     object
aver_rate      float64
image_url       object
review_nums      int64
ingredients     object
nutritions      object
dtype: object

###### The function returns strings, so the column's dtype becomes object and we need to convert it back to float.

In [12]:
# Cast the rating column back to a floating-point dtype, then list the dtypes to verify.
data['aver_rate'] = data['aver_rate'].astype('float64')
data.dtypes

recipe_id        int64
recipe_name     object
aver_rate      float64
image_url       object
review_nums      int64
ingredients     object
nutritions      object
dtype: object

###### The nutritions data is stored as the string representation of a dictionary

In [19]:
# Show the raw nutritions string of the row labelled 0.
data.loc[0, 'nutritions']

"{u'niacin': {u'hasCompleteData': False, u'name': u'Niacin Equivalents', u'amount': 9.319291, u'percentDailyValue': u'72', u'displayValue': u'9', u'unit': u'mg'}, u'sugars': {u'hasCompleteData': True, u'name': u'Sugars', u'amount': 0.09355932, u'percentDailyValue': u'0', u'displayValue': u'0.1', u'unit': u'g'}, u'sodium': {u'hasCompleteData': True, u'name': u'Sodium', u'amount': 2017.13, u'percentDailyValue': u'81', u'displayValue': u'2017', u'unit': u'mg'}, u'carbohydrates': {u'hasCompleteData': True, u'name': u'Carbohydrates', u'amount': 1.797819, u'percentDailyValue': u'< 1', u'displayValue': u'1.8', u'unit': u'g'}, u'vitaminB6': {u'hasCompleteData': False, u'name': u'Vitamin B6', u'amount': 0.2329798, u'percentDailyValue': u'15', u'displayValue': u'< 1', u'unit': u'mg'}, u'calories': {u'hasCompleteData': True, u'name': u'Calories', u'amount': 308.1481, u'percentDailyValue': u'15', u'displayValue': u'308', u'unit': u'kcal'}, u'thiamin': {u'hasCompleteData': False, u'name': u'Thiamin

###### So we have to convert it into a Python dictionary

In [17]:
# Parse each row's nutritions string into a real dict. The strings are Python
# literals (note the u'...' prefixes and bare False/True), so they are parsed
# with ast.literal_eval, which only accepts literal syntax.
# A comprehension builds the list in one step and, unlike a module-level for
# loop, does not leave a stray `row` variable behind in the notebook namespace.
list_of_dict = [ast.literal_eval(row) for row in data.nutritions]
list_of_dict[1]

{'niacin': {'hasCompleteData': False,
  'name': 'Niacin Equivalents',
  'amount': 15.6016,
  'percentDailyValue': '120',
  'displayValue': '16',
  'unit': 'mg'},
 'sugars': {'hasCompleteData': False,
  'name': 'Sugars',
  'amount': 19.84146,
  'percentDailyValue': '0',
  'displayValue': '19.8',
  'unit': 'g'},
 'sodium': {'hasCompleteData': False,
  'name': 'Sodium',
  'amount': 2606.764,
  'percentDailyValue': '104',
  'displayValue': '2607',
  'unit': 'mg'},
 'carbohydrates': {'hasCompleteData': True,
  'name': 'Carbohydrates',
  'amount': 32.08176,
  'percentDailyValue': '10',
  'displayValue': '32.1',
  'unit': 'g'},
 'vitaminB6': {'hasCompleteData': False,
  'name': 'Vitamin B6',
  'amount': 1.328631,
  'percentDailyValue': '83',
  'displayValue': '1',
  'unit': 'mg'},
 'calories': {'hasCompleteData': True,
  'name': 'Calories',
  'amount': 371.7219,
  'percentDailyValue': '19',
  'displayValue': '372',
  'unit': 'kcal'},
 'thiamin': {'hasCompleteData': False,
  'name': 'Thiamin',

###### Now we have to extract the percent daily values for some important nutrients

In [20]:
calories_list = []
fat_list = []
carbohydrates_list = []
protein_list = []
cholesterol_list = []
sodium_list = []
fiber_list = []