# Imports

In [1]:
import json
import csv
from pprint import pprint

from tqdm import tqdm
import pandas as pd

# Open datasets as DataFrames

In [2]:
# List the dataset directory to see which files are available.
!ls ../dataset

[0m[01;34mrecipe1M_images_train[0m/  [01;32mlayer2.json[0m*
[01;32mdet_ingrs.json[0m*         [01;32mrecipe1M_images_train.tar[0m*
[01;32mfiltered.csv[0m*           [01;32mrecipe1M_layers.tar.gz[0m*
[01;32mlayer1.csv[0m*             [01;32mtrain_dataset_with_images.csv[0m*
[01;32mlayer1.json[0m*            [01;32mtrain_filtered.csv[0m*
[01;32mlayer2.csv[0m*


In [3]:
# Load both metadata layers: layer 1 holds the recipe text (title,
# instructions, ingredients), layer 2 the image file names per recipe id.
df_layer1, df_layer2 = (
    pd.read_csv(csv_path)
    for csv_path in ("../dataset/layer1.csv", "../dataset/layer2.csv")
)

In [4]:
# Preview the first rows of the layer-1 recipe table.
df_layer1.head()

Unnamed: 0,id,title,partition,url,instructions,ingredients
0,000018c8a5,Worlds Best Mac and Cheese,train,http://www.epicurious.com/recipes/food/views/-...,Preheat the oven to 350 F. Butter or oil an 8-...,6 ounces penne;2 cups Beechers Flagship Cheese...
1,000033e39b,Dilly Macaroni Salad Recipe,train,http://cookeatshare.com/recipes/dilly-macaroni...,Cook macaroni according to package directions;...,1 c. elbow macaroni;1 c. cubed American cheese...
2,000035f7ed,Gazpacho,train,http://www.foodnetwork.com/recipes/gazpacho1.html,Add the tomatoes to a food processor with a pi...,"8 tomatoes, quartered;Kosher salt;1 red onion,..."
3,00003a70b1,Crunchy Onion Potato Bake,test,http://www.food.com/recipe/crunchy-onion-potat...,Preheat oven to 350 degrees Fahrenheit.;Spray ...,2 12 cups milk;1 12 cups water;14 cup butter;m...
4,00004320bb,Cool 'n Easy Creamy Watermelon Pie,train,http://www.food.com/recipe/cool-n-easy-creamy-...,Dissolve Jello in boiling water.;Allow to cool...,1 (3 ounce) package watermelon gelatin;14 cup ...


In [5]:
# Preview the first rows of the layer-2 table (recipe id -> image file names).
df_layer2.head()

Unnamed: 0,id,image_file_names
0,00003a70b1,3e233001e2.jpg;7f749987f9.jpg;aaf6b2dcd3.jpg
1,000075604a,6bdca6e490.jpg
2,00007bfd16,6409eab844.jpg;f7cb3de295.jpg
3,000095fc1d,a1374cdd98.jpg
4,0000b1e2b5,cb1a684683.jpg


In [6]:
# Coarse check that every recipe has an image entry:
# compare the row counts of the two layers.

df_layer1.shape[0] == df_layer2.shape[0]

False

In [7]:
from pathlib import Path

def get_file(id: str, root: Path = Path('../dataset/recipe1M_images_train/train')) -> Path:
    """
    Get the path of an image from its file name.

    Images are looked up in a four-level directory tree keyed by the first
    four characters of the file name, e.g. ``6bdca6e490.jpg`` maps to
    ``<root>/6/b/d/c/6bdca6e490.jpg``.

    Parameters
    ----------
    id : str
        Image file name (for example "6bdca6e490.jpg").
    root : Path or str, optional
        Directory that contains the image tree. Defaults to the train image
        directory used throughout this notebook.

    Returns
    -------
    Path
        Location where the image is expected. The path is only constructed;
        it is not checked for existence.

    Raises
    ------
    IndexError
        If ``id`` has fewer than four characters.
    """
    prefix = id[:4]
    # Index every character explicitly so a too-short name fails loudly
    # (IndexError) instead of silently producing a shallower path.
    return Path(root) / prefix[0] / prefix[1] / prefix[2] / prefix[3] / id

# Create DataFrame with recipes that are supposed to have images

In [8]:
# Inner join on the recipe id: only recipes present in both layers are kept.
df3 = df_layer1.merge(df_layer2, how="inner", on="id")
df3.shape[0]

402760

In [9]:
# Preview the merged frame: the recipe columns plus image_file_names.
df3.head()

Unnamed: 0,id,title,partition,url,instructions,ingredients,image_file_names
0,00003a70b1,Crunchy Onion Potato Bake,test,http://www.food.com/recipe/crunchy-onion-potat...,Preheat oven to 350 degrees Fahrenheit.;Spray ...,2 12 cups milk;1 12 cups water;14 cup butter;m...,3e233001e2.jpg;7f749987f9.jpg;aaf6b2dcd3.jpg
1,000075604a,Kombu Tea Grilled Chicken Thigh,train,https://cookpad.com/us/recipes/150100-kombu-te...,Pierce the skin of the chicken with a fork or ...,2 Chicken thighs;2 tsp Kombu tea;1 White pepper,6bdca6e490.jpg
2,00007bfd16,Strawberry Rhubarb Dump Cake,train,http://www.food.com/recipe/strawberry-rhubarb-...,Put ingredients in a buttered 9 x 12 x 2-inch ...,"6 -8 cups fresh rhubarb, or;6 -8 cups frozen r...",6409eab844.jpg;f7cb3de295.jpg
3,000095fc1d,Yogurt Parfaits,train,http://tastykitchen.com/recipes/breakfastbrunc...,Layer all ingredients in a serving dish.,"8 ounces, weight Light Fat Free Vanilla Yogurt...",a1374cdd98.jpg
4,0000b1e2b5,Fennel-Rubbed Pork Tenderloin with Roasted Fen...,train,http://www.epicurious.com/recipes/food/views/f...,Preheat oven to 350F with rack in middle.;Crus...,1 teaspoon fennel seeds;1 pound pork tenderloi...,cb1a684683.jpg


In [10]:
# Persist the merged frame as an intermediate CSV without the index column.
# This file is deleted again by the cleanup cell at the end of the notebook.
df3.to_csv("../dataset/train_filtered.csv", index=False)

In [11]:
# Show size and timestamp of the merged CSV.
# NOTE(review): the previous cell writes train_filtered.csv, but this lists
# filtered.csv - confirm which file is intended; on a fresh checkout
# filtered.csv is not produced by any cell in this notebook.
!ls -alh ../dataset/filtered.csv

-rwxr-xr-x 1 chitrey chitrey 413M 2021-08-21 13:54 [0m[01;32m../dataset/filtered.csv[0m*


In [12]:
def filter_rows_with_image(portion: str, length: int,
                           source: str = "../dataset/train_filtered.csv"):
    """
    Keep only the rows of one partition whose images exist on disk.

    Reads the merged recipe CSV at ``source``, keeps the rows whose
    ``partition`` column equals ``portion``, drops every image name whose
    file is missing (checked with ``get_file(name).exists()``) and writes
    the rows that still have at least one image to
    ``../dataset/{portion}_dataset_with_images.csv``.

    Parameters
    ----------
    portion : str
        Partition to keep (value of the ``partition`` column, e.g. "train").
        Also used as the prefix of the output file name.
    length : int
        Number of data rows in ``source``; only used as the progress-bar total.
    source : str, optional
        CSV to filter. Defaults to the file written by the
        ``df3.to_csv(...)`` cell of this notebook.

    Notes
    -----
    ``get_file`` is called with the image name only, so images are looked up
    in whatever directory ``get_file`` resolves by default, independent of
    ``portion``.
    """
    out_path = f"../dataset/{portion}_dataset_with_images.csv"

    # newline='' is required by the csv module so that quoted fields with
    # embedded line breaks round-trip unchanged; explicit UTF-8 matches the
    # encoding pandas uses when it writes the input file.
    with open(source, 'r', newline='', encoding='utf-8') as infile, \
            open(out_path, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        header = next(reader, None)
        if header is None:
            # Empty input: nothing to filter, leave an empty output file.
            return
        writer.writerow(header)

        # Resolve the columns by name so an extra leading column (e.g. a saved
        # index) cannot shift them; fall back to the historical positions if
        # the header does not carry the expected names.
        partition_col = header.index("partition") if "partition" in header else 2
        images_col = (header.index("image_file_names")
                      if "image_file_names" in header else 6)

        for row in tqdm(reader, total=length):
            if row[partition_col] != portion:
                continue
            # ''.split(';') yields [''], so skip empty names before touching
            # the file system (get_file cannot build a path for them).
            valid_images = [
                image
                for image in row[images_col].split(';')
                if image and get_file(image).exists()
            ]
            if valid_images:
                row[images_col] = ';'.join(valid_images)
                writer.writerow(row)

In [13]:
# Filter the train partition; the row count of the merged frame only sizes
# the progress bar. The recorded run of this cell took about 1h38.
filter_rows_with_image(portion="train", length=df3.shape[0])

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 402760/402760 [1:38:10<00:00, 68.37it/s]


In [14]:
# Confirm the filtered output file was written and check its size.
!ls -alh ../dataset/train_dataset_with_images.csv

-rwxr-xr-x 1 chitrey chitrey 290M 2021-08-22 09:42 [0m[01;32m../dataset/train_dataset_with_images.csv[0m*


In [15]:
# Spot-check the filtered output. nrows=5 parses only the first five rows
# instead of loading the whole (~290 MB) file just to display its head.
pd.read_csv("../dataset/train_dataset_with_images.csv", nrows=5)

Unnamed: 0,id,title,partition,url,instructions,ingredients,image_file_names
0,000075604a,Kombu Tea Grilled Chicken Thigh,train,https://cookpad.com/us/recipes/150100-kombu-te...,Pierce the skin of the chicken with a fork or ...,2 Chicken thighs;2 tsp Kombu tea;1 White pepper,6bdca6e490.jpg
1,00007bfd16,Strawberry Rhubarb Dump Cake,train,http://www.food.com/recipe/strawberry-rhubarb-...,Put ingredients in a buttered 9 x 12 x 2-inch ...,"6 -8 cups fresh rhubarb, or;6 -8 cups frozen r...",6409eab844.jpg;f7cb3de295.jpg
2,000095fc1d,Yogurt Parfaits,train,http://tastykitchen.com/recipes/breakfastbrunc...,Layer all ingredients in a serving dish.,"8 ounces, weight Light Fat Free Vanilla Yogurt...",a1374cdd98.jpg
3,0000b1e2b5,Fennel-Rubbed Pork Tenderloin with Roasted Fen...,train,http://www.epicurious.com/recipes/food/views/f...,Preheat oven to 350F with rack in middle.;Crus...,1 teaspoon fennel seeds;1 pound pork tenderloi...,cb1a684683.jpg
4,0000c79afb,Pink Sangria,train,http://www.food.com/recipe/pink-sangria-305385,"In a pitcher, combine all the ingredients exce...","1 (750 ml) bottle rose wine, chilled;14 cup br...",2f4b4c4452.jpg


# Cleanup

In [16]:
# Remove the intermediate merged CSV written earlier by the df3.to_csv(...) cell.
!rm ../dataset/train_filtered.csv