In [1]:
import pandas as pd
import numpy as np
import pickle
import json

# TODO: Check for one off errors for pushed tables since we incremented index only in the map

In [2]:
with open("scraped_img_urls.pkl", "rb") as f:
    scraped_img_urls = pickle.load(f)
with open("cuisine_lst.pkl", "rb") as f:
    cuisine_lst = pickle.load(f)
    cuisine_lst = [cuisine.lower() for cuisine in cuisine_lst]
    cuisine_map = {cuisine: i+1 for i, cuisine in enumerate(cuisine_lst)}
with open("food_data/ingr_map.pkl", "rb") as f:
    ingr_map_df = pickle.load(f)

In [3]:
recipe_df = pd.read_csv("recipe_df_178k_with_updated_users.csv", 
                        converters={"ingredients": eval, "quantities": eval, 
                                    "measurement_units": eval, "tags": eval,
                                   "nutrition": eval, "steps": eval, })
user_df = pd.read_csv("users.csv")
raw_interactions_df = pd.read_csv("interactions_with_updated_users.csv")
pp_recipe_df = pd.read_csv("food_data/PP_recipes.csv", converters={"ingredient_ids": eval})

In [4]:
recipe_df["calories"] = recipe_df["nutrition"].str[0]

In [5]:
recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178265 entries, 0 to 178264
Data columns (total 16 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   name               178265 non-null  object 
 1   id                 178265 non-null  int64  
 2   minutes            178265 non-null  int64  
 3   contributor_id     178265 non-null  int64  
 4   submitted          178265 non-null  object 
 5   tags               178265 non-null  object 
 6   nutrition          178265 non-null  object 
 7   n_steps            178265 non-null  int64  
 8   steps              178265 non-null  object 
 9   description        174311 non-null  object 
 10  ingredients        178265 non-null  object 
 11  n_ingredients      178265 non-null  int64  
 12  food_recipe_url    178265 non-null  object 
 13  quantities         178265 non-null  object 
 14  measurement_units  178265 non-null  object 
 15  calories           178265 non-null  float64
dtypes:

In [6]:
display(recipe_df.iloc[0].ingredients)
display(recipe_df.iloc[0].quantities)
display(recipe_df.iloc[0].measurement_units)
display(recipe_df.iloc[0].tags)

['winter squash',
 'mexican seasoning',
 'mixed spice',
 'honey',
 'butter',
 'olive oil',
 'salt']

['1', '1 -2', '1 -2', '', '', '', '']

['lb', 'teaspoon', 'teaspoon', '', '', '', '']

['60-minutes-or-less',
 'time-to-make',
 'course',
 'main-ingredient',
 'cuisine',
 'preparation',
 'occasion',
 'north-american',
 'side-dishes',
 'vegetables',
 'mexican',
 'easy',
 'fall',
 'holiday-event',
 'vegetarian',
 'winter',
 'dietary',
 'christmas',
 'seasonal',
 'squash']

## Converting Strings to Numbers. Three Cases
1. Fractions ("1/3", "1/2" --> 0.333, 0.5)
2. Ranges ("1 -2" --> 1)
3. Normal integers ("1", "4" --> 1, 4)
4. Range with Fractions ("1⁄4 - 1⁄2" --> 1/4)

In [7]:
def convert_to_float(frac_str):
    if frac_str == "":
        return np.nan
    if " " in frac_str:
        frac_str = frac_str.split(" ")[0]
    try:
        return float(frac_str)
    except ValueError:
        if "⁄" in frac_str:
            split_frac = frac_str.split("⁄")
            num, denom = split_frac[0], split_frac[1]
            float_str = float(num) / float(denom)
        return float_str

In [8]:
recipe_df["quantities"] = recipe_df.quantities.map(lambda x: [convert_to_float(item) for item in x])
recipe_df["measurement_units"] = recipe_df["measurement_units"].map(
    lambda x: np.array([item if item != "" else np.nan for item in x], dtype=object)
)

In [9]:
recipe_df["tags"].head()

0    [60-minutes-or-less, time-to-make, course, mai...
1    [30-minutes-or-less, time-to-make, course, mai...
2    [60-minutes-or-less, time-to-make, course, mai...
3    [15-minutes-or-less, time-to-make, course, mai...
4    [15-minutes-or-less, time-to-make, course, mai...
Name: tags, dtype: object

In [10]:
def output_tag(tag_lst):
    lst = [tag for tag in tag_lst for cuisine in cuisine_map if (cuisine in tag)]
    if len(lst) == 0:
        return "global"
    return lst[0]

In [11]:
recipe_df["cuisine"] = recipe_df.tags.map(output_tag)

In [12]:
recipe_df["img_url"] = scraped_img_urls

In [13]:
recipe_df["serves"] = np.random.choice(np.arange(1, 5), size=recipe_df.shape[0])

In [14]:
recipe_df.columns

Index(['name', 'id', 'minutes', 'contributor_id', 'submitted', 'tags',
       'nutrition', 'n_steps', 'steps', 'description', 'ingredients',
       'n_ingredients', 'food_recipe_url', 'quantities', 'measurement_units',
       'calories', 'cuisine', 'img_url', 'serves'],
      dtype='object')

In [15]:
recipe_df = recipe_df.rename(columns={"id": "recipe_id", "minutes": "time_to_prepare", 
                          "submitted": "date_submitted", "steps": "recipe_text", "name": "recipe_name",
                         "contributor_id": "creator_id"})

In [16]:
recipe_df["recipe_text"] = recipe_df["recipe_text"].map(lambda x: ". ".join([elem.capitalize() for elem in x]))

In [17]:
cuisine_df = pd.DataFrame({"cuisine_id": np.arange(1, recipe_df["cuisine"].unique().shape[0]+1), 
                           "cuisine_name": recipe_df["cuisine"].unique()})

In [18]:
cuisine_df.head()

Unnamed: 0,cuisine_id,cuisine_name
0,1,north-american
1,2,global
2,3,asian
3,4,german
4,5,italian


In [19]:
recipe_df = recipe_df.merge(pp_recipe_df, 
                left_on="recipe_id",
                            
                right_on="id")[recipe_df.columns.tolist() + ["ingredient_ids"]]
# Sometimes, ingredient_ids doesn't equal quantities due to scraping, website and regex issues.
# Discard those entries
recipe_df = recipe_df[~recipe_df.index.isin(
    recipe_df[recipe_df["ingredient_ids"].str.len() != recipe_df["quantities"].str.len()].index
)]
recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 155751 entries, 0 to 178263
Data columns (total 20 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   recipe_name        155751 non-null  object 
 1   recipe_id          155751 non-null  int64  
 2   time_to_prepare    155751 non-null  int64  
 3   creator_id         155751 non-null  int64  
 4   date_submitted     155751 non-null  object 
 5   tags               155751 non-null  object 
 6   nutrition          155751 non-null  object 
 7   n_steps            155751 non-null  int64  
 8   recipe_text        155751 non-null  object 
 9   description        152310 non-null  object 
 10  ingredients        155751 non-null  object 
 11  n_ingredients      155751 non-null  int64  
 12  food_recipe_url    155751 non-null  object 
 13  quantities         155751 non-null  object 
 14  measurement_units  155751 non-null  object 
 15  calories           155751 non-null  float64
 16  cu

In [20]:
recipe_df = recipe_df.merge(cuisine_df, 
                left_on="cuisine", 
                right_on="cuisine_name")[recipe_df.columns.tolist() + ["cuisine_id"]]

recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 155751 entries, 0 to 155750
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   recipe_name        155751 non-null  object 
 1   recipe_id          155751 non-null  int64  
 2   time_to_prepare    155751 non-null  int64  
 3   creator_id         155751 non-null  int64  
 4   date_submitted     155751 non-null  object 
 5   tags               155751 non-null  object 
 6   nutrition          155751 non-null  object 
 7   n_steps            155751 non-null  int64  
 8   recipe_text        155751 non-null  object 
 9   description        152310 non-null  object 
 10  ingredients        155751 non-null  object 
 11  n_ingredients      155751 non-null  int64  
 12  food_recipe_url    155751 non-null  object 
 13  quantities         155751 non-null  object 
 14  measurement_units  155751 non-null  object 
 15  calories           155751 non-null  float64
 16  cu

In [21]:
pd.set_option('display.max_columns', None)

In [22]:
def unnesting(df, explode):
    idx = df.index.repeat(df[explode[0]].str.len())
    df1 = pd.concat([
        pd.DataFrame({x: np.concatenate(df[x].values)}) for x in explode], axis=1)
    df1.index = idx

    return df1.join(df.drop(explode, 1), how='left').reset_index(drop=True)

In [23]:
recipe_ingredients_df = unnesting(recipe_df, ["ingredients", "ingredient_ids", "quantities", "measurement_units"])

In [24]:
recipe_ingredients_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1366604 entries, 0 to 1366603
Data columns (total 21 columns):
 #   Column             Non-Null Count    Dtype  
---  ------             --------------    -----  
 0   ingredients        1366604 non-null  object 
 1   ingredient_ids     1366604 non-null  int64  
 2   quantities         1281236 non-null  float64
 3   measurement_units  1278258 non-null  object 
 4   recipe_name        1366604 non-null  object 
 5   recipe_id          1366604 non-null  int64  
 6   time_to_prepare    1366604 non-null  int64  
 7   creator_id         1366604 non-null  int64  
 8   date_submitted     1366604 non-null  object 
 9   tags               1366604 non-null  object 
 10  nutrition          1366604 non-null  object 
 11  n_steps            1366604 non-null  int64  
 12  recipe_text        1366604 non-null  object 
 13  description        1336514 non-null  object 
 14  n_ingredients      1366604 non-null  int64  
 15  food_recipe_url    1366604 non-n

In [25]:
recipe_ingredients_df.head()

Unnamed: 0,ingredients,ingredient_ids,quantities,measurement_units,recipe_name,recipe_id,time_to_prepare,creator_id,date_submitted,tags,nutrition,n_steps,recipe_text,description,n_ingredients,food_recipe_url,calories,cuisine,img_url,serves,cuisine_id
0,winter squash,7933,1.0,lb,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,7,https://www.food.com/recipe/arriba---baked-win...,51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,1
1,mexican seasoning,4694,1.0,teaspoon,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,7,https://www.food.com/recipe/arriba---baked-win...,51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,1
2,mixed spice,4795,1.0,teaspoon,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,7,https://www.food.com/recipe/arriba---baked-win...,51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,1
3,honey,3723,,,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,7,https://www.food.com/recipe/arriba---baked-win...,51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,1
4,butter,840,,,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,7,https://www.food.com/recipe/arriba---baked-win...,51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,1


In [26]:
temp_df = recipe_ingredients_df[(recipe_ingredients_df["quantities"].notna()
                              & recipe_ingredients_df["measurement_units"].isna())]

In [27]:
mode = lambda x: x.mode()[0] if x.mode().size > 0 else x.mode()
most_popular_measure_units = recipe_ingredients_df[["ingredient_ids", "measurement_units"]].groupby(
    "ingredient_ids"
)["measurement_units"].agg(mode).to_dict()

In [28]:
temp_df["measurement_units"].fillna(temp_df["ingredient_ids"].map(most_popular_measure_units))

96              cup
355             cup
1895          flour
2237            cup
3701        unbaked
             ...   
1365579    boneless
1365935        eggs
1366091    teaspoon
1366261         lbs
1366505        plum
Name: measurement_units, Length: 2978, dtype: object

In [29]:
recipe_ingredients_df.loc[recipe_ingredients_df.index.isin(temp_df.index), "measurement_units"] = (
    temp_df["measurement_units"].fillna(
    temp_df["ingredient_ids"].map(most_popular_measure_units)))

In [30]:
recipe_ingredients_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1366604 entries, 0 to 1366603
Data columns (total 21 columns):
 #   Column             Non-Null Count    Dtype  
---  ------             --------------    -----  
 0   ingredients        1366604 non-null  object 
 1   ingredient_ids     1366604 non-null  int64  
 2   quantities         1281236 non-null  float64
 3   measurement_units  1281236 non-null  object 
 4   recipe_name        1366604 non-null  object 
 5   recipe_id          1366604 non-null  int64  
 6   time_to_prepare    1366604 non-null  int64  
 7   creator_id         1366604 non-null  int64  
 8   date_submitted     1366604 non-null  object 
 9   tags               1366604 non-null  object 
 10  nutrition          1366604 non-null  object 
 11  n_steps            1366604 non-null  int64  
 12  recipe_text        1366604 non-null  object 
 13  description        1336514 non-null  object 
 14  n_ingredients      1366604 non-null  int64  
 15  food_recipe_url    1366604 non-n

In [31]:
recipe_ingredients_df = recipe_ingredients_df[["recipe_id", "ingredient_ids", 
                                                             "quantities", "measurement_units"]]
recipe_ingredients_df = recipe_ingredients_df.rename(columns={
    "ingredient_ids": "ingredient_id", 
    "quantities": "quantity",
    "measurement_units": "measurement_unit"})
recipe_ingredients_df.head()

Unnamed: 0,recipe_id,ingredient_id,quantity,measurement_unit
0,137739,7933,1.0,lb
1,137739,4694,1.0,teaspoon
2,137739,4795,1.0,teaspoon
3,137739,3723,,
4,137739,840,,


In [32]:
recipe_ingredients_df[recipe_ingredients_df["recipe_id"].isin(recipe_df["recipe_id"])]

Unnamed: 0,recipe_id,ingredient_id,quantity,measurement_unit
0,137739,7933,1.0,lb
1,137739,4694,1.0,teaspoon
2,137739,4795,1.0,teaspoon
3,137739,3723,,
4,137739,840,,
...,...,...,...,...
1366599,502817,3184,1.0,teaspoon
1366600,502817,3248,1.0,teaspoon
1366601,502817,590,0.5,teaspoon
1366602,502817,6335,0.5,cup


In [33]:
recipe_ingredients_df

Unnamed: 0,recipe_id,ingredient_id,quantity,measurement_unit
0,137739,7933,1.0,lb
1,137739,4694,1.0,teaspoon
2,137739,4795,1.0,teaspoon
3,137739,3723,,
4,137739,840,,
...,...,...,...,...
1366599,502817,3184,1.0,teaspoon
1366600,502817,3248,1.0,teaspoon
1366601,502817,590,0.5,teaspoon
1366602,502817,6335,0.5,cup


In [34]:
recipe_ingredients_df.to_csv("recipe_ingredients_df.csv", index=False)

In [35]:
recipe_df.to_csv("recipe_155k_with_all_updated_columns.csv",index=False)

In [36]:
cuisine_df.to_csv("cuisines.csv", index=False)

In [37]:
ingr_map_df.to_csv("ingr_map.csv", index=False)

In [38]:
pp_recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178265 entries, 0 to 178264
Data columns (total 8 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   id                 178265 non-null  int64 
 1   i                  178265 non-null  int64 
 2   name_tokens        178265 non-null  object
 3   ingredient_tokens  178265 non-null  object
 4   steps_tokens       178265 non-null  object
 5   techniques         178265 non-null  object
 6   calorie_level      178265 non-null  int64 
 7   ingredient_ids     178265 non-null  object
dtypes: int64(3), object(5)
memory usage: 10.9+ MB


In [39]:
recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 155751 entries, 0 to 155750
Data columns (total 21 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   recipe_name        155751 non-null  object 
 1   recipe_id          155751 non-null  int64  
 2   time_to_prepare    155751 non-null  int64  
 3   creator_id         155751 non-null  int64  
 4   date_submitted     155751 non-null  object 
 5   tags               155751 non-null  object 
 6   nutrition          155751 non-null  object 
 7   n_steps            155751 non-null  int64  
 8   recipe_text        155751 non-null  object 
 9   description        152310 non-null  object 
 10  ingredients        155751 non-null  object 
 11  n_ingredients      155751 non-null  int64  
 12  food_recipe_url    155751 non-null  object 
 13  quantities         155751 non-null  object 
 14  measurement_units  155751 non-null  object 
 15  calories           155751 non-null  float64
 16  cu

In [40]:
raw_interactions_df = raw_interactions_df[raw_interactions_df["recipe_id"].isin(recipe_df["recipe_id"])]
interactions_df = raw_interactions_df.rename(columns={"date": "interaction_date"})
interactions_df.head()

Unnamed: 0,user_id,recipe_id,interaction_date,rating,review
0,899,52469,2003-04-24,5,It's GENIUS! I love this recipe- thank you so ...
1,9527,241877,2009-02-20,4,Good side dish. I only made half a recipe and...
2,12460,373493,2012-07-07,5,Delicious! I did eyeball the honeydew (used a ...
3,2161,307866,2011-09-06,5,"Wow, this was such a great recipe. I wasn't su..."
4,4147,205407,2007-01-16,5,It was both simple and delicious. Although I a...


In [41]:
interactions_df.to_csv("interactions_df.csv", index=False)

In [42]:
unique_tags = set()
for tags in recipe_df["tags"].values:
    for tag in tags:
        unique_tags.add(tag)
tag_id_to_tag_map = {i+1: unique_tag for i, unique_tag in enumerate(unique_tags)}
tag_to_tag_id_map = {unique_tag: i+1 for i, unique_tag in enumerate(unique_tags)}
tags_df = pd.DataFrame.from_dict(tag_id_to_tag_map, orient="index")
tags_df["tag_id"] = tags_df.index
tags_df = tags_df.rename(columns={0: "tag_text"})
tags_df.head()

Unnamed: 0,tag_text,tag_id
1,,1
2,simply-potatoes,2
3,candy,3
4,to-go,4
5,carrots,5


In [43]:
tags_df.to_csv("tags.csv", index=False)

In [44]:
recipe_df.head()

Unnamed: 0,recipe_name,recipe_id,time_to_prepare,creator_id,date_submitted,tags,nutrition,n_steps,recipe_text,description,ingredients,n_ingredients,food_recipe_url,quantities,measurement_units,calories,cuisine,img_url,serves,ingredient_ids,cuisine_id
0,arriba baked winter squash mexican style,137739,55,1,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,Make a choice and proceed with recipe. Dependi...,autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7,https://www.food.com/recipe/arriba---baked-win...,"[1.0, 1.0, 1.0, nan, nan, nan, nan]","[lb, teaspoon, teaspoon, nan, nan, nan, nan]",51.5,north-american,https://img.sndimg.com/food/image/upload/w_555...,4,"[7933, 4694, 4795, 3723, 840, 5006, 6270]",1
1,a bit different breakfast pizza,31490,30,2,2002-06-17,"[30-minutes-or-less, time-to-make, course, mai...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,Preheat oven to 425 degrees f. Press dough int...,this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6,https://www.food.com/recipe/a-bit-different--b...,"[1.0, 1.0, 3.0, 0.5, nan, 2.0]",[(10 ounce) can prepared pizza crust (or use y...,173.4,north-american,https://img.sndimg.com/food/image/upload/w_555...,1,"[5481, 6324, 2499, 4717, 6276, 1170]",1
2,apple a day milk shake,5289,0,4,1999-12-06,"[15-minutes-or-less, time-to-make, course, mai...","[160.2, 10.0, 55.0, 3.0, 9.0, 20.0, 7.0]",4,Combine ingredients in blender. Cover and blen...,,"[milk, vanilla ice cream, frozen apple juice c...",4,https://www.food.com/recipe/apple-a-day--milk-...,"[1.0, 1.0, 2.0, 0.5]","[cup, scoop, tablespoons, apple]",160.2,north-american,https://geniuskitchen.sndimg.com/fdc-new/img/f...,2,"[4717, 7474, 2946, 150]",1
3,aww marinated olives,25274,15,5,2002-04-14,"[15-minutes-or-less, time-to-make, course, mai...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,Toast the fennel seeds and lightly crush them....,my italian mil was thoroughly impressed by my ...,"[fennel seeds, green olives, ripe olives, garl...",9,https://www.food.com/recipe/aww--marinated-oli...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0]","[teaspoons, cup, cup, clove, teaspoon, tablesp...",380.7,north-american,https://geniuskitchen.sndimg.com/fdc-new/img/f...,3,"[2587, 3437, 5002, 3184, 5324, 5068, 5058, 131...",1
4,better then bush s baked beans,67547,2970,9,2003-07-26,"[weeknight, time-to-make, course, main-ingredi...","[462.4, 28.0, 214.0, 69.0, 14.0, 29.0, 23.0]",9,In a very large sauce pan cover the beans and ...,i'd have to say that this is a labor of love d...,"[great northern bean, chicken bouillon cubes, ...",13,https://www.food.com/recipe/better-then-bush-s...,"[1.0, 4.0, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 5...","[package, chicken, cups, cup, cup, cup, cup, t...",462.4,north-american,https://img.sndimg.com/food/image/upload/w_555...,2,"[3384, 1248, 2045, 4807, 1833, 5010, 3217, 488...",1


In [45]:
recipe_df["tag_ids"] = recipe_df["tags"].map(
    lambda x: np.array([tag_to_tag_id_map[tag] for tag in x], dtype=object)
)

In [48]:
recipe_tags_df = unnesting(recipe_df, ["tags", "tag_ids"])
recipe_tags_df = recipe_tags_df.rename(columns={"tag_ids": "tag_id"})

In [52]:
columns_needed = ["recipe_id", "tag_id"]
recipe_tags_df = recipe_tags_df[columns_needed]
recipe_tags_df.head()

Unnamed: 0,recipe_id,tag_id
0,137739,257
1,137739,166
2,137739,321
3,137739,242
4,137739,514


In [55]:
recipe_tags_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2800407 entries, 0 to 2800406
Data columns (total 2 columns):
 #   Column     Dtype 
---  ------     ----- 
 0   recipe_id  int64 
 1   tag_id     object
dtypes: int64(1), object(1)
memory usage: 42.7+ MB


In [53]:
recipe_tags_df.to_csv("recipe_tags.csv", index=False)