In [157]:
import json

with open('series_data.json', "r") as file:
    data = json.load(file)

In [158]:
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv()

# Access the API key
bureau_api_key = os.getenv("BUREAU_API_KEY")

if bureau_api_key:
    print(f"Bureau API Key: {bureau_api_key}")

Bureau API Key: 33091382135543188d76877099f635be


In [159]:
import pandas as pd
import requests
import json


def get_timeseries(series_id):
    errors = []
    url = "https://api.bls.gov/publicAPI/v2/timeseries/data/"

    # API key (register at https://data.bls.gov/registrationEngine/ to get one)
    api_key = ""

    # Request payload
    payload = {
        "seriesid": [f'{series_id}'],  # Series ID to request
        "startyear": "2010",           # Start year for the data
        "endyear": "2024",             # End year for the data
        "registrationkey": bureau_api_key    # Your API key
    }

    # Make the API request
    response = requests.post(url, data=json.dumps(payload), headers={"Content-Type": "application/json"})

    if response.status_code == 200:
        # try:
        response = response.json()    
        df = pd.DataFrame(response['Results']['series'][0])
        df = pd.json_normalize(df['data'])
        df['series_id'] = series_id
        df['ingredient'] = data[series_id]['name']
        df['metric'] = data[series_id]['metric']
        # except Exception as e:
        #     return response
    return df

In [161]:
dfs = []

for series_id in data.keys():
    data = get_timeseries(str(series_id))
    if type(data) != pd.DataFrame:
        print(data)
    else:
        dfs.append(data)

all_ingredients_data = pd.concat(dfs)
all_ingredients_data = all_ingredients_data.drop('footnotes', axis = 1)
res = all_ingredients_data.pivot(index = ['series_id', 'ingredient', 'metric'], columns=['year', 'periodName'], values='value')
prices_2011 = res['2011'].reset_index()
prices_2011.to_csv('./ingredients_1.csv')

KeyError: 'name'

In [2]:
import pandas as pd

mega_df = pd.read_csv('./ingredients.csv')

analyze_data = mega_df[['June', 'ingredient', 'metric']]
analyze_data = analyze_data.dropna()
analyze_data['ingredient'] = analyze_data['ingredient'].str.lower()
analyze_data = analyze_data.rename({'June' : 'price'}, axis = 1)

KeyError: "['June'] not in index"

## Adding more data

In [44]:
spices = {
    "ingredient": [
        "salt", "pepper", "cinnamon", "paprika", "garlic powder", "onion powder",
        "turmeric", "ginger", "cumin", "oregano", "basil", "thyme", "parsley",
        "nutmeg", "cloves"
    ],
    "price": [
        2.50, 4.00, 5.00, 3.50, 4.50, 4.25, 6.00, 5.50, 4.75, 3.75,
        3.25, 3.75, 3.00, 6.50, 7.00
    ],
    "metric": [
        "oz", "oz", "oz", "oz", "oz", "oz", "oz", "oz", "oz", "oz",
        "oz", "oz", "oz", "oz", "oz"
    ]
}
df_spices = pd.DataFrame(data=spices)
data_with_spices = pd.concat([analyze_data, df_spices]).drop_duplicates()

In [10]:
analyze_data = pd.read_csv('./ingredients_refines.csv')

In [11]:
ingredient_prices = {
    'ingredient': [
        'lemon', 'cream', 'yogurt', 'vanilla', 'shrimp', 
        'cilantro', 'jalapeno', 'parsley', 'vinegar', 'tortillas', 
        'red pepper', 'fish', 'avocado', 'lime', 'chicken stock', 
        'parmesan', 'vegetable stock', 'beef stock', 'garam masala', 
        'spinach', 'soy sauce'
    ],
    'price': [
        0.99, 15.96, 4.46, 111.84, 12.99, 
        4.33, 1.15, 4.97, 1.24, 4.78, 
        1.50, 9.99, 2.58, 2.00, 0.70, 
        4.99, 0.70, 0.70, 79.88, 
        2.99, 1.49
    ],
    'metric': [
        'lb', 'lb', 'lb', 'lb', 'lb', 
        'lb', 'lb', 'lb', 'lb', 'lb', 
        'lb', 'lb', 'lb', 'lb', 'lb', 
        'lb', 'lb', 'lb', 'lb', 
        'lb', 'lb'
    ]
}

more_items = pd.DataFrame(ingredient_prices)
analyze_data_2 = pd.concat([more_items, analyze_data])
analyze_data_2 = analyze_data_2.drop_duplicates().drop('Unnamed: 0', axis = 1)
analyze_data_2.to_csv('./ingredients_refined.csv')

ingredient_prices = {
    'ingredient': [
        'tomatoes', 'black pepper', 'chili', 'coriander', 'jalapenos', 
        'mango', 'oil', 'olives', 'onions', 'peppers', 'pork loin'
    ],
    'price': [
        1.99,   # tomatoes: average grocery store price
        18.99,  # black pepper: ground, converted from per oz
        2.50,   # chili peppers: fresh, average price
        12.99,  # coriander seeds: converted from per oz
        1.25,   # jalapenos: estimated per lb
        1.50,   # mango: average price per fruit converted to per lb
        4.99,   # cooking oil: converted from per bottle
        5.99,   # olives: converted from per jar
        1.29,   # onions: typical grocery price
        2.49,   # mixed peppers: average price
        6.99    # pork loin: typical per lb price
    ],
    'metric': [
        'lb', 'oz', 'each', 'oz', 'each', 
        'each', 'bottle', 'jar', 'lb', 'lb', 
        'lb'
    ]
}

df_ingredients_two = pd.DataFrame(ingredient_prices)
df = pd.concat([analyze_data_2, df_ingredients_two])
df['ingredient'] = df['ingredient'].str.strip()

In [19]:
analyze_data_2

Unnamed: 0,ingredient,price,metric
0,lemon,0.99,lb
1,cream,15.96,lb
2,yogurt,4.46,lb
3,vanilla,111.84,lb
4,shrimp,12.99,lb
...,...,...,...
74,basil,3.25,oz
75,thyme,3.75,oz
76,parsley,3.00,oz
77,nutmeg,6.50,oz


## Current Data

In [100]:
df = pd.read_csv('./ingredients_refined.csv')

### Looking at how many column mismatches receiving in our strings

In [101]:
import json
with open('./train.json', 'r') as f:
    recipes = json.load(f)

In [102]:
mismatches = set()

for recipe in recipes:

    recipe_json = recipe
    ingredients = recipe_json['ingredients']

    for ingredient in ingredients:
        df_data = df[df['ingredient'].str.contains(ingredient)]

        if len(df_data['ingredient']) == 0:
            data = df_data['ingredient']
            
            mismatches.add(ingredient)

In [96]:
mismatches

set()

In [103]:
recipes

[{'id': 10259,
  'cuisine': 'greek',
  'ingredients': ['lettuce',
   'tomatoes',
   'garlic',
   'pepper',
   'onion',
   'cumin',
   'coriander',
   'bean',
   'cheese']},
 {'id': 25693,
  'cuisine': 'southern_us',
  'ingredients': ['flour',
   'pepper',
   'salt',
   'tomatoes',
   'black pepper',
   'thyme',
   'eggs',
   'milk',
   'oil']},
 {'id': 20130,
  'cuisine': 'filipino',
  'ingredients': ['eggs',
   'pepper',
   'salt',
   'oil',
   'chili',
   'chicken breast',
   'garlic',
   'onion',
   'butter',
   'whole chicken']},
 {'id': 6602,
  'cuisine': 'jamaican',
  'ingredients': ['flour',
   'sugar',
   'butter',
   'eggs',
   'ginger',
   'salt',
   'cinnamon',
   'milk',
   'vanilla']},
 {'id': 42779,
  'cuisine': 'spanish',
  'ingredients': ['oil',
   'salt',
   'shrimp',
   'pepper',
   'garlic',
   'cilantro',
   'parsley',
   'sirloin steak',
   'vinegar',
   'salt',
   'ham']},
 {'id': 16903,
  'cuisine': 'mexican',
  'ingredients': ['oil',
   'onion',
   'pork chops',

# Building the Amount matrix

In [11]:
import json
with open('./train.json', 'r') as f:
    recipes = json.load(f)

df = pd.read_csv('./ingredients_refined.csv')

In [None]:
for recipe in recipes:
    

# Modeling 

In [2]:
import pandas as pd
import numpy as np 
df = pd.read_csv('./ingredients_refined.csv')
ingredients = np.array(df['ingredient'].values)
price = df['price']
metric = df['metric']

In [8]:
import gamspy as gp
import numpy as np

cont = gp.Container()
I = cont.addSet('I', records = range(1, len(ingredients) + 1))
R = cont.addSet('R', records = range(1, len(recipes) + 1))

In [152]:
names = cont.addSet('names', domain = 'I', records=[])

Exception: Dimensionality of records (121) is inconsistent with set domain specification (1)

In [None]:
R = cont.addParameter()