In [1]:
#import dependencies
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
import requests
import json
from pprint import pprint
from datetime import datetime

from libs.usda import pull_usda_data

### pulling and cleaning usda api data

In [2]:
#load variables from a local .env file into the environment, then read the usda api key
#(usda_api_key is None when the variable is not set)
load_dotenv()
usda_api_key = os.environ.get('usda_api_key')

In [3]:
#parameter variables for the usda query: one commodity and one year,
#plus the statistic and level values that are looped over when pulling data
commodity = "CORN"
year = 2019
stat_list = ["PRODUCTION", "STOCKS"]
level_list = ["STATE", "NATIONAL"]

In [4]:
#call pull_usda_data once per (stat, level) pair and collect the returned dataframes;
#stats vary in the outer position and levels in the inner one, so the list order is
#stat_list[0] x every level, then stat_list[1] x every level, and so on
data_df_list = [
    pull_usda_data(usda_api_key, commodity, stat, level, year)
    for stat in stat_list
    for level in level_list
]

In [5]:
#combine all data pulled from usda api into one dataframe
commodity_df = pd.concat(data_df_list, axis = 0)

#export one .csv per sub_commodity
#create the output folder first so the export does not fail when the folder is missing
usda_output_dir = os.path.join('clean_data', 'usda_data')
os.makedirs(usda_output_dir, exist_ok = True)

#groupby leaves out rows whose sub_commodity is missing (they have no usable file name and
#would otherwise break the .lower() call); sort = False keeps groups in first-seen order
for sub_commodity, clean_data_df in commodity_df.groupby('sub_commodity', sort = False):
    csv_name = f'usda_data_{commodity.lower()}_{sub_commodity.lower()}.csv'
    clean_data_df.to_csv(os.path.join(usda_output_dir, csv_name), index = False)

### pulling wasde raw data from api to save out excel files

In [12]:
#setup wasde api request
#login credentials are read from the environment / .env file instead of being hardcoded here
#NOTE: any password that was previously committed in this cell should be considered leaked and rotated
load_dotenv()
wasde_email = os.getenv('wasde_email')
wasde_password = os.getenv('wasde_password')
if not wasde_email or not wasde_password:
    raise RuntimeError('wasde_email and wasde_password must be set in the environment or .env file')

#exchange the login for a jwt; raise on an http error instead of failing later on a missing 'jwt' key
login_info = {"auth[email]": wasde_email, "auth[password]": wasde_password}
token_response = requests.post('https://usda.library.cornell.edu/user_token', data = login_info, timeout = 60)
token_response.raise_for_status()
token = token_response.json()['jwt']

end_date = datetime.today().strftime('%Y-%m-%d')

#request every wasde release from 2008-06-01 through today; files holds the decoded json response
files_response = requests.get(
    'https://usda.library.cornell.edu/api/v1/release/findByIdentifier/wasde',
    params = {'latest': 'false', 'start_date': '2008-06-01', 'end_date': end_date},
    headers = {'Authorization': f'Bearer {token}'},
    timeout = 60,
)
files_response.raise_for_status()
files = files_response.json()

In [15]:
#loop through json returned from wasde api request to find .xls report links for each release and save out excel file to raw data folder
#create the output folder first so open() does not fail when the folder is missing
wasde_output_dir = os.path.join('raw_data', 'wasde_data')
os.makedirs(wasde_output_dir, exist_ok = True)

for release in files:
    #links of this release that contain '.xls' (this also matches '.xlsx' links)
    xls_links = [link for link in release['files'] if '.xls' in link]

    #skip releases without a spreadsheet link; otherwise the url of the previous release
    #would be reused and saved under this release's date
    if not xls_links:
        continue

    #when several links match, the last one is used
    xls_url = xls_links[-1]

    #raise on an http error so an error page is never written out as an excel file
    xls_data = requests.get(xls_url, timeout = 60)
    xls_data.raise_for_status()

    #NOTE(review): release_datetime is used verbatim in the file name - confirm it never contains characters that are invalid in file names on the target os
    with open(os.path.join(wasde_output_dir, f'wasde_{release["release_datetime"]}.xls'), 'wb') as output:
        output.write(xls_data.content)