In [19]:
'''
 Prepared by Carolina B.

 This file consists of 3 parts:
 1) Extraction of daily price data for agricultural ETFs:
 Corn: CORN,
 Wheat: WEAT,
 Soybeans: SOYB

 2) Cleaning of daily futures-market prices (exported from TradeStation):
 corn_daily.csv, soybeans_daily.csv and wheat_daily.csv

 3) Concatenation and grouping of the daily data from each asset, compiled
 into one set of clean data


'''
# Import libraries 
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi


# PART 1 - Extraction and Cleaning of agricultural ETFs

Source: ALPACA API

In [20]:
# Import json as per API documentation for Alpaca
import json


In [21]:
# Load .env environment variables
load_dotenv()

True

In [26]:
# Read the Alpaca credentials from the environment (load_dotenv() ran earlier)
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object
alpaca_trade = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version="v2",
)

# ETF symbols to request and the timeframe string passed to the API
tickers = ["CORN", "WEAT", "SOYB"]
timeframe = "1D"

In [44]:
# Request "1D" bars for CORN, WEAT and SOYB over the fixed window
# 2000-10-01 through 2020-10-01 (timestamps localized to America/Chicago)
start_date = pd.Timestamp("2000-10-01", tz="America/Chicago").isoformat()
end_date = pd.Timestamp("2020-10-01", tz="America/Chicago").isoformat()

df_etf_data = alpaca_trade.get_barset(
    tickers, timeframe, start=start_date, end=end_date
).df

df_etf_data.head()

Unnamed: 0_level_0,CORN,CORN,CORN,CORN,CORN,SOYB,SOYB,SOYB,SOYB,SOYB,WEAT,WEAT,WEAT,WEAT,WEAT
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,open,high,low,close,volume
2010-06-09 00:00:00-04:00,25.12,25.25,25.12,25.15,1700.0,,,,,,,,,,
2010-06-10 00:00:00-04:00,25.46,25.46,25.4599,25.4599,200.0,,,,,,,,,,
2010-06-11 00:00:00-04:00,25.88,25.88,25.7913,25.7913,500.0,,,,,,,,,,
2010-06-14 00:00:00-04:00,25.99,26.11,25.99,26.11,2247.0,,,,,,,,,,
2010-06-15 00:00:00-04:00,26.24,26.24,25.969,25.969,6955.0,,,,,,,,,,


In [45]:
# Inspect the dtype of every (ticker, field) column in the ETF frame
df_etf_data.dtypes


CORN  open      float64
      high      float64
      low       float64
      close     float64
      volume    float64
SOYB  open      float64
      high      float64
      low       float64
      close     float64
      volume    float64
WEAT  open      float64
      high      float64
      low       float64
      close     float64
      volume    float64
dtype: object

# Part 2 - Extract csv files for futures data

Source: CSV files exported from TradeStation

In [61]:
# Import relevant libraries
from pathlib import Path
import csv



In [66]:
# Read the corn futures CSV, indexing rows by the parsed "Date" column
corn_csv = Path("raw_data/corn_daily.csv")
# `infer_datetime_format` is intentionally not passed: it is deprecated
# since pandas 2.0, where format inference is already the default.
df_corn = pd.read_csv(corn_csv, parse_dates=True, index_col="Date")
# Discard the listed columns (without mutating in place) and label the
# single column that remains "corn"
df_corn = df_corn.drop(columns=["Time", "Open", "High", "Low", "Vol", "OI"])
df_corn.columns = ["corn"]
df_corn.head()

Unnamed: 0_level_0,corn
Date,Unnamed: 1_level_1
2000-10-26,213.75
2000-10-27,212.5
2000-10-30,212.0
2000-10-31,217.5
2000-11-01,217.5


In [73]:

def csvexatract_df(filepath, col_name):
    """Load one daily futures CSV and keep a single, renamed column.

    The file is read with its "Date" column parsed as the index. The "Time",
    "Open", "High", "Low", "Vol" and "OI" columns are then discarded and the
    one column left over is renamed to ``col_name``.

    Parameters
    ----------
    filepath : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.
    col_name : str
        Name given to the remaining column.

    Returns
    -------
    pandas.DataFrame
        Single-column frame indexed by "Date".

    Raises
    ------
    KeyError
        If any of the columns to discard is absent from the file.
    ValueError
        If the number of columns left after the discard is not exactly one.
    """
    # `infer_datetime_format` is intentionally not passed: it is deprecated
    # since pandas 2.0, where format inference is already the default.
    prices = pd.read_csv(filepath, parse_dates=True, index_col="Date")
    # Non-mutating drop instead of inplace=True
    prices = prices.drop(columns=["Time", "Open", "High", "Low", "Vol", "OI"])
    # Assigning a one-element list fails loudly if extra columns survived
    prices.columns = [col_name]
    return prices

# Run the corn CSV through the helper, naming the kept column "cornb"
cornb = csvexatract_df(corn_csv, "cornb")
cornb



Unnamed: 0_level_0,cornb
Date,Unnamed: 1_level_1
2000-10-26,213.75
2000-10-27,212.50
2000-10-30,212.00
2000-10-31,217.50
2000-11-01,217.50
...,...
2020-10-22,416.25
2020-10-23,419.25
2020-10-26,417.75
2020-10-27,416.00


In [75]:
# Soybean futures: extract through the helper into a "soybean" column
soybean_csv = Path("raw_data/soybeans_daily.csv")
soybean_df = csvexatract_df(soybean_csv, "soybean")
soybean_df

Unnamed: 0_level_0,soybean
Date,Unnamed: 1_level_1
2000-10-26,503.75
2000-10-27,500.50
2000-10-30,502.75
2000-10-31,504.00
2000-11-01,505.00
...,...
2020-10-22,1073.75
2020-10-23,1083.75
2020-10-26,1087.75
2020-10-27,1082.25


In [76]:
# Wheat futures: extract through the helper into a "wheat" column
wheat_csv = Path("raw_data/wheat_daily.csv")
wheat_df = csvexatract_df(wheat_csv, "wheat")
wheat_df

Unnamed: 0_level_0,wheat
Date,Unnamed: 1_level_1
2000-10-26,313.00
2000-10-27,315.50
2000-10-30,312.25
2000-10-31,312.25
2000-11-01,311.75
...,...
2020-10-22,622.75
2020-10-23,632.75
2020-10-26,620.00
2020-10-27,615.75


In [77]:
# Place the three futures frames side by side, keeping only the dates
# present in all of them (inner join on the index)
df_futures = pd.concat(
    [df_corn, soybean_df, wheat_df],
    axis="columns",
    join="inner",
)
df_futures.head()

Unnamed: 0_level_0,corn,soybean,wheat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-10-26,213.75,503.75,313.0
2000-10-27,212.5,500.5,315.5
2000-10-30,212.0,502.75,312.25
2000-10-31,217.5,504.0,312.25
2000-11-01,217.5,505.0,311.75
