In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
import numpy as np
from pandas.tseries.offsets import MonthEnd, YearEnd


# user defined - see utils.py
from utils import snake_case_columns, beef_price_lineplot, cso_date_to_datetime
from utils import transform_indexmundi_yearly_data, prepare_forex_data, transform_fred_stlouisfed_quarterlydata

# Australian Beef prices

https://www.indexmundi.com/commodities/?commodity=beef&months=360&currency=aud

In [2]:
df_beef_aus = pd.read_csv("data/aus/beef_aus_1995-2022.csv")

In [3]:
df_beef_aus

Unnamed: 0,Month,Price,Change
0,Jan-95,2.78,-
1,Feb-95,2.90,4.13%
2,Mar-95,2.78,-4.18%
3,Apr-95,2.60,-6.24%
4,May-95,2.40,-7.65%
...,...,...,...
322,Nov-21,8.12,5.53%
323,Dec-21,8.35,2.80%
324,Jan-22,8.32,-0.38%
325,Feb-22,8.68,4.35%


In [4]:
df_beef_aus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 327 entries, 0 to 326
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Month   327 non-null    object 
 1   Price   327 non-null    float64
 2   Change  327 non-null    object 
dtypes: float64(1), object(2)
memory usage: 7.8+ KB


In [5]:
# Normalise the column names to snake_case. The return value is not used, so the helper
# is relied on to rename the columns of df_beef_aus in place (defined in utils.py).
snake_case_columns(df_beef_aus)

In [6]:
# Rename the generic `price` column to `beef_per_kg`.
df_beef_aus = df_beef_aus.rename({"price": "beef_per_kg"}, axis="columns")

In [7]:
# Parse labels such as "Jan-95" (which parse to the 1st of the month) and snap each
# date to its month end. MonthEnd(0) only rolls forward when a date is not already a
# month end, so re-running this cell leaves the dates untouched, whereas MonthEnd(1)
# would push already-converted dates into the following month.
df_beef_aus["month"] = pd.to_datetime(df_beef_aus["month"], format='%b-%y') + MonthEnd(0)

In [8]:
df_beef_aus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 327 entries, 0 to 326
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   month        327 non-null    datetime64[ns]
 1   beef_per_kg  327 non-null    float64       
 2   change       327 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 7.8+ KB


In [9]:
# The month-on-month `change` column is not needed; keep only date and price.
df_beef_aus = df_beef_aus.drop(columns=["change"])

In [10]:
df_beef_aus = df_beef_aus.set_index("month")

In [11]:
# Keep observations from 1999 onwards.
df_beef_aus = df_beef_aus.loc[df_beef_aus.index.year >= 1999]

## final output

In [12]:
df_beef_aus

Unnamed: 0_level_0,beef_per_kg
month,Unnamed: 1_level_1
1999-01-31,2.71
1999-02-28,2.80
1999-03-31,2.79
1999-04-30,2.68
1999-05-31,2.61
...,...
2021-11-30,8.12
2021-12-31,8.35
2022-01-31,8.32
2022-02-28,8.68


# Australia Beef and Veal Meat Domestic Consumption by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=beef-and-veal-meat&graph=domestic-consumption

In [13]:
df_meat_consumption_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-domestic-consumption.csv")

In [14]:
df_meat_consumption_aus.columns

Index(['Market Year', ' Value', ' Unit Description'], dtype='object')

In [15]:
df_meat_consumption_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,521,(1000 MT CWE)
1,1961,446,(1000 MT CWE)
2,1962,502,(1000 MT CWE)
3,1963,526,(1000 MT CWE)
4,1964,553,(1000 MT CWE)
...,...,...,...
58,2018,741,(1000 MT CWE)
59,2019,708,(1000 MT CWE)
60,2020,669,(1000 MT CWE)
61,2021,605,(1000 MT CWE)


In [16]:
df_meat_consumption_aus.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63 entries, 0 to 62
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Market Year        63 non-null     int64 
 1    Value             63 non-null     int64 
 2    Unit Description  63 non-null     object
dtypes: int64(2), object(1)
memory usage: 1.6+ KB


In [17]:
snake_case_columns(df_meat_consumption_aus)

In [18]:
# Turn the integer market year into a year-end timestamp (e.g. 1960 -> 1960-12-31).
# YearEnd(0) rolls forward only when a date is not already a year end, so re-running
# this cell is a no-op, whereas YearEnd(1) would shift already-converted dates by a
# whole year.
df_meat_consumption_aus["market_year"] = pd.to_datetime(df_meat_consumption_aus["market_year"], format='%Y') + YearEnd(0)

In [19]:
df_meat_consumption_aus

Unnamed: 0,market_year,_value,_unit_description
0,1960-12-31,521,(1000 MT CWE)
1,1961-12-31,446,(1000 MT CWE)
2,1962-12-31,502,(1000 MT CWE)
3,1963-12-31,526,(1000 MT CWE)
4,1964-12-31,553,(1000 MT CWE)
...,...,...,...
58,2018-12-31,741,(1000 MT CWE)
59,2019-12-31,708,(1000 MT CWE)
60,2020-12-31,669,(1000 MT CWE)
61,2021-12-31,605,(1000 MT CWE)


In [20]:
# Use the shared `month` key for the date column and give the value a descriptive name.
df_meat_consumption_aus = df_meat_consumption_aus.rename(
    columns={
        "market_year": "month",
        "_value": "meat_consumption",
    }
)

In [21]:
# The unit label is the same text on every displayed row, so drop the column.
df_meat_consumption_aus = df_meat_consumption_aus.drop(columns=["_unit_description"])

In [22]:
df_meat_consumption_aus = df_meat_consumption_aus.set_index("month")

In [23]:
df_meat_consumption_aus.isnull().sum()

meat_consumption    0
dtype: int64

In [24]:
df_meat_consumption_aus

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
1960-12-31,521
1961-12-31,446
1962-12-31,502
1963-12-31,526
1964-12-31,553
...,...
2018-12-31,741
2019-12-31,708
2020-12-31,669
2021-12-31,605


In [25]:
df_meat_consumption_aus_monthly = df_meat_consumption_aus.resample('M').last()

In [26]:
df_meat_consumption_aus_monthly.head(20)

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
1960-12-31,521.0
1961-01-31,
1961-02-28,
1961-03-31,
1961-04-30,
1961-05-31,
1961-06-30,
1961-07-31,
1961-08-31,
1961-09-30,


In [27]:
# Spread each annual total evenly across its year: the year-end observation is
# back-filled over the preceding month-ends and every value is then divided by 12.
df_meat_consumption_aus_monthly = (
    df_meat_consumption_aus
    .resample('M')
    .last()
    .bfill()
    .div(12)
)

In [28]:
df_meat_consumption_aus_monthly[df_meat_consumption_aus_monthly.index.year==1961].sum()

meat_consumption    446.0
dtype: float64

In [29]:
# Only the December row exists for 1960 (the first annual observation); this year is not used later on.
df_meat_consumption_aus_monthly.loc[df_meat_consumption_aus_monthly.index.year == 1960]

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
1960-12-31,43.416667


In [30]:
# Spot-check the monthly values for 1960 through 1962.
df_meat_consumption_aus_monthly.loc[df_meat_consumption_aus_monthly.index.year.isin([1960, 1961, 1962])]

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
1960-12-31,43.416667
1961-01-31,37.166667
1961-02-28,37.166667
1961-03-31,37.166667
1961-04-30,37.166667
1961-05-31,37.166667
1961-06-30,37.166667
1961-07-31,37.166667
1961-08-31,37.166667
1961-09-30,37.166667


In [31]:
# Spot-check the monthly values for 2001 and 2002.
df_meat_consumption_aus_monthly.loc[df_meat_consumption_aus_monthly.index.year.isin([2001, 2002])]

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
2001-01-31,58.75
2001-02-28,58.75
2001-03-31,58.75
2001-04-30,58.75
2001-05-31,58.75
2001-06-30,58.75
2001-07-31,58.75
2001-08-31,58.75
2001-09-30,58.75
2001-10-31,58.75


In [32]:
df_meat_consumption_aus_monthly.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 745 entries, 1960-12-31 to 2022-12-31
Freq: M
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   meat_consumption  745 non-null    float64
dtypes: float64(1)
memory usage: 11.6 KB


In [33]:
# Keep 1990 onwards.
df_meat_consumption_aus_monthly = df_meat_consumption_aus_monthly.loc[df_meat_consumption_aus_monthly.index.year >= 1990]

In [34]:
# Keep years up to and including 2021.
df_meat_consumption_aus_monthly = df_meat_consumption_aus_monthly.loc[df_meat_consumption_aus_monthly.index.year <= 2021]

## final output

In [35]:
df_meat_consumption_aus_monthly

Unnamed: 0_level_0,meat_consumption
month,Unnamed: 1_level_1
1990-01-31,54.250000
1990-02-28,54.250000
1990-03-31,54.250000
1990-04-30,54.250000
1990-05-31,54.250000
...,...
2021-08-31,50.416667
2021-09-30,50.416667
2021-10-31,50.416667
2021-11-30,50.416667


# Australia Beef and Veal Meat Exports by Year

In [36]:
df_exports_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-exports.csv")

In [37]:
df_exports_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,165,(1000 MT CWE)
1,1961,275,(1000 MT CWE)
2,1962,382,(1000 MT CWE)
3,1963,426,(1000 MT CWE)
4,1964,442,(1000 MT CWE)
...,...,...,...
58,2018,1582,(1000 MT CWE)
59,2019,1739,(1000 MT CWE)
60,2020,1473,(1000 MT CWE)
61,2021,1320,(1000 MT CWE)


In [38]:
# From here on, use the user-defined helper from utils.py instead of repeating the
# manual steps; the second argument is the name given to the value column in the result.
df_exports_aus_M = transform_indexmundi_yearly_data(df_exports_aus, "meat_exports")

In [39]:
df_exports_aus_M.isnull().sum()

meat_exports    0
dtype: int64

In [40]:
df_exports_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   meat_exports  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [41]:
df_exports_aus_M

Unnamed: 0_level_0,meat_exports
month,Unnamed: 1_level_1
1990-01-31,88.666667
1990-02-28,88.666667
1990-03-31,88.666667
1990-04-30,88.666667
1990-05-31,88.666667
...,...
2021-08-31,110.000000
2021-09-30,110.000000
2021-10-31,110.000000
2021-11-30,110.000000


# Australia Beef and Veal Meat Imports by Year

In [42]:
df_imports_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-imports.csv")

In [43]:
df_imports_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,0,(1000 MT CWE)
1,1961,0,(1000 MT CWE)
2,1962,0,(1000 MT CWE)
3,1963,0,(1000 MT CWE)
4,1964,0,(1000 MT CWE)
...,...,...,...
58,2018,14,(1000 MT CWE)
59,2019,15,(1000 MT CWE)
60,2020,17,(1000 MT CWE)
61,2021,25,(1000 MT CWE)


In [44]:
df_imports_aus_M = transform_indexmundi_yearly_data(df_imports_aus, "meat_imports")

In [45]:
df_imports_aus_M.isnull().sum()

meat_imports    0
dtype: int64

In [46]:
df_imports_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   meat_imports  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [47]:
df_imports_aus_M

Unnamed: 0_level_0,meat_imports
month,Unnamed: 1_level_1
1990-01-31,0.000000
1990-02-28,0.000000
1990-03-31,0.000000
1990-04-30,0.000000
1990-05-31,0.000000
...,...
2021-08-31,2.083333
2021-09-30,2.083333
2021-10-31,2.083333
2021-11-30,2.083333


# Australia Beef and Veal Meat Total Distribution by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=beef-and-veal-meat&graph=total-distribution

In [48]:
df_meat_dist_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-total-distribution.csv")

In [49]:
df_meat_dist_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,701,(1000 MT CWE)
1,1961,745,(1000 MT CWE)
2,1962,903,(1000 MT CWE)
3,1963,976,(1000 MT CWE)
4,1964,1022,(1000 MT CWE)
...,...,...,...
58,2018,2323,(1000 MT CWE)
59,2019,2447,(1000 MT CWE)
60,2020,2142,(1000 MT CWE)
61,2021,1925,(1000 MT CWE)


In [50]:
df_meat_dist_aus_M = transform_indexmundi_yearly_data(df_meat_dist_aus, "meat_distribution")

In [51]:
df_meat_dist_aus_M.isnull().sum()

meat_distribution    0
dtype: int64

In [52]:
df_meat_dist_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   meat_distribution  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [53]:
df_meat_dist_aus_M

Unnamed: 0_level_0,meat_distribution
month,Unnamed: 1_level_1
1990-01-31,145.333333
1990-02-28,145.333333
1990-03-31,145.333333
1990-04-30,145.333333
1990-05-31,145.333333
...,...
2021-08-31,160.416667
2021-09-30,160.416667
2021-10-31,160.416667
2021-11-30,160.416667


# Australia Beef and Veal Meat Total Supply by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=beef-and-veal-meat&graph=total-supply

In [54]:
df_beef_supply_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-total-supply.csv")

In [55]:
df_beef_supply_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,701,(1000 MT CWE)
1,1961,745,(1000 MT CWE)
2,1962,903,(1000 MT CWE)
3,1963,976,(1000 MT CWE)
4,1964,1022,(1000 MT CWE)
...,...,...,...
58,2018,2323,(1000 MT CWE)
59,2019,2447,(1000 MT CWE)
60,2020,2142,(1000 MT CWE)
61,2021,1925,(1000 MT CWE)


In [56]:
df_beef_supply_aus_M = transform_indexmundi_yearly_data(df_beef_supply_aus, "meat_supply")

In [57]:
df_beef_supply_aus_M.isnull().sum()

meat_supply    0
dtype: int64

In [58]:
df_beef_supply_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   meat_supply  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [59]:
df_beef_supply_aus_M

Unnamed: 0_level_0,meat_supply
month,Unnamed: 1_level_1
1990-01-31,145.333333
1990-02-28,145.333333
1990-03-31,145.333333
1990-04-30,145.333333
1990-05-31,145.333333
...,...
2021-08-31,160.416667
2021-09-30,160.416667
2021-10-31,160.416667
2021-11-30,160.416667


# Australia Animal Numbers, Cattle Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=cattle&graph=production

In [60]:
df_cattle_production_aus = pd.read_csv("data/aus/au-cattle-production.csv")

In [61]:
df_cattle_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,0,(1000 HEAD)
1,1961,0,(1000 HEAD)
2,1962,0,(1000 HEAD)
3,1963,0,(1000 HEAD)
4,1964,0,(1000 HEAD)
...,...,...,...
58,2018,9100,(1000 HEAD)
59,2019,8700,(1000 HEAD)
60,2020,8300,(1000 HEAD)
61,2021,8200,(1000 HEAD)


In [62]:
df_cattle_production_aus_M = transform_indexmundi_yearly_data(df_cattle_production_aus, "cattle_production" )

In [63]:
df_cattle_production_aus_M.isnull().sum()

cattle_production    0
dtype: int64

In [64]:
df_cattle_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cattle_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [65]:
df_cattle_production_aus_M

Unnamed: 0_level_0,cattle_production
month,Unnamed: 1_level_1
1990-01-31,754.833333
1990-02-28,754.833333
1990-03-31,754.833333
1990-04-30,754.833333
1990-05-31,754.833333
...,...
2021-08-31,683.333333
2021-09-30,683.333333
2021-10-31,683.333333
2021-11-30,683.333333


# Australia Animal Numbers, Swine Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=swine&graph=production

In [66]:
df_swine_production_aus = pd.read_csv("data/aus/au-swine-production.csv")

In [67]:
df_swine_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,0,(1000 HEAD)
1,1961,0,(1000 HEAD)
2,1962,0,(1000 HEAD)
3,1963,0,(1000 HEAD)
4,1964,0,(1000 HEAD)
5,1965,0,(1000 HEAD)
6,1966,0,(1000 HEAD)
7,1967,3202,(1000 HEAD)
8,1968,3379,(1000 HEAD)
9,1969,3676,(1000 HEAD)


In [68]:
df_swine_production_aus_M = transform_indexmundi_yearly_data(df_swine_production_aus, "swine_production_by_head")

In [69]:
df_swine_production_aus_M.isnull().sum()

swine_production_by_head    0
dtype: int64

In [70]:
df_swine_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 324 entries, 1990-01-31 to 2016-12-31
Freq: M
Data columns (total 1 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   swine_production_by_head  324 non-null    float64
dtypes: float64(1)
memory usage: 5.1 KB


In [71]:
# Keep observations from 1999 onwards.
df_swine_production_aus_M = df_swine_production_aus_M.loc[df_swine_production_aus_M.index.year >= 1999]

## final output

In [72]:
df_swine_production_aus_M

Unnamed: 0_level_0,swine_production_by_head
month,Unnamed: 1_level_1
1999-01-31,410.25
1999-02-28,410.25
1999-03-31,410.25
1999-04-30,410.25
1999-05-31,410.25
...,...
2016-08-31,419.00
2016-09-30,419.00
2016-10-31,419.00
2016-11-30,419.00


# Australia Barley Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=barley&graph=production

In [73]:
df_barley_production_aus = pd.read_csv("data/aus/au-barley-production.csv")

In [74]:
df_barley_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,1542,(1000 MT)
1,1961,941,(1000 MT)
2,1962,898,(1000 MT)
3,1963,984,(1000 MT)
4,1964,1118,(1000 MT)
...,...,...,...
57,2017,9254,(1000 MT)
58,2018,8819,(1000 MT)
59,2019,10127,(1000 MT)
60,2020,13100,(1000 MT)


In [75]:
df_barley_production_aus_M = transform_indexmundi_yearly_data(df_barley_production_aus, "barley_production")

In [76]:
df_barley_production_aus_M.isnull().sum()

barley_production    0
dtype: int64

In [77]:
df_barley_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   barley_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [78]:
df_barley_production_aus_M

Unnamed: 0_level_0,barley_production
month,Unnamed: 1_level_1
1990-01-31,348.666667
1990-02-28,348.666667
1990-03-31,348.666667
1990-04-30,348.666667
1990-05-31,348.666667
...,...
2021-08-31,1083.333333
2021-09-30,1083.333333
2021-10-31,1083.333333
2021-11-30,1083.333333


# Australia Beef and Veal Meat Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=beef-and-veal-meat&graph=production

In [79]:
df_beef_and_veal_production_aus = pd.read_csv("data/aus/au-beef-and-veal-meat-production.csv")

In [80]:
df_beef_and_veal_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,685,(1000 MT CWE)
1,1961,730,(1000 MT CWE)
2,1962,879,(1000 MT CWE)
3,1963,957,(1000 MT CWE)
4,1964,998,(1000 MT CWE)
...,...,...,...
58,2018,2309,(1000 MT CWE)
59,2019,2432,(1000 MT CWE)
60,2020,2125,(1000 MT CWE)
61,2021,1900,(1000 MT CWE)


In [81]:
df_beef_and_veal_production_aus_M = transform_indexmundi_yearly_data(df_beef_and_veal_production_aus, "beef_and_veal_production")

In [82]:
df_beef_and_veal_production_aus_M.isnull().sum()

beef_and_veal_production    0
dtype: int64

In [83]:
df_beef_and_veal_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   beef_and_veal_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [84]:
df_beef_and_veal_production_aus_M

Unnamed: 0_level_0,beef_and_veal_production
month,Unnamed: 1_level_1
1990-01-31,143.166667
1990-02-28,143.166667
1990-03-31,143.166667
1990-04-30,143.166667
1990-05-31,143.166667
...,...
2021-08-31,158.333333
2021-09-30,158.333333
2021-10-31,158.333333
2021-11-30,158.333333


# Australia Broiler Meat (Poultry) Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=broiler-meat&graph=production

In [85]:
df_poultry_production_aus = pd.read_csv("data/aus/au-broiler-meat-production.csv")

In [86]:
df_poultry_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1965,45,(1000 MT)
1,1966,56,(1000 MT)
2,1967,76,(1000 MT)
3,1968,89,(1000 MT)
4,1969,94,(1000 MT)
5,1970,105,(1000 MT)
6,1971,131,(1000 MT)
7,1972,142,(1000 MT)
8,1973,138,(1000 MT)
9,1974,171,(1000 MT)


In [87]:
df_poultry_production_aus_M = transform_indexmundi_yearly_data(df_poultry_production_aus, "poultry_production")

In [88]:
df_poultry_production_aus_M.isnull().sum()

poultry_production    0
dtype: int64

In [89]:
df_poultry_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 324 entries, 1990-01-31 to 2016-12-31
Freq: M
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   poultry_production  324 non-null    float64
dtypes: float64(1)
memory usage: 5.1 KB


In [90]:
df_poultry_production_aus_M

Unnamed: 0_level_0,poultry_production
month,Unnamed: 1_level_1
1990-01-31,31.416667
1990-02-28,31.416667
1990-03-31,31.416667
1990-04-30,31.416667
1990-05-31,31.416667
...,...
2016-08-31,97.000000
2016-09-30,97.000000
2016-10-31,97.000000
2016-11-30,97.000000


In [91]:
# Placeholder rows (all NaN) for the month-ends from January 2017 to December 2020.
# https://stackoverflow.com/questions/34915828/pandas-date-range-to-generate-monthly-data-at-beginning-of-the-month
df_poultry_missing_months = pd.DataFrame(
    {"poultry_production": np.nan},
    index=pd.date_range(start='31-JAN-2017', end='31-DEC-2020', freq='M', name='month'),
)
df_poultry_missing_months.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 48 entries, 2017-01-31 to 2020-12-31
Freq: M
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   poultry_production  0 non-null      float64
dtypes: float64(1)
memory usage: 768.0 bytes


In [92]:
# Add the NaN placeholder rows for 2017-2020 after the existing series.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported equivalent and yields the same rows in the same order.
df_poultry_production_aus_M = pd.concat([df_poultry_production_aus_M, df_poultry_missing_months])

In [93]:
df_poultry_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 372 entries, 1990-01-31 to 2020-12-31
Freq: M
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   poultry_production  324 non-null    float64
dtypes: float64(1)
memory usage: 5.8 KB


In [94]:
# Fill the appended 2017-2020 NaN rows. They sit at the end of the series, so linear
# interpolation has no later data point to work towards; the 2020 rows displayed in the
# next cell all carry the last known value (97.0).
df_poultry_production_aus_M["poultry_production"] = df_poultry_production_aus_M["poultry_production"].interpolate(method='linear')

In [95]:
df_poultry_production_aus_M[df_poultry_production_aus_M.index.year > 2019] # filled values are flat at the last known figure; a crude fill, accepted for now

Unnamed: 0_level_0,poultry_production
month,Unnamed: 1_level_1
2020-01-31,97.0
2020-02-29,97.0
2020-03-31,97.0
2020-04-30,97.0
2020-05-31,97.0
2020-06-30,97.0
2020-07-31,97.0
2020-08-31,97.0
2020-09-30,97.0
2020-10-31,97.0


In [96]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_poultry_production_aus_M.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

In [97]:
# Keep observations from 1999 onwards.
df_poultry_production_aus_M = df_poultry_production_aus_M.loc[df_poultry_production_aus_M.index.year >= 1999]

In [98]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_poultry_production_aus_M.index)

DatetimeIndex([], dtype='datetime64[ns]', freq='M')

## final output

In [99]:
df_poultry_production_aus_M

Unnamed: 0_level_0,poultry_production
month,Unnamed: 1_level_1
1999-01-31,45.833333
1999-02-28,45.833333
1999-03-31,45.833333
1999-04-30,45.833333
1999-05-31,45.833333
...,...
2020-08-31,97.000000
2020-09-30,97.000000
2020-10-31,97.000000
2020-11-30,97.000000


# Australia Corn Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=corn&graph=production

In [100]:
df_corn_production_aus = pd.read_csv("data/aus/au-corn-production.csv")

In [101]:
df_corn_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,159,(1000 MT)
1,1961,186,(1000 MT)
2,1962,189,(1000 MT)
3,1963,171,(1000 MT)
4,1964,175,(1000 MT)
...,...,...,...
57,2017,387,(1000 MT)
58,2018,327,(1000 MT)
59,2019,268,(1000 MT)
60,2020,356,(1000 MT)


In [102]:
df_corn_production_aus_M = transform_indexmundi_yearly_data(df_corn_production_aus, "corn_production")

In [103]:
df_corn_production_aus_M.isnull().sum()

corn_production    0
dtype: int64

In [104]:
df_corn_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   corn_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [105]:
df_corn_production_aus_M

Unnamed: 0_level_0,corn_production
month,Unnamed: 1_level_1
1990-01-31,17.083333
1990-02-28,17.083333
1990-03-31,17.083333
1990-04-30,17.083333
1990-05-31,17.083333
...,...
2021-08-31,35.833333
2021-09-30,35.833333
2021-10-31,35.833333
2021-11-30,35.833333


# Australia Cotton Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=cotton&graph=production

In [106]:
df_cotton_production_aus = pd.read_csv("data/aus/au-cotton-production.csv")

In [107]:
df_cotton_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,8,1000 480 lb. Bales
1,1961,8,1000 480 lb. Bales
2,1962,10,1000 480 lb. Bales
3,1963,16,1000 480 lb. Bales
4,1964,45,1000 480 lb. Bales
...,...,...,...
57,2017,4800,1000 480 lb. Bales
58,2018,2200,1000 480 lb. Bales
59,2019,625,1000 480 lb. Bales
60,2020,2800,1000 480 lb. Bales


In [108]:
df_cotton_production_aus_M = transform_indexmundi_yearly_data(df_cotton_production_aus, "cotton_production")

In [109]:
df_cotton_production_aus_M.isnull().sum()

cotton_production    0
dtype: int64

In [110]:
df_cotton_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cotton_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [111]:
df_cotton_production_aus_M

Unnamed: 0_level_0,cotton_production
month,Unnamed: 1_level_1
1990-01-31,165.750000
1990-02-28,165.750000
1990-03-31,165.750000
1990-04-30,165.750000
1990-05-31,165.750000
...,...
2021-08-31,458.333333
2021-09-30,458.333333
2021-10-31,458.333333
2021-11-30,458.333333


# Australia Dairy, Butter Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=butter&graph=production

In [112]:
df_butter_production_aus = pd.read_csv("data/aus/au-butter-production.csv")

In [113]:
df_butter_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1964,206,(1000 MT)
1,1965,206,(1000 MT)
2,1966,209,(1000 MT)
3,1967,222,(1000 MT)
4,1968,196,(1000 MT)
5,1969,198,(1000 MT)
6,1970,223,(1000 MT)
7,1971,203,(1000 MT)
8,1972,196,(1000 MT)
9,1973,185,(1000 MT)


In [114]:
df_butter_production_aus_M = transform_indexmundi_yearly_data(df_butter_production_aus, "butter_production")

In [115]:
df_butter_production_aus_M.isnull().sum()

butter_production    0
dtype: int64

In [116]:
df_butter_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   butter_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [117]:
df_butter_production_aus_M

Unnamed: 0_level_0,butter_production
month,Unnamed: 1_level_1
1990-01-31,9.250000
1990-02-28,9.250000
1990-03-31,9.250000
1990-04-30,9.250000
1990-05-31,9.250000
...,...
2021-08-31,6.666667
2021-09-30,6.666667
2021-10-31,6.666667
2021-11-30,6.666667


# Australia Dairy, Cheese Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=cheese&graph=production

In [118]:
df_cheese_production_aus = pd.read_csv("data/aus/au-cheese-production.csv")

In [119]:
df_cheese_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1964,59,(1000 MT)
1,1965,63,(1000 MT)
2,1966,60,(1000 MT)
3,1967,70,(1000 MT)
4,1968,71,(1000 MT)
5,1969,75,(1000 MT)
6,1970,76,(1000 MT)
7,1971,78,(1000 MT)
8,1972,81,(1000 MT)
9,1973,93,(1000 MT)


In [120]:
df_cheese_production_aus_M = transform_indexmundi_yearly_data(df_cheese_production_aus, "cheese_production")

In [121]:
df_cheese_production_aus_M.isnull().sum()

cheese_production    0
dtype: int64

In [122]:
df_cheese_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   cheese_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [123]:
df_cheese_production_aus_M

Unnamed: 0_level_0,cheese_production
month,Unnamed: 1_level_1
1990-01-31,14.583333
1990-02-28,14.583333
1990-03-31,14.583333
1990-04-30,14.583333
1990-05-31,14.583333
...,...
2021-08-31,30.000000
2021-09-30,30.000000
2021-10-31,30.000000
2021-11-30,30.000000


# Australia Dairy, Dry Whole Milk Powder Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=powdered-whole-milk&graph=production

In [124]:
df_powdered_milk_production_aus = pd.read_csv("data/aus/au-powdered-whole-milk-production.csv")

In [125]:
df_powdered_milk_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1982,171,(1000 MT)
1,1983,176,(1000 MT)
2,1984,174,(1000 MT)
3,1985,195,(1000 MT)
4,1986,177,(1000 MT)
5,1987,194,(1000 MT)
6,1988,184,(1000 MT)
7,1989,187,(1000 MT)
8,1990,200,(1000 MT)
9,1991,58,(1000 MT)


In [126]:
df_powdered_milk_production_aus_M = transform_indexmundi_yearly_data(df_powdered_milk_production_aus, "milk_powder_production")

In [127]:
df_powdered_milk_production_aus_M.isnull().sum()

milk_powder_production    0
dtype: int64

In [128]:
df_powdered_milk_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   milk_powder_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [129]:
df_powdered_milk_production_aus_M

Unnamed: 0_level_0,milk_powder_production
month,Unnamed: 1_level_1
1990-01-31,16.666667
1990-02-28,16.666667
1990-03-31,16.666667
1990-04-30,16.666667
1990-05-31,16.666667
...,...
2021-08-31,4.583333
2021-09-30,4.583333
2021-10-31,4.583333
2021-11-30,4.583333


# Australia Dairy, Milk, Fluid Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=milk&graph=production

In [130]:
df_milk_production_aus = pd.read_csv("data/aus/au-milk-production.csv")

In [131]:
df_milk_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1964,6991,(1000 MT)
1,1965,7105,(1000 MT)
2,1966,7111,(1000 MT)
3,1967,7497,(1000 MT)
4,1968,6996,(1000 MT)
5,1969,7158,(1000 MT)
6,1970,7731,(1000 MT)
7,1971,7450,(1000 MT)
8,1972,7275,(1000 MT)
9,1973,7145,(1000 MT)


In [132]:
df_milk_production_aus_M = transform_indexmundi_yearly_data(df_milk_production_aus, "milk_production")

In [133]:
df_milk_production_aus_M.isnull().sum()

milk_production    0
dtype: int64

In [134]:
df_milk_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   milk_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [135]:
df_milk_production_aus_M

Unnamed: 0_level_0,milk_production
month,Unnamed: 1_level_1
1990-01-31,536.25
1990-02-28,536.25
1990-03-31,536.25
1990-04-30,536.25
1990-05-31,536.25
...,...
2021-08-31,750.00
2021-09-30,750.00
2021-10-31,750.00
2021-11-30,750.00


# Australia Dairy, Milk, Nonfat Dry Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=nonfat-dry-milk&graph=production

In [136]:
df_nonfatmilk_production_aus = pd.read_csv("data/aus/au-nonfat-dry-milk-production.csv")

In [137]:
df_nonfatmilk_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1964,52,(1000 MT)
1,1965,58,(1000 MT)
2,1966,63,(1000 MT)
3,1967,100,(1000 MT)
4,1968,104,(1000 MT)
5,1969,87,(1000 MT)
6,1970,112,(1000 MT)
7,1971,109,(1000 MT)
8,1972,107,(1000 MT)
9,1973,134,(1000 MT)


In [138]:
df_nonfatmilk_production_aus_M = transform_indexmundi_yearly_data(df_nonfatmilk_production_aus, "nonfatmilk_production")

In [139]:
df_nonfatmilk_production_aus_M.isnull().sum()

nonfatmilk_production    0
dtype: int64

In [140]:
df_nonfatmilk_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   nonfatmilk_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [141]:
df_nonfatmilk_production_aus_M

Unnamed: 0_level_0,nonfatmilk_production
month,Unnamed: 1_level_1
1990-01-31,12.0
1990-02-28,12.0
1990-03-31,12.0
1990-04-30,12.0
1990-05-31,12.0
...,...
2021-08-31,12.5
2021-09-30,12.5
2021-10-31,12.5
2021-11-30,12.5


# Australia Fish Meal Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=fish-meal&graph=production

In [142]:
df_fishmeal_production_aus = pd.read_csv("data/aus/au-fish-meal-production.csv")

In [143]:
df_fishmeal_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,2005,0,(1000 MT)
1,2006,0,(1000 MT)
2,2007,0,(1000 MT)
3,2008,0,(1000 MT)
4,2009,0,(1000 MT)
5,2010,0,(1000 MT)
6,2011,0,(1000 MT)
7,2012,0,(1000 MT)
8,2013,0,(1000 MT)
9,2014,0,(1000 MT)


In [144]:
df_fishmeal_production_aus_M = transform_indexmundi_yearly_data(df_fishmeal_production_aus, "fishmeal_production")

In [145]:
df_fishmeal_production_aus_M.isnull().sum()

fishmeal_production    0
dtype: int64

In [146]:
df_fishmeal_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 193 entries, 2005-12-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   fishmeal_production  193 non-null    float64
dtypes: float64(1)
memory usage: 3.0 KB


## final output

This series is not included in the final dataset because every value is zero.

In [147]:
df_fishmeal_production_aus_M

Unnamed: 0_level_0,fishmeal_production
month,Unnamed: 1_level_1
2005-12-31,0.0
2006-01-31,0.0
2006-02-28,0.0
2006-03-31,0.0
2006-04-30,0.0
...,...
2021-08-31,0.0
2021-09-30,0.0
2021-10-31,0.0
2021-11-30,0.0


# Australia Milled Rice Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=milled-rice&graph=production

In [148]:
df_rice_production_aus = pd.read_csv("data/aus/au-milled-rice-production.csv")

In [149]:
df_rice_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,82,(1000 MT)
1,1961,96,(1000 MT)
2,1962,97,(1000 MT)
3,1963,102,(1000 MT)
4,1964,109,(1000 MT)
...,...,...,...
57,2017,457,(1000 MT)
58,2018,48,(1000 MT)
59,2019,36,(1000 MT)
60,2020,330,(1000 MT)


In [150]:
df_rice_production_aus_M = transform_indexmundi_yearly_data(df_rice_production_aus, "rice_production")

In [151]:
df_rice_production_aus_M.isnull().sum()

rice_production    0
dtype: int64

In [152]:
df_rice_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   rice_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [153]:
df_rice_production_aus_M

Unnamed: 0_level_0,rice_production
month,Unnamed: 1_level_1
1990-01-31,46.916667
1990-02-28,46.916667
1990-03-31,46.916667
1990-04-30,46.916667
1990-05-31,46.916667
...,...
2021-08-31,54.166667
2021-09-30,54.166667
2021-10-31,54.166667
2021-11-30,54.166667


# Australia Millet Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=millet&graph=production

In [154]:
df_millet_production_aus = pd.read_csv("data/aus/au-millet-production.csv")

In [155]:
df_millet_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,22,(1000 MT)
1,1961,37,(1000 MT)
2,1962,38,(1000 MT)
3,1963,26,(1000 MT)
4,1964,24,(1000 MT)
...,...,...,...
57,2017,36,(1000 MT)
58,2018,36,(1000 MT)
59,2019,36,(1000 MT)
60,2020,36,(1000 MT)


In [156]:
df_millet_production_aus_M = transform_indexmundi_yearly_data(df_millet_production_aus, "millet_production")

In [157]:
df_millet_production_aus_M.isnull().sum()

millet_production    0
dtype: int64

In [158]:
df_millet_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   millet_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [159]:
df_millet_production_aus_M

Unnamed: 0_level_0,millet_production
month,Unnamed: 1_level_1
1990-01-31,3.166667
1990-02-28,3.166667
1990-03-31,3.166667
1990-04-30,3.166667
1990-05-31,3.166667
...,...
2021-08-31,3.083333
2021-09-30,3.083333
2021-10-31,3.083333
2021-11-30,3.083333


# Australia Oats Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=oats&graph=production

In [160]:
df_oats_production_aus = pd.read_csv("data/aus/au-oats-production.csv")

In [161]:
df_oats_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,1381,(1000 MT)
1,1961,1000,(1000 MT)
2,1962,1248,(1000 MT)
3,1963,1238,(1000 MT)
4,1964,1271,(1000 MT)
...,...,...,...
57,2017,1227,(1000 MT)
58,2018,1135,(1000 MT)
59,2019,1143,(1000 MT)
60,2020,1675,(1000 MT)


In [162]:
df_oats_production_aus_M = transform_indexmundi_yearly_data(df_oats_production_aus, "oats_production")

In [163]:
df_oats_production_aus_M.isnull().sum()

oats_production    0
dtype: int64

In [164]:
df_oats_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   oats_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [165]:
df_oats_production_aus_M

Unnamed: 0_level_0,oats_production
month,Unnamed: 1_level_1
1990-01-31,129.500000
1990-02-28,129.500000
1990-03-31,129.500000
1990-04-30,129.500000
1990-05-31,129.500000
...,...
2021-08-31,129.166667
2021-09-30,129.166667
2021-10-31,129.166667
2021-11-30,129.166667


# Australia Swine Meat Production by Year


https://www.indexmundi.com/agriculture/?country=au&commodity=swine-meat&graph=production

In [166]:
df_swine_production_aus = pd.read_csv("data/aus/au-swine-meat-production.csv")

In [167]:
df_swine_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,107,(1000 MT CWE)
1,1961,114,(1000 MT CWE)
2,1962,122,(1000 MT CWE)
3,1963,111,(1000 MT CWE)
4,1964,118,(1000 MT CWE)
...,...,...,...
58,2018,424,(1000 MT CWE)
59,2019,398,(1000 MT CWE)
60,2020,419,(1000 MT CWE)
61,2021,440,(1000 MT CWE)


In [168]:
df_swine_production_aus_M = transform_indexmundi_yearly_data(df_swine_production_aus, "swine_production")

In [169]:
df_swine_production_aus_M.isnull().sum()

swine_production    0
dtype: int64

In [170]:
df_swine_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   swine_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [171]:
df_swine_production_aus_M

Unnamed: 0_level_0,swine_production
month,Unnamed: 1_level_1
1990-01-31,26.583333
1990-02-28,26.583333
1990-03-31,26.583333
1990-04-30,26.583333
1990-05-31,26.583333
...,...
2021-08-31,36.666667
2021-09-30,36.666667
2021-10-31,36.666667
2021-11-30,36.666667


# Australia Wheat Production by Year

https://www.indexmundi.com/agriculture/?country=au&commodity=wheat&graph=production

In [172]:
df_wheat_production_aus = pd.read_csv("data/aus/au-wheat-production.csv")

In [173]:
df_wheat_production_aus

Unnamed: 0,Market Year,Value,Unit Description
0,1960,7450,(1000 MT)
1,1961,6727,(1000 MT)
2,1962,8353,(1000 MT)
3,1963,8925,(1000 MT)
4,1964,10037,(1000 MT)
...,...,...,...
57,2017,20941,(1000 MT)
58,2018,17598,(1000 MT)
59,2019,14480,(1000 MT)
60,2020,33300,(1000 MT)


In [174]:
df_wheat_production_aus_M = transform_indexmundi_yearly_data(df_wheat_production_aus, "wheat_production")

In [175]:
df_wheat_production_aus_M.isnull().sum()

wheat_production    0
dtype: int64

In [176]:
df_wheat_production_aus_M.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 384 entries, 1990-01-31 to 2021-12-31
Freq: M
Data columns (total 1 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   wheat_production  384 non-null    float64
dtypes: float64(1)
memory usage: 6.0 KB


## final output

In [177]:
df_wheat_production_aus_M

Unnamed: 0_level_0,wheat_production
month,Unnamed: 1_level_1
1990-01-31,1255.500000
1990-02-28,1255.500000
1990-03-31,1255.500000
1990-04-30,1255.500000
1990-05-31,1255.500000
...,...
2021-08-31,2833.333333
2021-09-30,2833.333333
2021-10-31,2833.333333
2021-11-30,2833.333333


# AUD/NZD

https://www.investing.com/currencies/aud-nzd-historical-data

In [178]:
df_audnzd = prepare_forex_data("data/aus/AUD_NZD Historical Data.csv")

In [179]:
# Give the generic 'price' column a pair-specific name so it stays unique after merging.
df_audnzd = df_audnzd.rename(columns={'price': 'audnzd'})

In [180]:
df_audnzd.isnull().sum()

audnzd    0
dtype: int64

## final output

In [181]:
df_audnzd

Unnamed: 0_level_0,audnzd
date,Unnamed: 1_level_1
1990-01-31,1.2843
1990-02-28,1.2939
1990-03-31,1.3016
1990-04-30,1.3059
1990-05-31,1.3310
...,...
2022-01-31,1.0739
2022-02-28,1.0738
2022-03-31,1.0780
2022-04-30,1.0933


# AUD/CNY

https://www.investing.com/currencies/aud-cny-historical-data

In [182]:
df_audcny = prepare_forex_data("data/aus/AUD_CNY Historical Data.csv")

In [183]:
# Give the generic 'price' column a pair-specific name so it stays unique after merging.
df_audcny = df_audcny.rename(columns={'price': 'audcny'})

In [184]:
df_audcny.isnull().sum()

audcny    0
dtype: int64

## final output

In [185]:
df_audcny

Unnamed: 0_level_0,audcny
date,Unnamed: 1_level_1
1990-01-31,3.6494
1990-02-28,3.5968
1990-03-31,3.5731
1990-04-30,3.5566
1990-05-31,3.6328
...,...
2022-01-31,4.4931
2022-02-28,4.5812
2022-03-31,4.7418
2022-04-30,4.6672


# AUD/USD

https://www.investing.com/currencies/aud-usd-historical-data

In [186]:
df_audusd = prepare_forex_data("data/aus/AUD_USD Historical Data.csv")

In [187]:
# Give the generic 'price' column a pair-specific name so it stays unique after merging.
df_audusd = df_audusd.rename(columns={'price': 'audusd'})

In [188]:
df_audusd.isnull().sum()

audusd    0
dtype: int64

## final output

In [189]:
df_audusd

Unnamed: 0_level_0,audusd
date,Unnamed: 1_level_1
1990-01-31,0.7712
1990-02-28,0.7601
1990-03-31,0.7553
1990-04-30,0.7515
1990-05-31,0.7677
...,...
2022-01-31,0.7064
2022-02-28,0.7262
2022-03-31,0.7480
2022-04-30,0.7063


# Australian rain

http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode=139&p_display_type=dataFile&p_startYear=&p_c=&p_stn_num=042027

Weather station is in Queensland because most of the cattle production happens there:

https://www.beefcentral.com/production/australias-20-largest-regions-for-cattle-population/

In [190]:
df_rain_aus = pd.read_csv("data/aus/weather/rain/queensland_rain.csv")

In [191]:
df_rain_aus

Unnamed: 0,Product code,Station number,Year,Month,Monthly Precipitation Total (millimetres),Quality
0,IDCJAC0001,42027,1912,12,6.4,Y
1,IDCJAC0001,42027,1912,11,51.8,Y
2,IDCJAC0001,42027,1912,10,44.1,Y
3,IDCJAC0001,42027,1912,9,12.7,Y
4,IDCJAC0001,42027,1912,8,25.7,Y
...,...,...,...,...,...,...
1269,IDCJAC0001,42027,2021,5,24.0,N
1270,IDCJAC0001,42027,2021,4,20.0,N
1271,IDCJAC0001,42027,2021,3,243.3,N
1272,IDCJAC0001,42027,2021,2,106.0,N


In [192]:
df_rain_aus = df_rain_aus.drop(["Product code","Station number","Quality"], axis=1)

In [193]:
df_rain_aus = df_rain_aus.rename(columns={"Monthly Precipitation Total (millimetres)" : "rain"})

In [194]:
# Build a "01/<month>/<year>" string for every row (e.g. "01/9/1912").
# The columns are selected by label rather than by position, so the result does not depend
# on column order and avoids the deprecated integer-position lookup on a labelled row Series.
# The vectorised concatenation also replaces the slower row-by-row apply.
df_rain_aus["date"] = (
    "01/"
    + df_rain_aus["Month"].astype(int).astype(str)
    + "/"
    + df_rain_aus["Year"].astype(int).astype(str)
)

In [195]:
df_rain_aus = df_rain_aus.drop(["Year","Month"], axis=1)

In [196]:
df_rain_aus

Unnamed: 0,rain,date
0,6.4,01/12/1912
1,51.8,01/11/1912
2,44.1,01/10/1912
3,12.7,01/9/1912
4,25.7,01/8/1912
...,...,...
1269,24.0,01/5/2021
1270,20.0,01/4/2021
1271,243.3,01/3/2021
1272,106.0,01/2/2021


In [197]:
# The month in these strings has no zero padding (e.g. "01/9/1912"); plain `%m` parses it.
# The platform-specific unpadded directives discussed in
# https://stackoverflow.com/questions/9525944/python-datetime-formatting-without-zero-padding
# (`%#m` on Windows, `%-m` on Linux) only matter when formatting dates back into strings.
# Adding MonthEnd(1) moves each first-of-month stamp to the last day of that month.
df_rain_aus["date"] = pd.to_datetime(df_rain_aus["date"], format='%d/%m/%Y') + MonthEnd(1)

In [198]:
# The source file is not in chronological order; sort oldest-first before indexing by date.
df_rain_aus = df_rain_aus.sort_values(by="date")

In [199]:
df_rain_aus = df_rain_aus.set_index("date")

In [200]:
df_rain_aus = df_rain_aus[df_rain_aus.index.year > 1998]

In [201]:
df_rain_aus.isnull().sum()

rain    0
dtype: int64

In [202]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_rain_aus.index)

DatetimeIndex(['1999-09-30', '2000-07-31', '2001-02-28', '2001-06-30',
               '2004-01-31', '2005-06-30', '2007-12-31', '2009-04-30',
               '2010-10-31', '2012-02-29', '2014-10-31', '2014-12-31',
               '2015-07-31', '2015-09-30', '2015-12-31', '2016-11-30',
               '2016-12-31', '2017-09-30', '2018-05-31', '2018-07-31',
               '2019-01-31', '2019-04-30', '2019-05-31', '2019-06-30',
               '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',
               '2019-11-30', '2019-12-31'],
              dtype='datetime64[ns]', freq=None)

In [203]:
# Re-index onto a gap-free month-end grid: `.last()` leaves NaN for every month that has no
# observation (the dates listed by the check above) and `.ffill()` then copies the previous
# month's rainfall into them.
# NOTE(review): 2019-04 to 2019-12 are all missing, so nine consecutive months repeat the
# March 2019 reading — confirm that forward-filling is acceptable for rainfall.
df_rain_aus = df_rain_aus.resample('M').last().ffill()

In [204]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_rain_aus.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

## final output

In [205]:
df_rain_aus

Unnamed: 0_level_0,rain
date,Unnamed: 1_level_1
1999-01-31,147.6
1999-02-28,21.4
1999-03-31,116.8
1999-04-30,22.9
1999-05-31,2.4
...,...
2021-08-31,16.0
2021-09-30,7.0
2021-10-31,25.0
2021-11-30,67.1


# Australian sunshine

http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode=203&p_display_type=dataFile&p_startYear=&p_c=&p_stn_num=042027

Weather station is in Queensland because most of the cattle production happens there:

https://www.beefcentral.com/production/australias-20-largest-regions-for-cattle-population/

In [206]:
df_sun_aus = pd.read_csv("data/aus/weather/sunshine/queensland_sunshine.csv")

In [207]:
df_sun_aus

Unnamed: 0,Product code,Station number,Year,Month,Monthly mean daily global solar exposure (MJ/m*m)
0,IDCJAC0003,42027,1990,1,27.9
1,IDCJAC0003,42027,1990,2,26.0
2,IDCJAC0003,42027,1990,3,19.0
3,IDCJAC0003,42027,1990,4,11.9
4,IDCJAC0003,42027,1990,5,13.3
...,...,...,...,...,...
383,IDCJAC0003,42027,2022,1,26.0
384,IDCJAC0003,42027,2022,2,21.5
385,IDCJAC0003,42027,2022,3,20.9
386,IDCJAC0003,42027,2022,4,16.9


In [208]:
df_sun_aus = df_sun_aus.drop(["Product code","Station number"], axis=1)

In [209]:
df_sun_aus = df_sun_aus.rename(columns={"Monthly mean daily global solar exposure (MJ/m*m)" : "sunshine"})

In [210]:
# Build a "01/<month>/<year>" string for every row (e.g. "01/1/1990").
# The columns are selected by label rather than by position, so the result does not depend
# on column order and avoids the deprecated integer-position lookup on a labelled row Series.
# The vectorised concatenation also replaces the slower row-by-row apply.
df_sun_aus["date"] = (
    "01/"
    + df_sun_aus["Month"].astype(int).astype(str)
    + "/"
    + df_sun_aus["Year"].astype(int).astype(str)
)

In [211]:
df_sun_aus = df_sun_aus.drop(["Year","Month"], axis=1)

In [212]:
df_sun_aus

Unnamed: 0,sunshine,date
0,27.9,01/1/1990
1,26.0,01/2/1990
2,19.0,01/3/1990
3,11.9,01/4/1990
4,13.3,01/5/1990
...,...,...
383,26.0,01/1/2022
384,21.5,01/2/2022
385,20.9,01/3/2022
386,16.9,01/4/2022


In [213]:
# The month in these strings has no zero padding (e.g. "01/1/1990"); plain `%m` parses it.
# The platform-specific unpadded directives discussed in
# https://stackoverflow.com/questions/9525944/python-datetime-formatting-without-zero-padding
# (`%#m` on Windows, `%-m` on Linux) only matter when formatting dates back into strings.
# Adding MonthEnd(1) moves each first-of-month stamp to the last day of that month.
df_sun_aus["date"] = pd.to_datetime(df_sun_aus["date"], format='%d/%m/%Y') + MonthEnd(1)

In [214]:
# Make sure the rows are oldest-first before the date column becomes the index.
df_sun_aus = df_sun_aus.sort_values(by="date")

In [215]:
df_sun_aus = df_sun_aus.set_index("date")

In [216]:
df_sun_aus = df_sun_aus[df_sun_aus.index.year > 1998]

In [217]:
df_sun_aus.isnull().sum()

sunshine    0
dtype: int64

In [218]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_sun_aus.index)

DatetimeIndex(['2005-12-31'], dtype='datetime64[ns]', freq=None)

In [219]:
df_sun_aus = df_sun_aus.resample('M').last().ffill()

In [220]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_sun_aus.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

## final output

In [221]:
df_sun_aus

Unnamed: 0_level_0,sunshine
date,Unnamed: 1_level_1
1999-01-31,27.0
1999-02-28,23.8
1999-03-31,21.3
1999-04-30,18.0
1999-05-31,13.5
...,...
2022-01-31,26.0
2022-02-28,21.5
2022-03-31,20.9
2022-04-30,16.9


# Australian temperature

http://www.bom.gov.au/jsp/ncc/cdio/weatherData/av?p_nccObsCode=36&p_display_type=dataFile&p_startYear=&p_c=&p_stn_num=053115

Weather station is in Queensland because most of the cattle production happens there:

https://www.beefcentral.com/production/australias-20-largest-regions-for-cattle-population/

In [222]:
df_temp_aus = pd.read_csv("data/aus/weather/temperature/queensland_temperature.csv")

In [223]:
df_temp_aus

Unnamed: 0,Product code,Bureau of Meteorology station number,Year,Month,Mean maximum temperature (°C),Quality
0,IDCJAC0002,53115,1995,5,20.4,Y
1,IDCJAC0002,53115,1995,6,17.7,Y
2,IDCJAC0002,53115,1995,7,17.2,Y
3,IDCJAC0002,53115,1995,8,22.5,Y
4,IDCJAC0002,53115,1995,9,23.9,Y
...,...,...,...,...,...,...
318,IDCJAC0002,53115,2021,11,28.9,Y
319,IDCJAC0002,53115,2021,12,33.1,Y
320,IDCJAC0002,53115,2022,1,34.8,Y
321,IDCJAC0002,53115,2022,2,32.8,Y


In [224]:
df_temp_aus = df_temp_aus.drop(["Product code","Bureau of Meteorology station number", "Quality"], axis=1)

In [225]:
df_temp_aus = df_temp_aus.rename(columns={"Mean maximum temperature (°C)" : "temp_cel"})

In [226]:
# Build a "01/<month>/<year>" string for every row (e.g. "01/5/1995").
# The columns are selected by label rather than by position, so the result does not depend
# on column order and avoids the deprecated integer-position lookup on a labelled row Series.
# The vectorised concatenation also replaces the slower row-by-row apply.
df_temp_aus["date"] = (
    "01/"
    + df_temp_aus["Month"].astype(int).astype(str)
    + "/"
    + df_temp_aus["Year"].astype(int).astype(str)
)

In [227]:
df_temp_aus = df_temp_aus.drop(["Year","Month"], axis=1)

In [228]:
df_temp_aus

Unnamed: 0,temp_cel,date
0,20.4,01/5/1995
1,17.7,01/6/1995
2,17.2,01/7/1995
3,22.5,01/8/1995
4,23.9,01/9/1995
...,...,...
318,28.9,01/11/2021
319,33.1,01/12/2021
320,34.8,01/1/2022
321,32.8,01/2/2022


In [229]:
# The month in these strings has no zero padding (e.g. "01/5/1995"); plain `%m` parses it.
# The platform-specific unpadded directives discussed in
# https://stackoverflow.com/questions/9525944/python-datetime-formatting-without-zero-padding
# (`%#m` on Windows, `%-m` on Linux) only matter when formatting dates back into strings.
# Adding MonthEnd(1) moves each first-of-month stamp to the last day of that month.
df_temp_aus["date"] = pd.to_datetime(df_temp_aus["date"], format='%d/%m/%Y') + MonthEnd(1)

In [230]:
# Make sure the rows are oldest-first before the date column becomes the index.
df_temp_aus = df_temp_aus.sort_values(by="date")

In [231]:
df_temp_aus = df_temp_aus.set_index("date")

In [232]:
df_temp_aus = df_temp_aus[df_temp_aus.index.year > 1998]

In [233]:
df_temp_aus.isnull().sum()

temp_cel    0
dtype: int64

In [234]:
# NOTE(review): this repeats the `year > 1998` filter already applied to df_temp_aus a few
# cells earlier; it changes nothing here and the cell can be removed.
df_temp_aus = df_temp_aus[df_temp_aus.index.year > 1998]

In [235]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_temp_aus.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

In [236]:
df_temp_aus = df_temp_aus.resample('M').last().ffill()

In [237]:
pd.date_range(start = '1999-01-01', end = '2020-12-31', freq='M' ).difference(df_temp_aus.index)

DatetimeIndex([], dtype='datetime64[ns]', freq=None)

## final output

In [238]:
df_temp_aus

Unnamed: 0_level_0,temp_cel
date,Unnamed: 1_level_1
1999-01-31,33.0
1999-02-28,32.1
1999-03-31,30.0
1999-04-30,25.3
1999-05-31,23.7
...,...
2021-11-30,28.9
2021-12-31,33.1
2022-01-31,34.8
2022-02-28,32.8


# Australian meat slaughterings and production

https://www.abs.gov.au/statistics/industry/agriculture/livestock-and-meat-australia/latest-release#data-download

In [239]:
# Source: 15 separate Excel sheets downloaded from the ABS release linked above; the
# Australia-wide totals from each sheet were pasted by hand into the CSV loaded below.
# The *_prod columns are in tonnes.
# NOTE(review): the original note said all figures are in tonnes, but the *_slaughterings
# columns look like animal counts (presumably thousands of head) — confirm against the ABS sheets.
df_meat_slau_prod = pd.read_csv("data/aus/slaughterings_and_production/aus_total_meat_slaughter_and_production.csv")

In [240]:
df_meat_slau_prod

Unnamed: 0,date,cattle_exclud_calves_prod,lambs_prod,sheep_prod,pigs_prod,total_beef_prod,veel_prod,bulls_bullocks_steers_slaughterings,calves_slaughterings,cattle_exclu_calves_slaughterings,cows_heifers_slaughterings,lamb_slaughterings,pig_slaughterings,sheep_slaughterings
0,Jul-1972,121000,25184,39814,18028,209456,5430,,196,567,,1593,365,2117
1,Aug-1972,127842,24196,37682,20091,216225,6414,,251,605,,1600,409,2017
2,Sep-1972,111796,22955,33739,18718,192245,5037,,170,541,,1535,380,1814
3,Oct-1972,110565,27916,41975,20007,204724,4261,,129,537,,1904,408,2213
4,Nov-1972,109471,30101,48027,20629,212670,4442,,104,538,,1956,420,2480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,Feb-2020,186445,44383,17844,31154,281399,1574,305.0,22,634,329.0,1809,402,736
572,Mar-2020,192183,44660,13962,37411,290281,2064,314.0,40,659,345.0,1833,475,572
573,Apr-2020,180626,38763,10135,33195,264617,1899,285.0,38,631,345.0,1583,416,411
574,May-2020,183105,36075,8161,35647,264540,1553,277.0,38,643,366.0,1450,443,326


In [241]:
df_meat_slau_prod.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 576 entries, 0 to 575
Data columns (total 14 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   date                                 576 non-null    object 
 1   cattle_exclud_calves_prod            576 non-null    int64  
 2   lambs_prod                           576 non-null    int64  
 3   sheep_prod                           576 non-null    int64  
 4   pigs_prod                            576 non-null    int64  
 5   total_beef_prod                      576 non-null    int64  
 6   veel_prod                            576 non-null    int64  
 7   bulls_bullocks_steers_slaughterings  528 non-null    float64
 8   calves_slaughterings                 576 non-null    int64  
 9   cattle_exclu_calves_slaughterings    576 non-null    int64  
 10  cows_heifers_slaughterings           528 non-null    float64
 11  lamb_slaughterings              

In [242]:
df_meat_slau_prod["date"] = pd.to_datetime(df_meat_slau_prod["date"], format='%b-%Y') + MonthEnd(1)

In [243]:
df_meat_slau_prod = df_meat_slau_prod.set_index("date")

In [244]:
df_meat_slau_prod = df_meat_slau_prod[df_meat_slau_prod.index.year > 1998]

In [245]:
df_meat_slau_prod.isnull().sum()

cattle_exclud_calves_prod              0
lambs_prod                             0
sheep_prod                             0
pigs_prod                              0
total_beef_prod                        0
veel_prod                              0
bulls_bullocks_steers_slaughterings    0
calves_slaughterings                   0
cattle_exclu_calves_slaughterings      0
cows_heifers_slaughterings             0
lamb_slaughterings                     0
pig_slaughterings                      0
sheep_slaughterings                    0
dtype: int64

In [246]:
# Month-end stamps for the months after the last observation, up to December 2021.
# The range starts on the last observed month-end itself, so its first row is dropped.
df_missing = (
    pd.date_range(start=df_meat_slau_prod.index[-1], end='31-DEC-2021', freq='M')
    .to_frame(index=False, name='date')
    .iloc[1:]
)

In [247]:
# Give the placeholder frame the same columns as the observed data, all filled with NaN.
df_missing = df_missing.assign(
    **{column_name: np.nan for column_name in df_meat_slau_prod.columns}
)

In [248]:
df_missing = df_missing.set_index("date")

In [249]:
df_missing

Unnamed: 0_level_0,cattle_exclud_calves_prod,lambs_prod,sheep_prod,pigs_prod,total_beef_prod,veel_prod,bulls_bullocks_steers_slaughterings,calves_slaughterings,cattle_exclu_calves_slaughterings,cows_heifers_slaughterings,lamb_slaughterings,pig_slaughterings,sheep_slaughterings
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-07-31,,,,,,,,,,,,,
2020-08-31,,,,,,,,,,,,,
2020-09-30,,,,,,,,,,,,,
2020-10-31,,,,,,,,,,,,,
2020-11-30,,,,,,,,,,,,,
2020-12-31,,,,,,,,,,,,,
2021-01-31,,,,,,,,,,,,,
2021-02-28,,,,,,,,,,,,,
2021-03-31,,,,,,,,,,,,,
2021-04-30,,,,,,,,,,,,,


In [250]:
df_meat_slau_prod = pd.concat([df_meat_slau_prod, df_missing])

In [251]:
# Fill the appended placeholder months.
# NOTE: those rows sit after the last observation, so there is no later value to
# interpolate towards; the last observed row is carried forward unchanged (the final output
# below shows identical values for the 2021 months).
df_meat_slau_prod = df_meat_slau_prod.interpolate(method='linear', limit_direction='both')

## final output

In [252]:
df_meat_slau_prod

Unnamed: 0_level_0,cattle_exclud_calves_prod,lambs_prod,sheep_prod,pigs_prod,total_beef_prod,veel_prod,bulls_bullocks_steers_slaughterings,calves_slaughterings,cattle_exclu_calves_slaughterings,cows_heifers_slaughterings,lamb_slaughterings,pig_slaughterings,sheep_slaughterings
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1999-01-31,138508.0,23845.0,27998.0,25360.0,217522.0,1812.0,269.0,35.0,558.0,288.0,1228.0,360.0,1326.0
1999-02-28,172214.0,24371.0,30591.0,29348.0,258415.0,1892.0,335.0,36.0,677.0,343.0,1252.0,423.0,1467.0
1999-03-31,180539.0,27027.0,29581.0,32240.0,271377.0,1990.0,364.0,46.0,705.0,341.0,1392.0,455.0,1464.0
1999-04-30,160523.0,24327.0,25038.0,30218.0,242372.0,2267.0,304.0,57.0,632.0,327.0,1250.0,415.0,1235.0
1999-05-31,166786.0,24496.0,25315.0,33312.0,252699.0,2790.0,314.0,80.0,665.0,351.0,1246.0,449.0,1239.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-31,176712.0,37388.0,7983.0,38969.0,262689.0,1636.0,272.0,38.0,612.0,340.0,1487.0,486.0,301.0
2021-09-30,176712.0,37388.0,7983.0,38969.0,262689.0,1636.0,272.0,38.0,612.0,340.0,1487.0,486.0,301.0
2021-10-31,176712.0,37388.0,7983.0,38969.0,262689.0,1636.0,272.0,38.0,612.0,340.0,1487.0,486.0,301.0
2021-11-30,176712.0,37388.0,7983.0,38969.0,262689.0,1636.0,272.0,38.0,612.0,340.0,1487.0,486.0,301.0


# Consumer Price Index of All Items in Australia

https://fred.stlouisfed.org/series/AUSCPIALLQINMEI

In [253]:
df_cpi_all = pd.read_csv("data/aus/cpi/Consumer Price Index of All Items in Australia.csv")

In [254]:
df_cpi_all

Unnamed: 0,DATE,AUSCPIALLQINMEI
0,1960-01-01,6.965405
1,1960-04-01,7.058277
2,1960-07-01,7.151149
3,1960-10-01,7.244021
4,1961-01-01,7.244021
...,...,...
243,2020-10-01,108.846065
244,2021-01-01,109.496169
245,2021-04-01,110.332018
246,2021-07-01,111.167866


In [255]:
df_cpi_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 248 entries, 0 to 247
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   DATE             248 non-null    object 
 1   AUSCPIALLQINMEI  248 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.0+ KB


In [256]:
df_cpi_all = transform_fred_stlouisfed_quarterlydata(df_cpi_all, "cpi_all")

In [257]:
df_cpi_all.isnull().sum()

cpi_all    0
dtype: int64

## final output

In [258]:
df_cpi_all

Unnamed: 0_level_0,cpi_all
date,Unnamed: 1_level_1
1990-01-31,52.194103
1990-02-28,53.029951
1990-03-31,53.029951
1990-04-30,53.029951
1990-05-31,53.401440
...,...
2021-08-31,112.653819
2021-09-30,112.653819
2021-10-31,112.653819
2021-11-30,112.653819


# Real Residential Property Prices for Australia

https://fred.stlouisfed.org/series/QAUR628BIS

In [259]:
df_cpi_residential = pd.read_csv("data/aus/cpi/Real Residential Property Prices for Australia.csv")

In [260]:
df_cpi_residential = transform_fred_stlouisfed_quarterlydata(df_cpi_residential, "cpi_residential")

In [261]:
df_cpi_residential.isnull().sum()

cpi_residential    0
dtype: int64

## final output

In [262]:
df_cpi_residential

Unnamed: 0_level_0,cpi_residential
date,Unnamed: 1_level_1
1990-01-31,48.2928
1990-02-28,47.9595
1990-03-31,47.9595
1990-04-30,47.9595
1990-05-31,47.2009
...,...
2021-08-31,141.0406
2021-09-30,141.0406
2021-10-31,141.0406
2021-11-30,141.0406


# Consumer Price Index: All Items Excluding Food and Energy for Australia 

https://fred.stlouisfed.org/series/AUSCPICORQINMEI

In [263]:
df_cpi_all_ex_food_energy = pd.read_csv("data/aus/cpi/Consumer Price Index All Items Excluding Food and Energy for Australia.csv")

In [264]:
df_cpi_all_ex_food_energy

Unnamed: 0,DATE,AUSCPICORQINMEI
0,1971-04-01,9.657205
1,1971-07-01,9.976837
2,1971-10-01,10.165847
3,1972-01-01,10.264859
4,1972-04-01,10.376892
...,...,...
198,2020-10-01,107.521068
199,2021-01-01,107.958167
200,2021-04-01,108.441248
201,2021-07-01,109.197185


In [265]:
df_cpi_all_ex_food_energy = transform_fred_stlouisfed_quarterlydata(df_cpi_all_ex_food_energy, "cpi_all_ex_food_energy")

In [266]:
df_cpi_all_ex_food_energy.isnull().sum()

cpi_all_ex_food_energy    0
dtype: int64

## final output

In [267]:
df_cpi_all_ex_food_energy

Unnamed: 0_level_0,cpi_all_ex_food_energy
date,Unnamed: 1_level_1
1990-01-31,53.434080
1990-02-28,54.245145
1990-03-31,54.245145
1990-04-30,54.245145
1990-05-31,54.712763
...,...
2021-08-31,110.687578
2021-09-30,110.687578
2021-10-31,110.687578
2021-11-30,110.687578


# Consumer Price Index: Food for Australia

https://fred.stlouisfed.org/series/AUSCPIFODQINMEI

In [268]:
df_cpi_food = pd.read_csv("data/aus/cpi/Consumer Price Index Food for Australia.csv")

In [269]:
df_cpi_food

Unnamed: 0,DATE,AUSCPIFODQINMEI
0,1976-07-01,17.208328
1,1976-10-01,17.972852
2,1977-01-01,18.183672
3,1977-04-01,18.649332
4,1977-07-01,19.467801
...,...,...
162,2017-01-01,106.900291
163,2017-04-01,106.204597
164,2017-07-01,104.641450
165,2017-10-01,106.116552


In [270]:
df_cpi_food = transform_fred_stlouisfed_quarterlydata(df_cpi_food, "cpi_food")

In [271]:
df_cpi_food.isnull().sum()

cpi_food    0
dtype: int64

## final output

In [272]:
df_cpi_food

Unnamed: 0_level_0,cpi_food
date,Unnamed: 1_level_1
1990-01-31,52.899823
1990-02-28,54.487778
1990-03-31,54.487778
1990-04-30,54.487778
1990-05-31,53.684193
...,...
2021-08-31,106.631196
2021-09-30,106.631196
2021-10-31,106.631196
2021-11-30,106.631196


# Producer Prices Index: Economic Activities: Total Manufacturing for Australia

https://fred.stlouisfed.org/series/PIEAMP01AUQ661N

In [273]:
df_prod_index = pd.read_csv("data/aus/cpi/Producer Prices Index Economic Activities Total Manufacturing for Australia.csv")

In [274]:
df_prod_index = transform_fred_stlouisfed_quarterlydata(df_prod_index, "cpi_prod_index")

In [275]:
df_prod_index.isnull().sum()

cpi_prod_index    0
dtype: int64

## final output

In [276]:
df_prod_index

Unnamed: 0_level_0,cpi_prod_index
date,Unnamed: 1_level_1
1990-01-31,56.813266
1990-02-28,57.486181
1990-03-31,57.486181
1990-04-30,57.486181
1990-05-31,58.159096
...,...
2021-08-31,125.642874
2021-09-30,125.642874
2021-10-31,125.642874
2021-11-30,125.642874


# Australian petrol and diesel


https://www.fuelwatch.wa.gov.au/retail/monthly

In [277]:
# This CSV was assembled by hand from two FuelWatch downloads containing the historical
# monthly diesel and unleaded petrol (ULP) prices:
# Monthly-Diesel-prices-Metro-199901-202205.csv & Monthly-ULP-prices-Metro-199901-202205.csv
# NOTE(review): the original note called the downloads xlsx files although the names above
# end in .csv, and the file loaded below is named 2001-2022 although its rows start in 1999
# — confirm which is right.
df_aus_fuel = pd.read_csv("data/aus/fuel/aus_petrol_diesel_prices_2001-2022.csv")

In [278]:
# Prices are per litre in Australian currency.
# NOTE(review): values such as 194.5 are presumably cents per litre, not dollars — confirm
# against the FuelWatch source.
df_aus_fuel

Unnamed: 0,month,diesel,petrol
0,May-22,194.5,173.2
1,Apr-22,192.7,168.8
2,Mar-22,207.4,198.2
3,Feb-22,176.5,178.6
4,Jan-22,164.6,167.0
...,...,...,...
276,May-99,,
277,Apr-99,,
278,Mar-99,,
279,Feb-99,,


In [279]:
df_aus_fuel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 281 entries, 0 to 280
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   month   281 non-null    object 
 1   diesel  257 non-null    float64
 2   petrol  257 non-null    float64
dtypes: float64(2), object(1)
memory usage: 6.7+ KB


In [280]:
df_aus_fuel["month"] = pd.to_datetime(df_aus_fuel["month"], format='%b-%y') + MonthEnd(1)

In [281]:
df_aus_fuel

Unnamed: 0,month,diesel,petrol
0,2022-05-31,194.5,173.2
1,2022-04-30,192.7,168.8
2,2022-03-31,207.4,198.2
3,2022-02-28,176.5,178.6
4,2022-01-31,164.6,167.0
...,...,...,...
276,1999-05-31,,
277,1999-04-30,,
278,1999-03-31,,
279,1999-02-28,,


In [282]:
df_aus_fuel = df_aus_fuel.set_index("month")

In [283]:
df_aus_fuel

Unnamed: 0_level_0,diesel,petrol
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-05-31,194.5,173.2
2022-04-30,192.7,168.8
2022-03-31,207.4,198.2
2022-02-28,176.5,178.6
2022-01-31,164.6,167.0
...,...,...
1999-05-31,,
1999-04-30,,
1999-03-31,,
1999-02-28,,


In [284]:
# Fill the months that have no recorded diesel price.
# The frame is still newest-first here, so the unrecorded earliest months sit at the bottom.
# `limit_direction='both'` makes the fill independent of row order: gaps between two
# observations are interpolated linearly, and gaps at either edge take the nearest observed
# value (which is what the default forward-only fill already did for the bottom rows).
df_aus_fuel['diesel'] = df_aus_fuel['diesel'].interpolate(method='linear', limit_direction='both')

In [285]:
# Fill the months that have no recorded petrol price.
# The frame is still newest-first here, so the unrecorded earliest months sit at the bottom.
# `limit_direction='both'` makes the fill independent of row order: gaps between two
# observations are interpolated linearly, and gaps at either edge take the nearest observed
# value (which is what the default forward-only fill already did for the bottom rows).
df_aus_fuel['petrol'] = df_aus_fuel['petrol'].interpolate(method='linear', limit_direction='both')

In [286]:
df_aus_fuel

Unnamed: 0_level_0,diesel,petrol
month,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-05-31,194.5,173.2
2022-04-30,192.7,168.8
2022-03-31,207.4,198.2
2022-02-28,176.5,178.6
2022-01-31,164.6,167.0
...,...,...
1999-05-31,93.8,85.0
1999-04-30,93.8,85.0
1999-03-31,93.8,85.0
1999-02-28,93.8,85.0


In [287]:
df_aus_fuel.isnull().sum()

diesel    0
petrol    0
dtype: int64

In [288]:
# `month` is the index here, not a column, so sort on the index directly (oldest month first).
# Rebinding the name instead of using inplace=True keeps the cell chainable and safe to re-run.
df_aus_fuel = df_aus_fuel.sort_index()

## final output

In [289]:
df_aus_fuel

Unnamed: 0_level_0,diesel,petrol
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-01-31,93.8,85.0
1999-02-28,93.8,85.0
1999-03-31,93.8,85.0
1999-04-30,93.8,85.0
1999-05-31,93.8,85.0
...,...,...
2022-01-31,164.6,167.0
2022-02-28,176.5,178.6
2022-03-31,207.4,198.2
2022-04-30,192.7,168.8


# COLLATE ALL DATAFRAMES TOGETHER

## merge Australian beef and meat consumption

In [290]:
# Start the collated frame: beef prices joined to monthly meat consumption, index on index.
df_final = df_beef_aus.merge(df_meat_consumption_aus_monthly, how='inner', left_index=True, right_index=True)

In [291]:
df_final.isnull().sum().sum()

0

In [292]:
df_final.shape

(276, 2)

In [293]:
df_final

Unnamed: 0_level_0,beef_per_kg,meat_consumption
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-01-31,2.71,61.833333
1999-02-28,2.80,61.833333
1999-03-31,2.79,61.833333
1999-04-30,2.68,61.833333
1999-05-31,2.61,61.833333
...,...,...
2021-08-31,7.69,50.416667
2021-09-30,7.75,50.416667
2021-10-31,7.70,50.416667
2021-11-30,8.12,50.416667


## merge Australian beef and meat exports

In [294]:
df_final = df_final.merge(df_exports_aus_M, how='inner', left_index=True, right_index=True)

In [295]:
df_final.isnull().sum().sum()

0

In [296]:
df_final.shape

(276, 3)

## merge in meat imports

In [297]:
df_final = df_final.merge(df_imports_aus_M, how='inner', left_index=True, right_index=True)

In [298]:
df_final.isnull().sum().sum()

0

In [299]:
df_final.shape

(276, 4)

## merge in total distribution

In [300]:
df_final = df_final.merge(df_meat_dist_aus_M, how='inner', left_index=True, right_index=True)

In [301]:
df_final.isnull().sum().sum()

0

In [302]:
df_final.shape

(276, 5)

## merge in meat supply

In [303]:
df_final = df_final.merge(df_beef_supply_aus_M, how='inner', left_index=True, right_index=True)

In [304]:
df_final.isnull().sum().sum()

0

In [305]:
df_final.shape

(276, 6)

## merge in cattle production

In [306]:
df_final = df_final.merge(df_cattle_production_aus_M, how='inner', left_index=True, right_index=True)

In [307]:
df_final.isnull().sum().sum()

0

In [308]:
df_final.shape

(276, 7)

## merge in swine animal count production

In [309]:
df_final = df_final.merge(df_swine_production_aus_M, how='inner', left_index=True, right_index=True)

In [310]:
df_final.isnull().sum().sum()

0

In [311]:
df_final.shape

(276, 8)

## merge in barley production

In [312]:
df_final = df_final.merge(df_barley_production_aus_M, how='inner', left_index=True, right_index=True)

In [313]:
df_final.isnull().sum().sum()

0

In [314]:
df_final.shape

(276, 9)

## merge in beef and veal production

In [315]:
df_final = df_final.merge(df_beef_and_veal_production_aus_M, how='inner', left_index=True, right_index=True)

In [316]:
df_final.isnull().sum().sum()

0

In [317]:
df_final.shape

(276, 10)

## merge in poultry production

In [318]:
# This inner join is where the row count falls from 276 to 264 (compare the shapes displayed
# before and after this cell): 12 rows of df_final find no matching index in the poultry frame.
df_final = df_final.merge(df_poultry_production_aus_M, how='inner', left_index=True, right_index=True)

In [319]:
df_final.isnull().sum().sum()

0

In [320]:
df_final.shape

(264, 11)

## merge in corn production

In [321]:
df_final = df_final.merge(df_corn_production_aus_M, how='inner', left_index=True, right_index=True)

In [322]:
df_final.isnull().sum().sum()

0

In [323]:
df_final.shape

(264, 12)

## merge cotton production

In [324]:
df_final = df_final.merge(df_cotton_production_aus_M, how='inner', left_index=True, right_index=True)

In [325]:
df_final.isnull().sum().sum()

0

In [326]:
df_final.shape

(264, 13)

## merge in butter production

In [327]:
df_final = df_final.merge(df_butter_production_aus_M, how='inner', left_index=True, right_index=True)

In [328]:
df_final.isnull().sum().sum()

0

In [329]:
df_final.shape

(264, 14)

## merge in cheese production

In [330]:
df_final = df_final.merge(df_cheese_production_aus_M, how='inner', left_index=True, right_index=True)

In [331]:
df_final.isnull().sum().sum()

0

In [332]:
df_final.shape

(264, 15)

## merge in milk powder production

In [333]:
df_final = df_final.merge(df_powdered_milk_production_aus_M, how='inner', left_index=True, right_index=True)

In [334]:
df_final.isnull().sum().sum()

0

In [335]:
df_final.shape

(264, 16)

## merge in milk production

In [336]:
df_final = df_final.merge(df_milk_production_aus_M, how='inner', left_index=True, right_index=True)

In [337]:
df_final.isnull().sum().sum()

0

In [338]:
df_final.shape

(264, 17)

## merge in dry milk production

In [339]:
df_final = df_final.merge(df_nonfatmilk_production_aus_M, how='inner', left_index=True, right_index=True)

In [340]:
df_final.isnull().sum().sum()

0

In [341]:
df_final.shape

(264, 18)

## merge in milled rice production

In [342]:
df_final = df_final.merge(df_rice_production_aus_M, how='inner', left_index=True, right_index=True)

In [343]:
df_final.isnull().sum().sum()

0

In [344]:
df_final.shape

(264, 19)

## merge in millet production

In [345]:
df_final = df_final.merge(df_millet_production_aus_M, how='inner', left_index=True, right_index=True)

In [346]:
df_final.isnull().sum().sum()

0

In [347]:
df_final.shape

(264, 20)

## merge in oats production

In [348]:
df_final = df_final.merge(df_oats_production_aus_M, how='inner', left_index=True, right_index=True)

In [349]:
df_final.isnull().sum().sum()

0

In [350]:
df_final.shape

(264, 21)

## merge in swine production

In [351]:
# NOTE(review): df_swine_production_aus_M was already merged under "merge in swine animal count
# production" above. Merging it a second time is what yields the suffixed `swine_production_x`
# column visible in the final frame (and presumably a matching `swine_production_y`), i.e. the
# same series twice. Given the two different headings, one of the two cells presumably meant a
# different frame — confirm which, or drop this merge.
df_final= pd.merge(df_final, df_swine_production_aus_M, how='inner', left_index=True, right_index=True)

In [352]:
df_final.isnull().sum().sum()

0

In [353]:
df_final.shape

(264, 22)

## merge in wheat production

In [354]:
df_final = df_final.merge(df_wheat_production_aus_M, how='inner', left_index=True, right_index=True)

In [355]:
df_final.isnull().sum().sum()

0

In [356]:
df_final.shape

(264, 23)

## merge in AUDNZD exchange rate

In [357]:
df_final = df_final.merge(df_audnzd, how='inner', left_index=True, right_index=True)

In [358]:
df_final.isnull().sum().sum()

0

In [359]:
df_final.shape

(264, 24)

## merge in AUDCNY exchange rate

In [360]:
df_final = df_final.merge(df_audcny, how='inner', left_index=True, right_index=True)

In [361]:
df_final.isnull().sum().sum()

0

In [362]:
df_final.shape

(264, 25)

## merge in AUDUSD exchange rate

In [363]:
df_final = df_final.merge(df_audusd, how='inner', left_index=True, right_index=True)

In [364]:
df_final.isnull().sum().sum()

0

In [365]:
df_final.shape

(264, 26)

## merge in Australian rain

In [366]:
df_final = df_final.merge(df_rain_aus, how='inner', left_index=True, right_index=True)

In [367]:
df_final.isnull().sum().sum()

0

In [368]:
df_final.shape

(264, 27)

## merge in Australian sunshine

In [369]:
df_final = df_final.merge(df_sun_aus, how='inner', left_index=True, right_index=True)

In [370]:
df_final.isnull().sum().sum()

0

In [371]:
df_final.shape

(264, 28)

## merge in Australian temperature

In [372]:
df_final = df_final.merge(df_temp_aus, how='inner', left_index=True, right_index=True)

In [373]:
df_final.isnull().sum().sum()

0

In [374]:
df_final.shape

(264, 29)

## merge in Australian meat slaughterings and production

In [376]:
# Unlike the single-series merges above, this frame contributes 13 columns at once
# (29 -> 42 columns per the shapes displayed around this cell).
df_final = df_final.merge(df_meat_slau_prod, how='inner', left_index=True, right_index=True)

In [377]:
df_final.isnull().sum().sum()

0

In [378]:
df_final.shape

(264, 42)

## merge in Australian consumer price index

In [379]:
df_final = df_final.merge(df_cpi_all, how='inner', left_index=True, right_index=True)

In [380]:
df_final.isnull().sum().sum()

0

In [381]:
df_final.shape

(264, 43)

## merge in Australian residential property prices

In [382]:
df_final = df_final.merge(df_cpi_residential, how='inner', left_index=True, right_index=True)

In [383]:
df_final.isnull().sum().sum()

0

In [384]:
df_final.shape

(264, 44)

## merge in Australian cpi excluding food and energy

In [385]:
df_final = df_final.merge(df_cpi_all_ex_food_energy, how='inner', left_index=True, right_index=True)

In [386]:
df_final.isnull().sum().sum()

0

In [387]:
df_final.shape

(264, 45)

## merge in Australian CPI Food

In [388]:
df_final = df_final.merge(df_cpi_food, how='inner', left_index=True, right_index=True)

In [389]:
df_final.isnull().sum().sum()

0

In [390]:
df_final.shape

(264, 46)

## merge in Australian Producer Price Index

In [391]:
df_final = df_final.merge(df_prod_index, how='inner', left_index=True, right_index=True)

In [392]:
df_final.isnull().sum().sum()

0

In [393]:
df_final.shape

(264, 47)

## merge in Australian Petrol and Diesel

In [394]:
# Adds the `diesel` and `petrol` columns (47 -> 49 columns per the shapes displayed around this cell).
df_final = df_final.merge(df_aus_fuel, how='inner', left_index=True, right_index=True)

In [395]:
df_final.isnull().sum().sum()

0

In [396]:
df_final.shape

(264, 49)

# Save collated dataframe to csv

In [399]:
# Write the collated frame, index included, to a CSV in the current working directory.
# NOTE(review): the final frame displayed below no longer shows a `month` header on its index,
# so the index appears to be unnamed at this point and the first CSV column would be written
# without a header — consider index_label="month" if downstream readers can take the change.
df_final.to_csv("df_final_australian_beef_feat_cols_and_target_values_1999-2020.csv", encoding='utf-8')

In [400]:
df_final

Unnamed: 0,beef_per_kg,meat_consumption,meat_exports,meat_imports,meat_distribution,meat_supply,cattle_production,swine_production_x,barley_production,beef_and_veal_production,...,lamb_slaughterings,pig_slaughterings,sheep_slaughterings,cpi_all,cpi_residential,cpi_all_ex_food_energy,cpi_food,cpi_prod_index,diesel,petrol
1999-01-31,2.71,61.833333,104.083333,0.333333,168.083333,168.083333,879.416667,30.166667,419.333333,163.000000,...,1228.0,360.0,1326.0,62.967263,52.5088,64.117489,68.833314,64.792117,93.8,85.0
1999-02-28,2.80,61.833333,104.083333,0.333333,168.083333,168.083333,879.416667,30.166667,419.333333,163.000000,...,1252.0,423.0,1467.0,63.245879,53.4705,64.354054,68.941204,64.984379,93.8,85.0
1999-03-31,2.79,61.833333,104.083333,0.333333,168.083333,168.083333,879.416667,30.166667,419.333333,163.000000,...,1392.0,455.0,1464.0,63.245879,53.4705,64.354054,68.941204,64.984379,93.8,85.0
1999-04-30,2.68,61.833333,104.083333,0.333333,168.083333,168.083333,879.416667,30.166667,419.333333,163.000000,...,1250.0,415.0,1235.0,63.245879,53.4705,64.354054,68.941204,64.984379,93.8,85.0
1999-05-31,2.61,61.833333,104.083333,0.333333,168.083333,168.083333,879.416667,30.166667,419.333333,163.000000,...,1246.0,449.0,1239.0,63.803111,53.9905,64.836722,69.152023,66.330209,93.8,85.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-31,6.43,55.750000,122.750000,1.416667,178.500000,178.500000,691.666667,34.916667,1091.666667,177.083333,...,1487.0,486.0,301.0,108.846065,118.0338,107.521068,106.631196,112.472963,120.3,117.3
2020-09-30,6.36,55.750000,122.750000,1.416667,178.500000,178.500000,691.666667,34.916667,1091.666667,177.083333,...,1487.0,486.0,301.0,108.846065,118.0338,107.521068,106.631196,112.472963,119.6,117.1
2020-10-31,6.21,55.750000,122.750000,1.416667,178.500000,178.500000,691.666667,34.916667,1091.666667,177.083333,...,1487.0,486.0,301.0,108.846065,118.0338,107.521068,106.631196,112.472963,118.9,117.8
2020-11-30,6.07,55.750000,122.750000,1.416667,178.500000,178.500000,691.666667,34.916667,1091.666667,177.083333,...,1487.0,486.0,301.0,109.496169,123.6455,107.958167,106.631196,113.626532,118.2,115.6
