In [0]:
spark.conf.set("spark.sql.shuffle.partitions", sc.defaultParallelism*2)
dbutils.library.installPyPI('ordered_set')
dbutils.library.installPyPI('fredapi')
dbutils.library.installPyPI('azure-storage-file-datalake' ,version="12.2.0" )

In [0]:
import pandas as pd
from datetime import datetime
from ordered_set import OrderedSet
from fredapi import Fred as fred
from pyspark.sql.functions import col , row_number , year , to_timestamp, regexp_replace , lit , to_date
from pyspark.sql.types import * 
from pyspark.sql import Window
from delta.tables import *
from pathlib import Path
import os 

start_date = datetime(1981, 1, 1)
end_date = datetime.today().date()

fred_api_key = dbutils.secrets.get(scope ="am-da-kv-general-user" ,key="na-da-s-FredAPIKey")
fred = fred(api_key=fred_api_key) 

In [0]:
##Price Indices
cpiausl = "CPIAUCSL" #Consumer Price Index for All Urban Consumers: All Items in U.S. City Average
cpilfesl = "CPILFESL" #Consumer Price Index for All Urban Consumers: All Items Less Food and Energy in U.S. City Average
dcoilwtico = "DCOILWTICO" #Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma
ppiaco = "PPIACO" #Producer Price Index by Commodity: All Commodities
wpu10 = "WPU10" #Producer Price Index by Commodity: Metals and Metal Products
wps101 = "WPS101" #Producer Price Index by Commodity: Metals and Metal Products: Iron and Steel
wps057303 = "WPS057303" #Producer Price Index by Commodity: Fuels and Related Products and Power: No. 2 Diesel Fuel
wps0571 = "WPS0571" #Producer Price Index by Commodity: Fuels and Related Products and Power: Gasoline
pcu3344123344120 = "PCU3344123344120" #Producer Price Index by Industry: Bare Printed Circuit Board Manufacturing: Bare Printed Circuit Boards
pcu33443344 = "PCU33443344" #Producer Price Index by Industry: Semiconductor and Other Electronic Component Manufacturing
pcu3312223312221 = "PCU3312223312221" #Producer Price Index by Industry: Steel Wire Drawing: Noninsulated Ferrous Wire Rope, Cable, and Fabricated Wire Rope Assemblies (Including Lifting Slings)
pcu324191324191 = "PCU324191324191"#Producer Price Index by Industry: Petroleum Lubricating Oil and Grease Manufacturing
##Personal Income
mehoinusa672n = "MEHOINUSA672N" #Real Median Household Income in the United States
pcedg = "PCEDG" #Personal Consumption Expenditures: Durable Goods
pce = 'PCE' ##'Personal Consumption Expenditure'
psavert = 'PSAVERT'  ##'Personal Saving Rate'
dspi = "DSPI" #Disposable Personal Income
dspic96 = 'DSPIC96'  ##'Real Disposable Personal Income'

##Unemployment
unrate = 'UNRATE' ##'Unemployment Rate'
nrou = "NROU" #Noncyclical Rate of Unemployment
civpart = "CIVPART" #Labor Force Participation Rate
emratio = "EMRATIO" #Employment-Population Ratio
unemploy = "UNEMPLOY" #Unemployment Level
payems = "PAYEMS" #All Employees, Total Nonfarm
manemp = "MANEMP" #All Employees, Manufacturing
icsa = "ICSA" #Initial Claims
ic4wsa = "IC4WSA" #4-Week Moving Average of Initial Claims

##Monetary
m2sl = 'M2SL'  ##'M2'
rbusbis = 'RBUSBIS' ##'Real Broad Effective Exchange Rate for United States'
gs10 = 'GS10' ##'Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis'
fedfunds = 'FEDFUNDS' ##'Federal Funds Effective Rate'
ttlcons = 'TTLCONS' ##'Total Construction Spending: Total Construction in the United States'
indpro='INDPRO' ##''Industrial Production: Total Index'
gfdebtn = "GFDEBTN" #Federal Debt: Total Public Debt
dff = "DFF" #Federal Funds Effective Rate
dtb = "DTB3"  #3-Month Treasury Bill Secondary Market Rate, Discount Basis
dgs5 = "DGS5" #Market Yield on U.S. Treasury Securities at 5-Year Constant Maturity, Quoted on an Investment Basis
dgs10 = "DGS10" #Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis
dgs30 = "DGS30" #Market Yield on U.S. Treasury Securities at 30-Year Constant Maturity, Quoted on an Investment Basis
t5yie = "T5YIE" #5-Year Breakeven Inflation Rate
t10yie = "T10YIE" #10-Year Breakeven Inflation Rate
t5yifr = "T5YIFR" #5-Year, 5-Year Forward Inflation Expectation Rate
dprime = "DPRIME" #Bank Prime Loan Rate

##Other indicators
tcu = "TCU" #Capacity Utilization: Total Index
rrsfs = "RRSFS" #Advance Real Retail and Food Services Sales
houst = "HOUST" #New Privately-Owned Housing Units Started: Total Units
cp = "CP" #Corporate Profits After Tax (without IVA and CCAdj)
totci = "TOTCI" #Commercial and Industrial Loans, All Commercial Banks
ttlcons = 'TTLCONS' ##'Total Construction Spending: Total Construction in the United States'
indpro = 'INDPRO' ##''Industrial Production: Total Index'

In [0]:
#use variables defined above in previous cell, list inside of [ ... ] in the order you want the columns
series_names = [##Price Indices
cpiausl, #Consumer Price Index for All Urban Consumers: All Items in U.S. City Average
cpilfesl, #Consumer Price Index for All Urban Consumers: All Items Less Food and Energy in U.S. City Average
dcoilwtico,#Crude Oil Prices: West Texas Intermediate (WTI) - Cushing, Oklahoma
ppiaco, #Producer Price Index by Commodity: All Commodities
wpu10, #Producer Price Index by Commodity: Metals and Metal Products
wps101, #Producer Price Index by Commodity: Metals and Metal Products: Iron and Steel
pcu3344123344120, #Producer Price Index by Industry: Bare Printed Circuit Board Manufacturing: Bare Printed Circuit Boards
pcu33443344, #Producer Price Index by Industry: Semiconductor and Other Electronic Component Manufacturing
pcu3312223312221,#Producer Price Index by Industry: Steel Wire Drawing: Noninsulated Ferrous Wire Rope, Cable, and Fabricated Wire Rope Assemblies (Including Lifting Slings)
pcu324191324191,
wps057303, #Producer Price Index by Commodity: Fuels and Related Products and Power: No. 2 Diesel Fuel
wps0571, #Producer Price Index by Commodity: Fuels and Related Products and Power: Gasoline

  
  ##Personal Income
mehoinusa672n, #Real Median Household Income in the United States
pcedg, #Personal Consumption Expenditures: Durable Goods
pce, ##'Personal Consumption Expenditure'
psavert,  ##'Personal Saving Rate'
dspi, #Disposable Personal Income
dspic96,  ##'Real Disposable Personal Income'

##Unemployment
unrate, ##'Unemployment Rate'
nrou, #Noncyclical Rate of Unemployment
civpart, #Labor Force Participation Rate
emratio, #Employment-Population Ratio
unemploy, #Unemployment Level
payems, #All Employees, Total Nonfarm
manemp,#All Employees, Manufacturing
icsa, #Initial Claims
ic4wsa, #4-Week Moving Average of Initial Claims

##Monetary
m2sl,  ##'M2'
rbusbis, ##'Real Broad Effective Exchange Rate for United States'
gs10, ##'Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis'
fedfunds, ##'Federal Funds Effective Rate'
ttlcons, ##'Total Construction Spending: Total Construction in the United States'
indpro, ##''Industrial Production: Total Index'
gfdebtn, #Federal Debt: Total Public Debt
dff, #Federal Funds Effective Rate
dtb,  #3-Month Treasury Bill Secondary Market Rate, Discount Basis
dgs5, #Market Yield on U.S. Treasury Securities at 5-Year Constant Maturity, Quoted on an Investment Basis
dgs10, #Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis
dgs30,#Market Yield on U.S. Treasury Securities at 30-Year Constant Maturity, Quoted on an Investment Basis
t5yie, #5-Year Breakeven Inflation Rate
t10yie, #10-Year Breakeven Inflation Rate
t5yifr, #5-Year, 5-Year Forward Inflation Expectation Rate
dprime, #Bank Prime Loan Rate

##Other indicators
tcu, #Capacity Utilization: Total Index
rrsfs, #Advance Real Retail and Food Services Sales
houst, #New Privately-Owned Housing Units Started: Total Units
cp, #Corporate Profits After Tax (without IVA and CCAdj)
totci, #Commercial and Industrial Loans, All Commercial Banks
ttlcons, ##'Total Construction Spending: Total Construction in the United States'
indpro ##''Industrial Production: Total Index'
]
print (series_names)

In [0]:
df = {}
for series in series_names:
    df[series] = fred.get_series_info(series)
datasets = pd.DataFrame(df).T.reset_index()
datasets["last_updated"] = str(datasets["last_updated"].str[:-3][0])
datasets["last_updated"] = pd.to_datetime(datasets["last_updated"])
datasets["observation_end"] = pd.to_datetime(datasets["observation_end"])
datasets["observation_start"] = pd.to_datetime(datasets["observation_start"])
datasets["realtime_end"] = pd.to_datetime(datasets["realtime_end"])
datasets["realtime_start"] = pd.to_datetime(datasets["realtime_start"])
datasets.head()

Unnamed: 0,index,frequency,frequency_short,id,last_updated,notes,observation_end,observation_start,popularity,realtime_end,realtime_start,seasonal_adjustment,seasonal_adjustment_short,title,units,units_short
0,CPIAUCSL,Monthly,M,CPIAUCSL,2022-07-13 07:37:02,The Consumer Price Index for All Urban Consume...,2022-06-01,1947-01-01,93,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Consumer Price Index for All Urban Consumers: ...,Index 1982-1984=100,Index 1982-1984=100
1,CPILFESL,Monthly,M,CPILFESL,2022-07-13 07:37:02,"The ""Consumer Price Index for All Urban Consum...",2022-06-01,1957-01-01,81,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Consumer Price Index for All Urban Consumers: ...,Index 1982-1984=100,Index 1982-1984=100
2,DCOILWTICO,Daily,D,DCOILWTICO,2022-07-13 07:37:02,"Definitions, Sources and Explanatory Notes (ht...",2022-08-01,1986-01-02,81,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Crude Oil Prices: West Texas Intermediate (WTI...,Dollars per Barrel,$ per Barrel
3,PPIACO,Monthly,M,PPIACO,2022-07-13 07:37:02,,2022-06-01,1913-01-01,79,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Commodity: All Commodi...,Index 1982=100,Index 1982=100
4,WPU10,Monthly,M,WPU10,2022-07-13 07:37:02,,2022-06-01,1926-01-01,57,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Commodity: Metals and ...,Index 1982=100,Index 1982=100


In [0]:
from pyspark.sql import SparkSession
#Create PySpark SparkSession
spark = SparkSession.builder \
    .master("local[1]") \
    .getOrCreate()
#Create PySpark DataFrame from Pandas
schema = StructType([
  StructField('index',StringType(), False),
  StructField('id',StringType(), False),
  StructField('frequency',StringType(), True),
  StructField('frequency_short',StringType(), True),
  StructField('last_updated', DateType(), False),
  StructField('notes',StringType(), True),
  StructField('observation_end',DateType(), True),
  StructField('observation_start',DateType(), True),
  StructField('popularity',StringType(), True),
  StructField('realtime_end',DateType(), True),
  StructField('realtime_start',DateType(), True),
  StructField('seasonal_adjustment',StringType(), True),
  StructField('seasonal_adjustment_short',StringType(), True),
  StructField('title',StringType(), True),
  StructField('units',StringType(), True),
  StructField('units_short',StringType(), True)
])
datasets=spark.createDataFrame(data = datasets, schema = schema)
datasets.printSchema()

In [0]:
datasets.write\
              .partitionBy("id")\
              .mode("overwrite")\
              .format("delta")\
              .option("overwriteSchema", "true")\
              .save("/mnt/datalake_raw/public/fred/datasets")

spark.sql("CREATE TABLE if not exists ods.public_fred_datasets USING DELTA LOCATION '/mnt/datalake_raw/public/fred/datasets'")

In [0]:
%sql
SELECT * FROM ods.public_fred_datasets LIMIT 25

index,id,frequency,frequency_short,last_updated,notes,observation_end,observation_start,popularity,realtime_end,realtime_start,seasonal_adjustment,seasonal_adjustment_short,title,units,units_short
CPIAUCSL,Monthly,M,CPIAUCSL,2022-07-13,"The Consumer Price Index for All Urban Consumers: All Items (CPIAUCSL) is a price index of a basket of goods and services paid by urban consumers. Percent changes in the price index measure the inflation rate between any two time periods. The most common inflation metric is the percent change from one year ago. It can also represent the buying habits of urban consumers. This particular index includes roughly 88 percent of the total population, accounting for wage earners, clerical workers, technical workers, self-employed, short-term workers, unemployed, retirees, and those not in the labor force. The CPIs are based on prices for food, clothing, shelter, and fuels; transportation fares; service fees (e.g., water and sewer service); and sales taxes. Prices are collected monthly from about 4,000 housing units and approximately 26,000 retail establishments across 87 urban areas. To calculate the index, price changes are averaged with weights representing their importance in the spending of the particular group. The index measures price changes (as a percent change) from a predetermined reference date. In addition to the original unadjusted index distributed, the Bureau of Labor Statistics also releases a seasonally adjusted index. The unadjusted series reflects all factors that may influence a change in prices. However, it can be very useful to look at the seasonally adjusted CPI, which removes the effects of seasonal changes, such as weather, school year, production cycles, and holidays. The CPI can be used to recognize periods of inflation and deflation. Significant increases in the CPI within a short time frame might indicate a period of inflation, and significant decreases in CPI within a short time frame might indicate a period of deflation. However, because the CPI includes volatile food and oil prices, it might not be a reliable measure of inflationary and deflationary periods. For a more accurate detection, the core CPI (CPILFESL (https://fred.stlouisfed.org/series/CPILFESL)) is often used. When using the CPI, please note that it is not applicable to all consumers and should not be used to determine relative living costs. Additionally, the CPI is a statistical measure vulnerable to sampling error since it is based on a sample of prices and not the complete average. For more information on the consumer price indexes, see: Bureau of Economic Analysis. ""CPI Detailed Report."" (https://www.bls.gov/cpi/) 2013. Handbook of Methods (https://www.bls.gov/opub/hom/pdf/cpihom.pdf) Understanding the CPI: Frequently Asked Questions (https://www.bls.gov/cpi/questions-and-answers.htm)",2022-06-01,1947-01-01,93,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Consumer Price Index for All Urban Consumers: All Items in U.S. City Average,Index 1982-1984=100,Index 1982-1984=100
CPILFESL,Monthly,M,CPILFESL,2022-07-13,"The ""Consumer Price Index for All Urban Consumers: All Items Less Food & Energy"" is an aggregate of prices paid by urban consumers for a typical basket of goods, excluding food and energy. This measurement, known as ""Core CPI,"" is widely used by economists because food and energy have very volatile prices. The Bureau of Labor Statistics defines and measures the official CPI, and more information can be found in the FAQ (https://www.bls.gov/cpi/questions-and-answers.htm) or in this article (https://www.bls.gov/opub/hom/pdf/cpihom.pdf).",2022-06-01,1957-01-01,81,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Consumer Price Index for All Urban Consumers: All Items Less Food and Energy in U.S. City Average,Index 1982-1984=100,Index 1982-1984=100
PPIACO,Monthly,M,PPIACO,2022-07-13,,2022-06-01,1913-01-01,79,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Commodity: All Commodities,Index 1982=100,Index 1982=100
WPU10,Monthly,M,WPU10,2022-07-13,,2022-06-01,1926-01-01,57,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Commodity: Metals and Metal Products,Index 1982=100,Index 1982=100
WPS101,Monthly,M,WPS101,2022-07-13,,2022-06-01,1967-01-01,41,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Producer Price Index by Commodity: Metals and Metal Products: Iron and Steel,Index 1982=100,Index 1982=100
PCU3344123344120,Monthly,M,PCU3344123344120,2022-07-13,,2022-06-01,1982-06-01,13,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Industry: Bare Printed Circuit Board Manufacturing: Bare Printed Circuit Boards,Index Jun 1982=100,Index Jun 1982=100
PCU33443344,Monthly,M,PCU33443344,2022-07-13,,2022-06-01,1984-12-01,64,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Industry: Semiconductor and Other Electronic Component Manufacturing,Index Dec 1984=100,Index Dec 1984=100
PCU3312223312221,Monthly,M,PCU3312223312221,2022-07-13,,2010-02-01,1965-01-01,2,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,"Producer Price Index by Industry: Steel Wire Drawing: Noninsulated Ferrous Wire Rope, Cable, and Fabricated Wire Rope Assemblies (Including Lifting Slings)",Index Jun 1982=100,Index Jun 1982=100
PCU324191324191,Monthly,M,PCU324191324191,2022-07-13,,2022-06-01,1980-12-01,41,2022-08-04,2022-08-04,Not Seasonally Adjusted,NSA,Producer Price Index by Industry: Petroleum Lubricating Oil and Grease Manufacturing,Index Dec 1980=100,Index Dec 1980=100
WPS057303,Monthly,M,WPS057303,2022-07-13,,2022-06-01,1985-06-01,28,2022-08-04,2022-08-04,Seasonally Adjusted,SA,Producer Price Index by Commodity: Fuels and Related Products and Power: No. 2 Diesel Fuel,Index 1982=100,Index 1982=100


In [0]:
df = {}
#use variables defined above in previous cell
for series in series_names:
  df[series] = fred.get_series(series, start_date, end_date)
  df = pd.DataFrame(df)
#set numeric datatype from results
cols = df.columns
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
#move date index to fixed column and set datatype
df.reset_index(inplace=True)
df = df.rename(columns = {'index':'observation_date'})
df["observation_date"] = pd.to_datetime(df["observation_date"])
#display capture
df.head()

Unnamed: 0,observation_date,CPIAUCSL,CPILFESL,DCOILWTICO,PPIACO,WPU10,WPS101,PCU3344123344120,PCU33443344,PCU3312223312221,PCU324191324191,WPS057303,WPS0571,MEHOINUSA672N,PCEDG,PCE,PSAVERT,DSPI,DSPIC96,UNRATE,NROU,CIVPART,EMRATIO,UNEMPLOY,PAYEMS,MANEMP,ICSA,IC4WSA,M2SL,RBUSBIS,GS10,FEDFUNDS,TTLCONS,INDPRO,GFDEBTN,DFF,DTB3,DGS5,DGS10,DGS30,T5YIE,T10YIE,T5YIFR,DPRIME,TCU,RRSFS,HOUST,CP,TOTCI
0,1981-01-01,87.2,85.4,,95.2,97.5,94.8,,,90.1,100.4,,100.1,,239.9,1870.0,10.9,2160.8,4980.4,7.5,6.187123,63.9,59.1,8071.0,91033.0,18639.0,,,1606.9,,12.57,19.08,,51.0656,964531.0,22.0,,,,,,,,,80.5752,,1547.0,213.17,
1,1981-02-01,88.0,85.9,,96.1,97.5,94.8,,,90.1,102.1,,104.1,,247.3,1884.2,10.8,2175.9,4965.0,7.4,,63.9,59.2,8051.0,91105.0,18613.0,,,1618.7,,13.19,15.93,,50.8554,,17.25,,,,,,,,,80.0469,,1246.0,,
2,1981-03-01,88.6,86.4,,97.0,98.3,96.1,,,93.3,103.3,,110.1,,250.9,1902.9,10.8,2197.3,4979.0,7.4,,64.1,59.4,7982.0,91210.0,18647.0,,,1636.6,,13.12,14.7,,51.1107,,15.53,,,,,,,,,80.2479,,1306.0,,
3,1981-04-01,89.1,87.0,,98.0,99.1,96.7,,,97.8,103.8,,114.9,,241.7,1904.4,10.9,2200.7,4965.1,7.2,6.178361,64.2,59.6,7869.0,91283.0,18711.0,,,1659.2,,13.68,15.72,,50.8864,971174.0,14.51,12.37,13.36,13.14,12.65,,,,17.5,79.6948,,1360.0,201.614,316.1307
4,1981-05-01,89.7,87.8,,98.3,99.2,97.3,,,98.0,105.3,,112.0,,239.8,1913.8,11.0,2214.9,4974.8,7.5,,64.3,59.5,8174.0,91296.0,18766.0,,,1664.2,,14.1,18.52,,51.1788,,19.79,14.67,14.37,14.05,13.59,,,,18.0,79.9489,,1140.0,,


In [0]:
from pyspark.sql import SparkSession
#Create PySpark SparkSession
spark = SparkSession.builder \
    .master("local[1]") \
    .getOrCreate()
#Create PySpark DataFrame from Pandas
schema = StructType([
  StructField('observation_date', DateType(), False),
  StructField('CPIAUCSL',  FloatType(), True),
  StructField('CPILFESL',  FloatType(), True),
  StructField('DCOILWTICO',  FloatType(), True),
  StructField('PPIACO',  FloatType(), True),
  StructField('WPU10',  FloatType(), True),
  StructField('WPS057303',    FloatType(), True),    
  StructField('WPS0571',  FloatType(), True),
  StructField('WPS101',  FloatType(), True),
  StructField('PCU3344123344120',  FloatType(), True),
  StructField('PCU33443344',  FloatType(), True),
  StructField('PCU3312223312221',  FloatType(), True),
  StructField('PCU324191324191',  FloatType(), True),
  StructField('MEHOINUSA672N',  FloatType(), True),
  StructField('PCEDG',  FloatType(), True),
  StructField('PCE',  FloatType(), True),
  StructField('PSAVERT',  FloatType(), True),
  StructField('DSPI', FloatType(), True),
  StructField('DSPIC96',  FloatType(), True),
  StructField('UNRATE',  FloatType(), True),
  StructField('NROU',  FloatType(), True),
  StructField('CIVPART',  FloatType(), True),
  StructField('EMRATIO',  FloatType(), True),
  StructField('UNEMPLOY', FloatType(), True), 
  StructField('PAYEMS', FloatType(), True),
  StructField('MANEMP',  FloatType(), True),
  StructField('ICSA',  FloatType(), True),
  StructField('IC4WSA', FloatType(), True), 
  StructField('M2SL',  FloatType(), True),
  StructField('RBUSBIS', FloatType(), True), 
  StructField('GS10',  FloatType(), True),
  StructField('FEDFUNDS', FloatType(), True),
  StructField('TTLCONS', FloatType(), True), 
  StructField('INDPRO',  FloatType(), True),
  StructField('GFDEBTN',  FloatType(), True),
  StructField('DFF',  FloatType(), True),
  StructField('DTB3', FloatType(), True), 
  StructField('DGS5',  FloatType(), True),
  StructField('DGS10', FloatType(), True) ,
  StructField('DGS30',  FloatType(), True),      
  StructField('T5YIE', FloatType(), True), 
  StructField('T10YIE',  FloatType(), True),
  StructField('T5YIFR',  FloatType(), True),
  StructField('DPRIME',  FloatType(), True),
  StructField('TCU',  FloatType(), True),
  StructField('RRSFS',  FloatType(), True),
  StructField('HOUST',  FloatType(), True),
  StructField('CP',  FloatType(), True) ,
  StructField('TOTCI', FloatType(), True)
])
datasets=spark.createDataFrame(data = df, schema = schema)
datasets.printSchema()

In [0]:
datasets.write\
              .partitionBy("observation_date")\
              .mode("overwrite")\
              .format("delta")\
              .option("overwriteSchema", "true")\
              .save("/mnt/datalake_raw/public/fred/data")

spark.sql("CREATE TABLE if not exists ods.public_fred_data USING DELTA LOCATION '/mnt/datalake_raw/public/fred/data'")

In [0]:
%sql
SELECT * FROM ods.public_fred_data LIMIT 25

observation_date,CPIAUCSL,CPILFESL,DCOILWTICO,PPIACO,WPU10,WPS057303,WPS0571,WPS101,PCU3344123344120,PCU33443344,PCU3312223312221,PCU324191324191,MEHOINUSA672N,PCEDG,PCE,PSAVERT,DSPI,DSPIC96,UNRATE,NROU,CIVPART,EMRATIO,UNEMPLOY,PAYEMS,MANEMP,ICSA,IC4WSA,M2SL,RBUSBIS,GS10,FEDFUNDS,TTLCONS,INDPRO,GFDEBTN,DFF,DTB3,DGS5,DGS10,DGS30,T5YIE,T10YIE,T5YIFR,DPRIME,TCU,RRSFS,HOUST,CP,TOTCI
1981-07-01,91.5,89.8,,99.0,100.1,100.5,,,98.0,105.5,,108.5,,245.2,1942.1,12.3,2284.8,5080.8,7.2,6.1693025,63.8,59.1,7863.0,91601.0,18785.0,,,1681.9,,14.28,19.04,,51.7764,997855.0,21.09,14.65,14.47,14.04,13.47,,,,20.0,80.4696,,1041.0,206.868,331.0504
1981-04-01,89.1,87.0,,98.0,99.1,96.7,,,97.8,103.8,,114.9,,241.7,1904.4,10.9,2200.7,4965.1,7.2,6.1783614,64.2,59.6,7869.0,91283.0,18711.0,,,1659.2,,13.68,15.72,,50.8864,971174.0,14.51,12.37,13.36,13.14,12.65,,,,17.5,79.6948,,1360.0,201.614,316.1307
1982-07-01,97.5,96.7,,100.4,99.3,100.0,99.8,,100.4,106.5,,101.3,,247.8,2072.2,12.5,2452.9,5157.0,9.8,6.131687,64.0,57.7,10849.0,89521.0,17278.0,,,1831.5,,13.95,12.59,,48.4863,1142035.0,14.73,12.55,14.62,14.4,13.9,,,,16.5,73.5119,,1166.0,180.728,
1982-10-01,98.1,97.5,,100.2,100.0,99.5,99.5,,98.3,106.3,,99.0,,254.1,2125.8,11.3,2480.8,5156.3,10.4,6.122254,64.1,57.4,11529.0,88907.0,16853.0,,,1869.7,,10.91,9.71,,47.4738,1197074.0,10.87,7.35,11.5,11.51,11.65,,,,13.5,71.7456,,1173.0,174.574,
1981-10-01,93.4,92.1,,98.9,101.2,100.8,,,104.2,105.9,,107.0,,237.3,1963.9,13.0,2329.4,5093.8,7.9,6.160031,63.8,58.8,8646.0,91380.0,18566.0,,,1721.8,,15.15,15.08,,51.0625,1028729.0,16.96,14.57,16.13,15.75,15.14,,,,19.5,78.7744,,873.0,203.587,
1982-04-01,95.0,94.7,,99.6,100.5,100.4,,,104.2,106.3,,97.5,,244.9,2026.3,12.9,2408.4,5160.6,9.3,6.1411595,63.9,57.9,10244.0,90150.0,17683.0,,,1803.9,,13.87,14.94,,49.1146,1079630.0,15.48,13.24,14.26,14.1,13.6,,,,16.5,74.8135,,917.0,185.364,
1982-12-01,97.7,97.2,,100.5,99.4,98.5,99.5,,98.3,106.1,,97.7,,270.8,2161.6,10.9,2513.1,5200.2,10.8,,64.1,57.2,12051.0,88771.0,16690.0,,,1905.9,,10.54,8.95,,46.9813,,9.1,8.28,10.34,10.74,10.69,,,,11.5,70.9046,,1303.0,,392.5171
1982-09-01,97.7,97.2,,100.0,100.0,99.7,99.5,,98.6,106.5,,100.5,,260.0,2104.6,11.8,2469.9,5162.4,10.1,,64.1,57.6,11217.0,89183.0,17074.0,,,1858.4,,12.34,10.31,,47.9303,,11.28,8.42,12.58,12.76,12.41,,,,13.5,72.5013,,1144.0,,388.3781
1982-11-01,98.0,97.3,,100.3,99.6,99.2,99.5,,98.3,106.2,,98.4,,268.1,2149.3,10.9,2497.7,5176.1,10.8,,64.2,57.3,11938.0,88786.0,16722.0,,,1883.7,,10.55,9.2,,47.32,,9.43,7.81,10.41,10.56,10.84,,,,12.0,71.4593,,1372.0,,
1983-01-01,97.9,97.6,,100.2,99.6,98.1,99.6,,98.3,106.2,,95.5,,268.3,2174.0,11.1,2533.1,5217.2,10.4,6.112889,63.9,57.2,11534.0,88990.0,16705.0,534000.0,518000.0,1959.4,,10.46,8.68,,47.8523,1244493.0,11.2,,,,,,,,,72.1842,,1586.0,176.17,


In [0]:
dbutils.notebook.exit("Job Completed Successfuly!")

Job Completed Successfuly!