In [None]:
# ******************************************************
# *
# * Name:         nb-create-meta-data.ipynb
# *     
# * Design Phase:
# *     Author:   John Miner
# *     Date:     03-01-2024
# *     Purpose:  This meta-data drives the ADF pipelines.
# * 
# ******************************************************

In [1]:
%%sql

--
--  Start clean: discard any existing copy of the meta_data table.
--  IF EXISTS keeps this cell from failing when the table is absent.
--

DROP TABLE IF EXISTS meta_data;

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 2, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [2]:
%%sql

--
--  Define the meta_data table: one row per pipeline.
--
--  NOTE(review): column meanings below are inferred from the rows
--  inserted later in this notebook - confirm against the pipelines.
--    pipeline_id   - numeric id of the entry
--    container_nm  - source location, e.g. raw/saleslt/dim_currency
--    file_nm       - source file name
--    header_flg    - presumably whether the file has a header row
--    delimiter_str - field delimiter used in the file
--    table_nm      - target table name
--    schema_str    - comma separated list of column names and types
--

CREATE TABLE meta_data
(
    pipeline_id     INT,
    container_nm    STRING,
    file_nm         STRING,
    header_flg      BOOLEAN,
    delimiter_str   STRING,
    table_nm        STRING,
    schema_str      STRING
);


StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 3, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [3]:
%%sql

--
--  Pipeline 1 - dim currency
--  Registers DimCurrency.csv for the dim_currency table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    1,
    'raw/saleslt/dim_currency', 'DimCurrency.csv',
    FALSE, '|',
    'dim_currency',
    'CurrencyKey int, CurrencyAlternateKey string, CurrencyName string'
);


StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 4, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [4]:
%%sql

--
--  Pipeline 2 - dim customer
--  Registers DimCustomer.csv for the dim_customer table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    2,
    'raw/saleslt/dim_customer', 'DimCustomer.csv',
    FALSE, '|',
    'dim_customer',
    'CustomerKey int, GeographyKey int, CustomerAlternateKey string, Title string, FirstName string, MiddleName string, LastName string, NameStyle int, BirthDate date, MaritalStatus string, Suffix string, Gender string, EmailAddress string, YearlyIncome decimal(19, 4), TotalChildren int, NumberChildrenAtHome int, EnglishEducation string, SpanishEducation string, FrenchEducation string, EnglishOccupation string, SpanishOccupation string, FrenchOccupation string, HouseOwnerFlag string, NumberCarsOwned int, AddressLine1 string, AddressLine2 string, Phone string, DateFirstPurchase date, CommuteDistance string'
);


StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 5, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [5]:
%%sql

--
--  Pipeline 3 - dim date
--  Registers DimDate.csv for the dim_date table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    3,
    'raw/saleslt/dim_date', 'DimDate.csv',
    FALSE, '|',
    'dim_date',
    'DateKey int, FullDateAlternateKey date, DayNumberOfWeek int, EnglishDayNameOfWeek string, SpanishDayNameOfWeek string, FrenchDayNameOfWeek string, DayNumberOfMonth int, DayNumberOfYear int, WeekNumberOfYear int, EnglishMonthName string, SpanishMonthName string, FrenchMonthName string, MonthNumberOfYear int, CalendarQuarter int, CalendarYear int, CalendarSemester int, FiscalQuarter int, FiscalYear int, FiscalSemester int'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 6, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [6]:
%%sql

--
--  Pipeline 4 - dim geography
--  Registers DimGeography.csv for the dim_geography table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    4,
    'raw/saleslt/dim_geography', 'DimGeography.csv',
    FALSE, '|',
    'dim_geography',
    'GeographyKey int, City string, StateProvinceCode string, StateProvinceName string, CountryRegionCode string, EnglishCountryRegionName string, SpanishCountryRegionName string, FrenchCountryRegionName string, PostalCode string, SalesTerritoryKey int, IpAddressLocator string'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 7, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [7]:
%%sql

--
--  Pipeline 5 - dim product
--  Registers DimProduct.csv for the dim_product table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    5,
    'raw/saleslt/dim_product', 'DimProduct.csv',
    FALSE, '|',
    'dim_product',
    'ProductKey int, ProductAlternateKey string, ProductSubcategoryKey int, WeightUnitMeasureCode string, SizeUnitMeasureCode string, EnglishProductName string, SpanishProductName string, FrenchProductName string, StandardCost decimal(19, 4), FinishedGoodsFlag boolean, Color string, SafetyStockLevel int, ReorderPoint int, ListPrice decimal(19, 4), Size string, SizeRange string, Weight decimal(19, 4), DaysToManufacture int, ProductLine string, DealerPrice decimal(19, 4), Class string, Style string, ModelName string, StartDate timestamp, EndDate timestamp, Status string'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 8, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [8]:
%%sql

--
--  Pipeline 6 - dim product category
--  Registers DimProductCategory.csv for the dim_product_category table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    6,
    'raw/saleslt/dim_product_category', 'DimProductCategory.csv',
    FALSE, '|',
    'dim_product_category',
    'ProductCategoryKey int, ProductCategoryAlternateKey int, EnglishProductCategoryName string, SpanishProductCategoryName string, FrenchProductCategoryName string'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 9, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [9]:
%%sql

--
--  Pipeline 7 - dim product sub category
--  Registers DimProductSubcategory.csv for the dim_product_subcategory table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    7,
    'raw/saleslt/dim_product_subcategory', 'DimProductSubcategory.csv',
    FALSE, '|',
    'dim_product_subcategory',
    'ProductSubcategoryKey int, ProductSubcategoryAlternateKey int, EnglishProductSubcategoryName string, SpanishProductSubcategoryName string, FrenchProductSubcategoryName string, ProductCategoryKey int'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 10, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [10]:
%%sql

--
--  Pipeline 8 - dim sales reason
--  Registers DimSalesReason.csv for the dim_sales_reason table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    8,
    'raw/saleslt/dim_sales_reason', 'DimSalesReason.csv',
    FALSE, '|',
    'dim_sales_reason',
    'SalesReasonKey int, SalesReasonAlternateKey int, SalesReasonName string, SalesReasonReasonType string'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 11, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [11]:
%%sql

--
--  Pipeline 9 - dim sales territory
--  Registers DimSalesTerritory.csv for the dim_sales_territory table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    9,
    'raw/saleslt/dim_sales_territory', 'DimSalesTerritory.csv',
    FALSE, '|',
    'dim_sales_territory',
    'SalesTerritoryKey int, SalesTerritoryAlternateKey int, SalesTerritoryRegion string, SalesTerritoryCountry string, SalesTerritoryGroup string'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 12, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [12]:
%%sql

--
--  Pipeline 10 - fact internet sales
--  Registers FactInternetSales.csv for the fact_internet_sales table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    10,
    'raw/saleslt/fact_internet_sales', 'FactInternetSales.csv',
    FALSE, '|',
    'fact_internet_sales',
    'ProductKey int, OrderDateKey int, DueDateKey int, ShipDateKey int, CustomerKey int, PromotionKey int, CurrencyKey int, SalesTerritoryKey int, SalesOrderNumber string, SalesOrderLineNumber int, RevisionNumber int, OrderQuantity int, UnitPrice decimal(19, 4), ExtendedAmount decimal(19, 4), UnitPriceDiscountPct decimal(19, 4), DiscountAmount decimal(19, 4), ProductStandardCost decimal(19, 4), TotalProductCost decimal(19, 4), SalesAmount decimal(19, 4), TaxAmt decimal(19, 4), Freight decimal(19, 4), CarrierTrackingNumber string, CustomerPONumber string, OrderDate timestamp, DueDate timestamp, ShipDate timestamp'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 13, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [13]:
%%sql

--
--  Pipeline 11 - fact internet sales reason
--  Registers FactInternetSalesReason.csv for the
--  fact_internet_sales_reason table.
--  Values are positional and follow the meta_data column order.
--

INSERT INTO meta_data VALUES
(
    11,
    'raw/saleslt/fact_internet_sales_reason', 'FactInternetSalesReason.csv',
    FALSE, '|',
    'fact_internet_sales_reason',
    'SalesOrderNumber string, SalesOrderLineNumber int, SalesReasonKey int'
);

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 14, Finished, Available)

<Spark SQL result set with 0 rows and 0 fields>

In [14]:
%%sql

--
--  Review the loaded rows, lowest pipeline_id first.
--

SELECT
    *
FROM
    meta_data
ORDER BY
    pipeline_id ASC;

StatementMeta(, d3604163-3055-4cb1-a71a-5153c01f4b61, 15, Finished, Available)

<Spark SQL result set with 11 rows and 7 fields>