In [0]:
# Notebook-wide settings, referenced by the Spark configuration cells that follow.
storage_account = "stgbus"      # interpolated into the <account>.dfs.core.windows.net config keys
container_name = "buscontain"   # container segment of the abfs:// source path
scope_name = "bus-scope"        # secret scope passed to dbutils.secrets.get
secret_name = "busas"           # key inside that scope; its value is used as the SAS token

In [0]:
# Select SAS as the authentication type for this storage account's dfs endpoint.
spark.conf.set(
    f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net",
    "SAS",
)

In [0]:
# Use the fixed-token provider class, so the SAS token comes from the
# fs.azure.sas.fixed.token.<account> setting rather than being generated.
spark.conf.set(
    f"fs.azure.sas.token.provider.type.{storage_account}.dfs.core.windows.net",
    "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider",
)

In [0]:
# Register the SAS token for this storage account. The token is read from the
# secret scope at run time, so it is never written into the notebook itself.
# scope_name / secret_name are already strings, so they are passed directly
# rather than being re-wrapped in f-strings.
spark.conf.set(
    f"fs.azure.sas.fixed.token.{storage_account}.dfs.core.windows.net",
    dbutils.secrets.get(scope_name, secret_name),
)

In [0]:
%sql
-- Bronze layer: the source columns as loaded, plus two load-audit columns.
-- IF NOT EXISTS lets the notebook be re-run from the top without failing on an
-- already-created table. Note that an existing table is left untouched, so a
-- schema change here requires dropping/altering the table explicitly.
CREATE TABLE IF NOT EXISTS business_db.business_bronze
(
  user_id string,
  user_name string,
  average_business_count int,
  business_id string,
  review_id string,
  state string,
  city string,
  stars double,
  categories string,
  availability int,
  business_name string,
  start_date string,       -- kept as text at this layer
  fans string,
  reviews string,
  filename string,         -- load audit: source file of the row
  createdon timestamp      -- load audit: time the row was loaded
)
USING delta
LOCATION 'dbfs:/FileStore/data/business_bronze.delta'
TBLPROPERTIES (
  -- Change data feed must be on for table_changes() to work against this table.
  delta.enableChangeDataFeed = true
)

In [0]:
%sql
-- Load the source CSV data into the bronze table.
-- Each source column is cast explicitly to the bronze column type; the two
-- audit columns record the originating file and the load time.
COPY INTO business_db.business_bronze
FROM (
  SELECT
    user_id :: STRING,
    user_name :: STRING,
    average_business_count :: INT,
    business_id :: STRING,
    review_id :: STRING,
    state :: STRING,
    city :: STRING,
    stars :: DOUBLE,
    categories :: STRING,
    availability :: INT,
    business_name :: STRING,
    start_date :: STRING,
    fans :: STRING,
    reviews :: STRING,
    INPUT_FILE_NAME() AS filename,
    CURRENT_TIMESTAMP() AS createdon
  FROM "abfs://buscontain@stgbus.dfs.core.windows.net/source/bronze_data.csv"
)
FILEFORMAT = CSV
-- The first line of each file is a header row supplying the column names.
FORMAT_OPTIONS ('header' = 'true')


num_affected_rows,num_inserted_rows,num_skipped_corrupt_files
14448419,14448419,0


In [0]:
%sql
-- Spot-check: show a few rows of the bronze table after the load.
SELECT
  *
FROM
  business_db.business_bronze
LIMIT 3

user_id,user_name,average_business_count,business_id,review_id,state,city,stars,categories,availability,business_name,start_date,fans,reviews,filename,createdon
vBFtIlBirAvgbTrxE6w30A,Vikki,1,-DGsnMlRrR_tv8avrpQLQw,eQMbAv02qGnar20BzGKnrQ,Pennsylvania,Philadelphia,5.0,"Breakfast & Brunch, Restaurants, Diners",1,Sam's Morning Glory Diner,2010-04-12 16:15:25,1,"Not sure why this place only has an average of 3.5 stars. It was fantastic. We had an early lunch/brunch and I ordered a breakfast platter with the famed biscuit. DELICIOUS!! The biscuit was savory (and square!) and didn't even need butter. I could have ordered a pan of these to go. Eggs, sausage and potatoes were also perfect and plentiful. Will definitely return when I'm in the area.",abfs://buscontain@stgbus.dfs.core.windows.net/source/bronze_data.csv/part-00000-tid-7799268684431025758-56c52a07-9e47-43f4-bcf0-71f8e6afbed7-853-1-c000.csv,2024-02-04T11:43:17.57Z
vBFtIlBirAvgbTrxE6w30A,Vikki,1,aDvsKgUSTQaZ_yM-ZcWWcg,YW2q4p8OwP84d--fHrUTrQ,Pennsylvania,Bryn Mawr,4.0,"Food, Juice Bars & Smoothies, Restaurants, Sandwiches, Salad, Vegan, Vegetarian",1,Naked Lunch,2010-04-12 16:15:25,1,"I keep forgetting about Naked Lunch as an eatery option because it's inside Mom's Organic Market, and I rarely shop at Mom's because I don't live in Bryn Mawr. But, when I do go to Mom's, I try to grab a quick meal at Naked Lunch because it's so darn... organic and quite healthy. I've had the Moler Bowl twice, but I tried the Jammed Yam on my last visit, and it's now my new favorite. Large baked yam stuffed with quinoa (I think I asked for quinoa instead of what it's supposed to come with), black beans, and avocado. It's served with some kind of mild jalepeno dressing. It was kind of bland, but when I added salt and pepper it was delicious, and it's very filling. I wish Naked Lunch were in Suburban Square, or somewhere in Ardmore with its own storefront. It's kinda pricey (isn't all organic food?), but for $8.50 it's a filling meal and a great alternative to Sweetgreen and Honeygrow.",abfs://buscontain@stgbus.dfs.core.windows.net/source/bronze_data.csv/part-00000-tid-7799268684431025758-56c52a07-9e47-43f4-bcf0-71f8e6afbed7-853-1-c000.csv,2024-02-04T11:43:17.57Z
vBFtIlBirAvgbTrxE6w30A,Vikki,1,KvfKuEE9vdMha3F15zbsnA,6Nbc29KSLuses46gGiQmNw,Pennsylvania,Wynnewood,4.0,"Ice Cream & Frozen Yogurt, Food",1,Yapple Yogurt,2010-04-12 16:15:25,1,"Given that SweetGreen recently opened in nearby Ardmore and serves fat-free plain soft serve yogurt with healthy toppings, I was doubtful that YY would make much of an impact or serve a purpose. But I guess I'm wrong since, at 2:37 on a Tuesday afternoon, the place is humming. Ok, so it's mostly moms and tots, and high school kids, but I am surprised to see it so busy.",abfs://buscontain@stgbus.dfs.core.windows.net/source/bronze_data.csv/part-00000-tid-7799268684431025758-56c52a07-9e47-43f4-bcf0-71f8e6afbed7-853-1-c000.csv,2024-02-04T11:43:17.57Z


In [0]:
%sql
-- Silver layer: cleaned review rows with a per-user row count and audit timestamps.
-- IF NOT EXISTS lets the notebook be re-run from the top without failing on an
-- already-created table. Note that an existing table is left untouched, so a
-- schema change here requires dropping/altering the table explicitly.
CREATE TABLE IF NOT EXISTS business_db.business_silver
(
  user_id string,
  user_name string,
  user_count int,
  average_business_count int,
  business_id string,
  review_id string,
  state string,
  city string,
  stars double,
  categories string,
  availability string,     -- text at this layer (int in bronze)
  business_name string,
  start_date TIMESTAMP,    -- timestamp at this layer (string in bronze)
  fans string,
  reviews string,
  modifiedon timestamp,
  createdon timestamp
)
USING delta
LOCATION 'dbfs:/FileStore/data/business_silver.delta'
TBLPROPERTIES (
  delta.enableChangeDataFeed = true
)

In [0]:
%sql
-- Temporary view over the bronze table's change data feed, shaped for loading
-- into silver: keeps only rows with availability = 1 and adds a per-user row count.
--
-- Only 'insert' and 'update_postimage' change rows are kept. table_changes()
-- also emits 'update_preimage' (the old value of an updated row) and 'delete'
-- rows; letting those through would present stale or removed data as current
-- and inflate user_count.
--
-- NOTE(review): the starting version is hard-coded to 1, so every run re-reads
-- all changes from that version onward. Confirm this is intended.
-- NOTE(review): `fans` exists in bronze but is not projected here. Confirm
-- whether that omission is intentional.
CREATE OR REPLACE TEMPORARY VIEW business_bronze_changes
AS
SELECT
  user_id,
  user_name,
  -- Number of change rows (after filtering) that share this user_id.
  COUNT(*) OVER (PARTITION BY user_id) AS user_count,
  average_business_count,
  business_id,
  review_id,
  state,
  city,
  stars,
  categories,
  -- The WHERE clause already restricts rows to availability = 1, so the ELSE
  -- branch is never taken; kept so the column's derivation stays explicit.
  CASE WHEN availability = 1 THEN 'Open' ELSE NULL END AS availability,
  business_name,
  start_date,
  reviews
FROM table_changes('business_db.business_bronze', 1)
WHERE availability = 1
  AND _change_type IN ('insert', 'update_postimage')

In [0]:
%sql
-- Spot-check: show a few rows of the change view before merging into silver.
SELECT
  *
FROM
  business_bronze_changes
LIMIT 3

user_id,user_name,user_count,average_business_count,business_id,review_id,state,city,stars,categories,availability,business_name,start_date,reviews
--6RLpoufvX9f5gQs_LOuw,Megan,1,1,AB5mv5gMXGq5xjhdNuFSMg,qF1f98xRf9f6k7JdfIrk4Q,Pennsylvania,Philadelphia,5.0,"Food, Ice Cream & Frozen Yogurt",Open,Frozen Rolled Ice Cream,2014-02-10 00:14:04,Amazing ice cream and beverages! My girls and my husband got their hand rolled ice cream and it was so delicious and fresh tasting.
--Kwhcbkh7jxkhVVQZo2uQ,Larz,3,1,ORL4JE6tz3rJxVqkdKfegA,bSz0fCiKRJAB0qI9lB519A,Tennessee,Nashville,4.0,"Venues & Event Spaces, Performing Arts, Arts & Entertainment, Hotels & Travel, Food, Convenience Stores, American (New), Beauty & Spas, Restaurants, Museums, Event Planning & Services, Hotels, Cinema, Resorts, Day Spas",Open,Gaylord Opryland Resort & Convention Center,2014-05-30 16:58:11,"The place is absolutely huge! That's the attraction. Bring some comfortable shoes because it can take several minutes to walk from one end to the other. It's a small town with a river and waterfalls inside a gigantic building. You will get lost the entire first day. You can spend a couple days lounging around and taking in the trees, plants, fish and waterfalls."
--Kwhcbkh7jxkhVVQZo2uQ,Larz,3,1,tIvfmgT1qMeAEQf8CI5fPQ,G4YEeMu4Sj1XUEmlJwGe4A,Tennessee,Nashville,4.0,"Bars, Nightlife, American (Traditional), Barbeque, Seafood, Beer Bar, Sports Bars, Restaurants, Burgers, Steakhouses, Southern",Open,Caney Fork River Valley Grille,2014-05-30 16:58:11,"Ate dinner there tonight. Service was pretty fast and very nice. In fact, she might have tried a little hard to be nice and flirty."


In [0]:
%sql
-- Upsert the rows exposed by business_bronze_changes into the silver table.
--
-- The match key is review_id, not user_id: the source holds several rows per
-- user (user_count > 1), so matching on user_id makes multiple source rows hit
-- the same target row, which Delta rejects once the target is populated.
-- NOTE(review): assumes review_id is unique per source row - confirm. If the
-- change feed can carry several versions of one review, deduplicate the source first.
--
-- createdon is written only on insert; an update refreshes modifiedon alone so
-- the original creation time is preserved.
-- NOTE(review): silver.fans is not set by this merge (it is absent from the
-- source view) and is therefore left NULL on insert.
MERGE INTO business_db.business_silver AS tgt
USING business_bronze_changes AS src
  ON tgt.review_id = src.review_id
WHEN MATCHED THEN
  UPDATE SET
    tgt.user_id                = src.user_id,
    tgt.user_name              = src.user_name,
    tgt.user_count             = src.user_count,
    tgt.average_business_count = src.average_business_count,
    tgt.business_id            = src.business_id,
    tgt.state                  = src.state,
    tgt.city                   = src.city,
    tgt.stars                  = src.stars,
    tgt.categories             = src.categories,
    tgt.availability           = src.availability,
    tgt.business_name          = src.business_name,
    tgt.start_date             = src.start_date,
    tgt.reviews                = src.reviews,
    tgt.modifiedon             = CURRENT_TIMESTAMP()
WHEN NOT MATCHED THEN
  INSERT (
    user_id,
    user_name,
    user_count,
    average_business_count,
    business_id,
    review_id,
    state,
    city,
    stars,
    categories,
    availability,
    business_name,
    start_date,
    reviews,
    modifiedon,
    createdon
  )
  VALUES (
    src.user_id,
    src.user_name,
    src.user_count,
    src.average_business_count,
    src.business_id,
    src.review_id,
    src.state,
    src.city,
    src.stars,
    src.categories,
    src.availability,
    src.business_name,
    src.start_date,
    src.reviews,
    CURRENT_TIMESTAMP(),
    CURRENT_TIMESTAMP()
  )


num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
5790989,0,0,5790989


In [0]:
%sql
-- Gold layer: a narrower projection of silver (no free-text or name columns
-- other than business_name) with its own audit timestamps.
-- IF NOT EXISTS lets the notebook be re-run from the top without failing on an
-- already-created table. Note that an existing table is left untouched, so a
-- schema change here requires dropping/altering the table explicitly.
CREATE TABLE IF NOT EXISTS business_db.business_gold
(
  user_id string,
  user_count int,
  average_business_count int,
  business_id string,
  state string,
  city string,
  stars double,
  business_name string,
  start_date TIMESTAMP,
  modifiedon timestamp,
  createdon timestamp
)
USING delta
LOCATION 'dbfs:/FileStore/data/business_gold.delta'
TBLPROPERTIES (
  delta.enableChangeDataFeed = true
)

In [0]:
%sql
-- Rebuild the gold table from silver, replacing its entire contents.
--
-- INSERT OVERWRITE without a column list maps the select list to the target
-- columns by POSITION, so the select list must follow business_gold's column
-- order exactly: user_id, user_count, average_business_count, business_id,
-- state, city, stars, business_name, start_date, modifiedon, createdon.
-- `city` is included so that every later column lines up with its target.
-- Only columns declared on business_gold are selected; adding a derived
-- measure (e.g. a per-business row count) requires adding the column to the
-- table definition first.
INSERT OVERWRITE TABLE business_db.business_gold
SELECT
  user_id,
  user_count,
  average_business_count,
  business_id,
  state,
  city,
  stars,
  business_name,
  start_date,
  modifiedon,
  -- createdon records when this gold row was written, not silver's createdon.
  CURRENT_TIMESTAMP() AS createdon
FROM business_db.business_silver