---
Author: Mustapha Bouhsen <br>
[LinkedIn](https://www.linkedin.com/in/mustapha-bouhsen/)<br>
[Git](https://github.com/mus514)<br>
Date: February 7, 2024<br>
---


### Create table from Azure storage


In [0]:
#-----------------------------------------
# Set the prod folder path
#-----------------------------------------
prod_folder_path = "/mnt/prod/"

stocks = ["aapl", "amzn", "googl", "msft"]

In [0]:
#-----------------------------------------
# Create the daily table for each stock
#-----------------------------------------
for stock in stocks:
    # Create the path for the stock
    path = f'{prod_folder_path}{stock}/year=*/month=*/*'
    # Load all the parquet files at once
    df = spark.read.parquet(path)
    
    # Check if the table exists
    if spark.catalog.tableExists(stock):
        # Drop the existing table
        spark.sql(f"DROP TABLE {stock}")
        print(f'Dropped table: {stock}')
    
    # Create the table
    df.write.format("parquet").saveAsTable(stock)
    print(f'Table for {stock} is created')

Dropped table: aapl
Table for aapl is created
Dropped table: amzn
Table for amzn is created
Dropped table: googl
Table for googl is created
Dropped table: msft
Table for msft is created


In [0]:
%sql
--- Display the table for apple stock

SELECT *
FROM aapl
LIMIT 10

date,open,high,low,close,volume
2019-10-31,247.24,249.17,237.26,248.76,34790520.0
2019-10-30,244.76,245.3,241.21,243.26,31130522.0
2019-10-29,248.97,249.75,242.57,243.29,35709868.0
2019-10-28,247.42,249.25,246.72,249.05,23655368.0
2019-10-25,243.16,246.73,242.88,246.58,18369296.0
2019-10-24,244.51,244.8,241.81,243.58,17916256.0
2019-10-23,242.1,243.24,241.22,243.18,19932544.0
2019-10-22,241.16,242.2,239.62,239.96,22684000.0
2019-10-21,237.52,240.99,237.32,240.51,21811568.0
2019-10-18,234.59,237.58,234.29,236.41,24248024.0


In [0]:
%sql
SELECT year(date), avg(close) as mean_stock_price_by_year
FROM aapl
GROUP BY year(date)
ORDER BY year(date)

year(date),mean_stock_price_by_year
1999,96.85930278689362
2000,71.74892876261757
2001,20.21911299228668
2002,19.13952378621177
2003,18.54334883462815
2004,35.52689698385814
2005,51.68272765139316
2006,70.81172754660071
2007,128.27446834974555
2008,141.9790202506446


In [0]:
%sql
DROP TABLE IF EXISTS stocks_prices;

CREATE TABLE stocks_prices AS
SELECT aapl.date, aapl.close as aapl, amzn.close as amzn, msft.close as msft, googl.close as googl
FROM aapl
JOIN amzn ON aapl.date = amzn.date
JOIN msft ON aapl.date = msft.date
JOIN googl ON aapl.date = googl.date
ORDER BY date;

num_affected_rows,num_inserted_rows


In [0]:
%sql

SELECT *
FROM stocks_prices
LIMIT 10

date,aapl,amzn,msft,googl
2004-08-19,30.71,38.63,27.12,100.335
2004-08-20,30.8,39.51,27.2,108.31
2004-08-23,31.08,39.45,27.24,109.4
2004-08-24,31.95,39.05,27.24,104.87
2004-08-25,33.05,40.3,27.55,106.0
2004-08-26,34.66,40.19,27.44,107.91
2004-08-27,34.35,39.9,27.46,106.15
2004-08-30,34.12,38.31,27.3,102.01
2004-08-31,34.49,38.14,27.3,102.37
2004-09-01,35.86,38.24,27.39,100.25
