In [17]:
import pandas as pd
import numpy as np
import duckdb

np.random.seed(42)

dates = pd.date_range(start="2024-01-01", end="2024-06-30", freq="B")
tickers = ["GOOG", "MSFT", "NVDA"]

rows = []

for ticker in tickers:
    price = np.random.uniform(100, 500)
    for date in dates:
        open_p = price + np.random.uniform(-2, 2)
        close_p = open_p + np.random.uniform(-3, 3)
        high = max(open_p, close_p) + np.random.uniform(0, 2)
        low = min(open_p, close_p) - np.random.uniform(0, 2)
        volume = np.random.randint(1_000_000, 5_000_000)

        rows.append([
            date, ticker,
            round(open_p, 2),
            round(high, 2),
            round(low, 2),
            round(close_p, 2),
            volume
        ])

        price = close_p

prices = pd.DataFrame(
    rows,
    columns=["date", "ticker", "open", "high", "low", "close", "volume"]
)

prices.head()


Unnamed: 0,date,ticker,open,high,low,close,volume
0,2024-01-01,GOOG,251.62,254.21,251.31,253.01,4097042
1,2024-01-02,GOOG,251.41,252.08,250.88,251.17,3138242
2,2024-01-03,GOOG,249.25,253.73,248.82,252.07,1791743
3,2024-01-04,GOOG,254.04,255.97,254.02,254.74,3583384
4,2024-01-05,GOOG,253.91,254.86,253.32,254.58,1648143


In [None]:
fundamentals = pd.DataFrame({
    "ticker": ["GOOG", "MSFT", "NVDA"],
    "sector": ["Technology", "Technology", "Technology"],
    "pe_ratio": [28.5, 34.2, 45.1],
    "market_cap": [1.8e12, 2.9e12, 1.2e12]
})

fundamentals


Unnamed: 0,ticker,sector,pe_ratio,market_cap
0,GOOG,Technology,28.5,1800000000000.0
1,MSFT,Technology,34.2,2900000000000.0
2,NVDA,Technology,45.1,1200000000000.0


In [19]:
# Function to execute SQL queries
def run_sql(query):
    return duckdb.query(query).to_df()

1. Display the first 10 rows of the prices table ordered by date and ticker.

In [20]:
run_sql("""
SELECT date, ticker, close
FROM prices
ORDER BY date, ticker
LIMIT 10
""")


Unnamed: 0,date,ticker,close
0,2024-01-01,GOOG,253.01
1,2024-01-01,MSFT,172.28
2,2024-01-01,NVDA,485.36
3,2024-01-02,GOOG,251.17
4,2024-01-02,MSFT,169.13
5,2024-01-02,NVDA,485.68
6,2024-01-03,GOOG,252.07
7,2024-01-03,MSFT,165.71
8,2024-01-03,NVDA,484.44
9,2024-01-04,GOOG,254.74


2. Show date, ticker, and close price for all rows.

In [21]:
run_sql("""
SELECT date, ticker, close
FROM prices
""")

Unnamed: 0,date,ticker,close
0,2024-01-01,GOOG,253.01
1,2024-01-02,GOOG,251.17
2,2024-01-03,GOOG,252.07
3,2024-01-04,GOOG,254.74
4,2024-01-05,GOOG,254.58
...,...,...,...
385,2024-06-24,NVDA,506.33
386,2024-06-25,NVDA,508.44
387,2024-06-26,NVDA,506.05
388,2024-06-27,NVDA,503.01


3. Retrieve all rows for ticker 'GOOG'.

In [22]:
run_sql("""
SELECT date, ticker, close
FROM prices
WHERE ticker = 'GOOG'
""")

Unnamed: 0,date,ticker,close
0,2024-01-01,GOOG,253.01
1,2024-01-02,GOOG,251.17
2,2024-01-03,GOOG,252.07
3,2024-01-04,GOOG,254.74
4,2024-01-05,GOOG,254.58
...,...,...,...
125,2024-06-24,GOOG,242.46
126,2024-06-25,GOOG,243.74
127,2024-06-26,GOOG,242.25
128,2024-06-27,GOOG,239.90


4. Get all price records from March 2024 only.

In [23]:
run_sql("""
SELECT date, ticker, close
FROM prices
WHERE date > '2024-02-29' AND date <'2024-04-01'
""")

Unnamed: 0,date,ticker,close
0,2024-03-01,GOOG,250.21
1,2024-03-04,GOOG,247.69
2,2024-03-05,GOOG,245.66
3,2024-03-06,GOOG,245.83
4,2024-03-07,GOOG,246.76
...,...,...,...
58,2024-03-25,NVDA,499.00
59,2024-03-26,NVDA,502.05
60,2024-03-27,NVDA,501.42
61,2024-03-28,NVDA,505.85


5. Find days where:

- ticker = 'MSFT'

- close price > 300

- volume > 2,000,000

In [24]:
run_sql("""
SELECT date, close, volume
FROM prices
WHERE ticker = 'GOOG' AND close > 250 AND volume > 2000000
""")

Unnamed: 0,date,close,volume
0,2024-01-01,253.01,4097042
1,2024-01-02,251.17,3138242
2,2024-01-04,254.74,3583384
3,2024-01-08,251.05,4474675
4,2024-01-18,250.79,2767640
5,2024-01-19,250.17,2425472
6,2024-01-23,250.28,4446532
7,2024-01-29,251.03,3712422
8,2024-01-30,250.45,3350770
9,2024-03-14,251.56,4471080


6. Retrieve all records for tickers GOOG and NVDA.

In [25]:
run_sql("""
SELECT date, ticker, open, high, low, close, volume
FROM prices
WHERE ticker IN ('GOOG', 'NVDA')
""")

Unnamed: 0,date,ticker,open,high,low,close,volume
0,2024-01-01,GOOG,251.62,254.21,251.31,253.01,4097042
1,2024-01-02,GOOG,251.41,252.08,250.88,251.17,3138242
2,2024-01-03,GOOG,249.25,253.73,248.82,252.07,1791743
3,2024-01-04,GOOG,254.04,255.97,254.02,254.74,3583384
4,2024-01-05,GOOG,253.91,254.86,253.32,254.58,1648143
...,...,...,...,...,...,...,...
255,2024-06-24,NVDA,508.50,509.58,505.90,506.33,2726676
256,2024-06-25,NVDA,507.66,508.84,507.54,508.44,3753183
257,2024-06-26,NVDA,508.39,509.49,505.14,506.05,3607411
258,2024-06-27,NVDA,505.11,505.37,501.28,503.01,2348207


7. Show the top 5 highest-volume days across all tickers.

In [None]:
run_sql("""
SELECT date, ticker, volume
FROM prices
ORDER BY volume DESC
LIMIT 5;
""")

Unnamed: 0,date,volume
0,2024-04-04,4995609
1,2024-06-27,4963735
2,2024-03-07,4951416
3,2024-04-15,4939311
4,2024-01-23,4931965


8. Show date, ticker, and close as closing_price.

In [None]:
run_sql("""
SELECT 
    date, 
    ticker, 
    close AS closing_price
FROM 
    prices;
""")

Unnamed: 0,date,ticker,closing_price
0,2024-01-01,GOOG,253.01
1,2024-01-02,GOOG,251.17
2,2024-01-03,GOOG,252.07
3,2024-01-04,GOOG,254.74
4,2024-01-05,GOOG,254.58
...,...,...,...
385,2024-06-24,NVDA,506.33
386,2024-06-25,NVDA,508.44
387,2024-06-26,NVDA,506.05
388,2024-06-27,NVDA,503.01


9. Create a query that shows:

- date

- ticker

- price change = close − open

Name the column price_change.

In [None]:
run_sql("""
SELECT
    date,
    ticker,
    (close - open) AS price_change
FROM prices;
""")

Unnamed: 0,date,ticker,price_change
0,2024-01-01,GOOG,1.39
1,2024-01-02,GOOG,-0.24
2,2024-01-03,GOOG,2.82
3,2024-01-04,GOOG,0.70
4,2024-01-05,GOOG,0.67
...,...,...,...
385,2024-06-24,NVDA,-2.17
386,2024-06-25,NVDA,0.78
387,2024-06-26,NVDA,-2.34
388,2024-06-27,NVDA,-2.10
