In [1]:
import requests
import zipfile
import duckdb
import os
import time

# Source archive: Stats NZ business financial data, March 2024 quarter.
url2 = "https://www.stats.govt.nz/assets/Uploads/Business-financial-data/Business-financial-data-March-2024-quarter/Download-data/business-financial-data-march-2024.zip"

# Where the downloaded archive is saved and where its members are unpacked.
local_zip_path2 = './business_financial_data.zip'
extracted_dir = './extracted/'

# Name of the CSV expected inside the archive, and its path once extracted.
csv_file_name2 = 'business-financial-data-march-2024-csv.csv'
csv_file_path2 = os.path.join(extracted_dir, csv_file_name2)


# Download the archive. Without a timeout, requests waits indefinitely on a
# stalled server, which would hang the whole cell.
response = requests.get(url2, timeout=60)
# Fail here on an HTTP error (404, 500, ...) instead of saving the error page
# as a ".zip" and hitting a confusing BadZipFile during extraction.
response.raise_for_status()
with open(local_zip_path2, 'wb') as f:
    f.write(response.content)

# Unpack every member of the archive into the extraction directory.
with zipfile.ZipFile(local_zip_path2, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir)

# Statement that (re)creates the business_financial table from the extracted
# CSV, letting DuckDB infer the column names and types.
create_table_query2 = f"""
CREATE OR REPLACE TABLE business_financial AS
SELECT * FROM read_csv_auto('{csv_file_path2}');
"""

# Variant 1: rows holding the second-largest Data_value among the
# "NZSIOC Level 2" / "Seasonally adjusted" rows, written as nested subqueries.
# The LIKE on Series_title_4 has no wildcard, so it acts as an equality test.
second_largest_nested_query = """
          select  Period,Series_title_2,Data_value from business_financial  where "Group" like '%NZSIOC Level 2%' and Series_title_4 like 'Seasonally adjusted'
          and Data_value =
          (select  MAX(Data_value) from business_financial  where "Group" like '%NZSIOC Level 2%' and Series_title_4 like 'Seasonally adjusted'
          and Data_value <
           (select  MAX(Data_value) from business_financial  where "Group" like '%NZSIOC Level 2%' and Series_title_4 like 'Seasonally adjusted'))
          """

# Variant 2: the same lookup expressed as a chain of CTEs.
second_largest_cte_query = """
WITH MaxDataValue AS (
    SELECT MAX(Data_value) AS max_data_value
    FROM business_financial
    WHERE "Group" LIKE '%NZSIOC Level 2%'
      AND Series_title_4 LIKE 'Seasonally adjusted'
),

-- Step 2: Identify the second maximum 'Data_value' that is less than the maximum value found in Step 1.
SecondMaxDataValue AS (
    SELECT MAX(Data_value) AS second_max_data_value
    FROM business_financial
    WHERE "Group" LIKE '%NZSIOC Level 2%'
      AND Series_title_4 LIKE 'Seasonally adjusted'
      AND Data_value < (SELECT max_data_value FROM MaxDataValue)
),

-- Step 3: Select records that match the second maximum 'Data_value' identified in Step 2.
FilteredRecords AS (
    SELECT Period, Series_title_2, Data_value
    FROM business_financial
    WHERE "Group" LIKE '%NZSIOC Level 2%'
      AND Series_title_4 LIKE 'Seasonally adjusted'
      AND Data_value = (SELECT second_max_data_value FROM SecondMaxDataValue)
)
-- Step 4: Return the final set of filtered records.
SELECT Period, Series_title_2, Data_value
FROM FilteredRecords
"""

# Open (or create) the on-disk database. The context manager closes the
# connection even when one of the statements below raises, so a failed query
# no longer leaks the handle on financial.db.
with duckdb.connect("financial.db") as conn:
    # Create the table and list what the database now contains.
    conn.execute(create_table_query2)
    conn.sql("SHOW ALL TABLES").show()

    # Time each variant with perf_counter: it is monotonic and
    # high-resolution, whereas time.time() can jump if the system clock is
    # adjusted mid-run.
    # NOTE(review): variant 1 runs first, so caching may favour variant 2;
    # treat the two timings as indicative only.
    cur_time = time.perf_counter()
    conn.sql(second_largest_nested_query).show()
    print(f"time1: {time.perf_counter() - cur_time}")

    cur_time = time.perf_counter()
    conn.sql(second_largest_cte_query).show()
    print(f"time2: {time.perf_counter() - cur_time}")

┌───────────┬─────────┬────────────────────┬──────────────────────┬────────────────────────────────────────┬───────────┐
│ database  │ schema  │        name        │     column_names     │              column_types              │ temporary │
│  varchar  │ varchar │      varchar       │      varchar[]       │               varchar[]                │  boolean  │
├───────────┼─────────┼────────────────────┼──────────────────────┼────────────────────────────────────────┼───────────┤
│ financial │ main    │ business_financial │ [Series_reference,…  │ [VARCHAR, DOUBLE, DOUBLE, VARCHAR, V…  │ false     │
└───────────┴─────────┴────────────────────┴──────────────────────┴────────────────────────────────────────┴───────────┘

┌─────────┬─────────────────┬────────────┐
│ Period  │ Series_title_2  │ Data_value │
│ double  │     varchar     │   double   │
├─────────┼─────────────────┼────────────┤
│ 2023.03 │ Wholesale Trade │  38810.022 │
└─────────┴─────────────────┴────────────┘

time1: 0.01252