# New project name
> Explain what this notebook does specifically and link to the data source. 

---

#### Import Python tools and Jupyter config

In [2]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
import geopandas as gpd

In [3]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None

In [4]:
today = pd.Timestamp("today").strftime("%Y-%m-%d")

---

## Subhead, i.e. "Fetch"

#### Read data from XyXy source (live data where possible over local downloads)

In [30]:
import subprocess
import os


def download_and_convert_kmz_to_geojson():
    # URLs for the KMZ files
    kmz_url = "https://www.spc.noaa.gov/climo/reports/yesterday.kmz"
    kmz_file_name = "yesterday.kmz"
    kmz_extracted_folder = "kmz_extraction"

    # Ensure the extraction directory exists
    os.makedirs(kmz_extracted_folder, exist_ok=True)

    # Download the KMZ file
    subprocess.run(["curl", "-o", kmz_file_name, kmz_url], check=True)
    print(f"Downloaded {kmz_url} to {kmz_file_name}")

    # Extract the KMZ file
    subprocess.run(
        ["unzip", "-o", kmz_file_name, "-d", kmz_extracted_folder], check=True
    )

    # Assuming the first file extracted is the KML file
    extracted_files = os.listdir(kmz_extracted_folder)
    kml_file_name = [f for f in extracted_files if f.endswith(".kml")][0]
    kml_file_path = os.path.join(kmz_extracted_folder, kml_file_name)
    print(f"Extracted KML file: {kml_file_path}")

    # List layers in the KML file using ogrinfo
    ogrinfo_cmd = ["ogrinfo", kml_file_path]
    layers_output = subprocess.check_output(ogrinfo_cmd).decode()
    print("Layers identified in the KML file:")
    print(layers_output)

    # Extract layer names
    layers = [
        line.split(":")[1].strip() for line in layers_output.split("\n") if "1:" in line
    ]
    print(f"Layers to be converted: {layers}")

    # Convert each layer to GeoJSON
    for layer in layers:
        safe_layer_name = layer.replace(" ", "_").replace("-", "_").replace(":", "_")
        geojson_file_name = f"{safe_layer_name}.geojson"
        geojson_file_path = os.path.join(kmz_extracted_folder, geojson_file_name)
        ogr2ogr_cmd = [
            "ogr2ogr",
            "-f",
            "GeoJSON",
            geojson_file_path,
            kml_file_path,
            layer,
            "-skipfailures",
        ]
        subprocess.run(ogr2ogr_cmd, check=True)
        print(f"Created GeoJSON file: {geojson_file_path}")

    print("Conversion complete. GeoJSON files created for each layer.")


if __name__ == "__main__":
    download_and_convert_kmz_to_geojson()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 39929  100 39929    0     0  81271      0 --:--:-- --:--:-- --:--:-- 82327


Downloaded https://www.spc.noaa.gov/climo/reports/yesterday.kmz to yesterday.kmz
Archive:  yesterday.kmz
  inflating: kmz_extraction/240314_rpts.kml  
Extracted KML file: kmz_extraction/240314_rpts.kml
Layers identified in the KML file:
INFO: Open of `kmz_extraction/240314_rpts.kml'
      using driver `LIBKML' successful.
1: 240314_rpts
2: SPC Storm Reports KML Info
3: SPC Tornado Reports 2024-03-14 1200Z - 2024-03-15 1159Z
4: SPC Damaging Wind Reports 2024-03-14 1200Z - 2024-03-15 1159Z
5: SPC Hail Reports 2024-03-14 1200Z - 2024-03-15 1159Z

Layers to be converted: ['240314_rpts']
Created GeoJSON file: kmz_extraction/240314_rpts.geojson
Conversion complete. GeoJSON files created for each layer.


---

## Process

#### Clean dates, standardize categories, etc. 

---

## Aggregate

#### Groupby state, etc.

---

## Metadata

#### Data provenance, column descriptions, etc.

---

## Exports

#### XyXy subset in CSV format to `processed`

#### JSON, GeoJSON, etc., to `processed`