# Create the staging layer for the US Climate warehouse
### Staging dataset

In [None]:
pip install --upgrade ipywidgets nbformat nbconvert

Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting nbconvert
  Downloading nbconvert-7.16.6-py3-none-any.whl.metadata (8.5 kB)
Collecting comm>=0.1.3 (from ipywidgets)
  Downloading comm-0.2.2-py3-none-any.whl.metadata (3.7 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nbconvert-7.16.6-py3-none-any.whl (258 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.5/258.5 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading comm-0.2.2-py3-none-any.whl (7.2 kB)
Downloading jupyterlab_widgets-3.0.15-py3-none-any.

In [None]:
from google.cloud import bigquery

# Fully qualified target of the staging layer is "<project_id>.<dataset>", stored in <region>.
project_id = "kiaraerica"
dataset = "us_climate_stg"
region = "us-central1"

# No project is passed here, so the client falls back to the default project
# inferred from the environment; it may differ from project_id.
bq_client = bigquery.Client()

# NOTE: despite its name, dataset_id holds a bigquery.Dataset object, not a string.
dataset_id = bigquery.Dataset(f"{project_id}.{dataset}")
dataset_id.location = region

# exists_ok=True ignores the "already exists" error, so this cell can be re-run safely.
resp = bq_client.create_dataset(dataset_id, exists_ok=True)

# Report the project the dataset actually lives in (resp.project) rather than the
# client's default project, which would be wrong whenever the two differ.
print(f"Created dataset {resp.project}.{resp.dataset_id}")

Created dataset kiaraerica.us_climate_stg


## `state_climate_policies` table
#### Replace `''` with `NULL` and cast `year_enacted` to an integer

In [None]:
%%bigquery
-- Preview of the cleaned policies table.
-- nullif turns the empty string into NULL; safe_cast then converts the remaining
-- values to integers and yields NULL (instead of an error) for anything non-numeric.
select
  policy,
  policy_area,
  category,
  status,
  safe_cast(nullif(year_enacted, '') as int64) as year_enacted,
  state,
  _data_source,
  _load_time
from us_climate_raw.state_climate_policies;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,policy,policy_area,category,status,year_enacted,state,_data_source,_load_time
0,Greenhouse Gas Emissions Reduction Targets,Climate Governance and Equity,Climate Governance,Not Enacted,,Alaska,climatexchange,2025-02-04 16:15:14.794411+00:00
1,Climate Action Plans,Climate Governance and Equity,Climate Governance,Not Enacted,,Alaska,climatexchange,2025-02-04 16:15:14.794411+00:00
2,State Greenhouse Gas Emissions Inventory,Climate Governance and Equity,Climate Governance,Enacted,2023,Alaska,climatexchange,2025-02-04 16:15:14.794411+00:00
3,Climate Bureaucracy,Climate Governance and Equity,Climate Governance,Not Enacted,,Alaska,climatexchange,2025-02-04 16:15:14.794411+00:00
4,Climate Advisory Bodies,Climate Governance and Equity,Climate Governance,Not Enacted,,Alaska,climatexchange,2025-02-04 16:15:14.794411+00:00
...,...,...,...,...,...,...,...,...
3092,Fracking Bans,"Industry, Materials, and Waste Management",Oil and Gas Regulations,Not Enacted,,Alabama,climatexchange,2025-02-04 16:15:14.794411+00:00
3093,Landfill Methane Regulations,"Industry, Materials, and Waste Management",Waste Management,Not Enacted,,Alabama,climatexchange,2025-02-04 16:15:14.794411+00:00
3094,Food Waste Bans and Targets,"Industry, Materials, and Waste Management",Waste Management,Not Enacted,,Alabama,climatexchange,2025-02-04 16:15:14.794411+00:00
3095,Agriculture Financial Incentives,Natural and Working Lands,Agriculture,Not Enacted,,Alabama,climatexchange,2025-02-04 16:15:14.794411+00:00


#### Create staging table

In [None]:
%%bigquery
-- Build the staging table from the raw policies table.
-- Only year_enacted is transformed: '' becomes NULL and everything else is cast to an
-- integer, with safe_cast returning NULL for values that cannot be converted.
create or replace table us_climate_stg.state_climate_policies as
  select
    state,
    policy,
    policy_area,
    category,
    status,
    safe_cast(nullif(year_enacted, '') as int64) as year_enacted,
    _data_source,
    _load_time
  from us_climate_raw.state_climate_policies;

Query is running:   0%|          |

## `carbon_capture_facilities` table

#### Create staging table

In [None]:
%%bigquery
-- Staging copy of the raw carbon capture facilities table.
-- The listed columns are carried over as-is; no values are transformed.
CREATE OR REPLACE TABLE us_climate_stg.carbon_capture_facilities AS
  SELECT
    id, facility, organization,
    city, state,
    category, status, industry,
    _data_source, _load_time
  FROM us_climate_raw.carbon_capture_facilities;

Query is running:   0%|          |

## `facility_ghg_emissions` table

#### Split `industry_sector` into `industry_sector1`, `industry_sector2`, and `industry_sector3`

In [None]:
%%bigquery
-- Preview of the industry_sector split.
-- The string is split on ',' once per row; SAFE_OFFSET returns NULL when a row has
-- fewer than three parts. Parts beyond the third are not selected.
WITH sector_lists AS (
  SELECT SPLIT(industry_sector, ',') AS sector_parts
  FROM us_climate_raw.facility_ghg_emissions
)
SELECT
  sector_parts[SAFE_OFFSET(0)] AS industry_sector1,
  sector_parts[SAFE_OFFSET(1)] AS industry_sector2,
  sector_parts[SAFE_OFFSET(2)] AS industry_sector3
FROM sector_lists;

Executing query with job ID: ab26994a-4d77-4af3-9aff-37ab0826d3a1
Query executing: 1.43s


ERROR:
 400 GET https://bigquery.googleapis.com/bigquery/v2/projects/kiaraerica/jobs/ab26994a-4d77-4af3-9aff-37ab0826d3a1?projection=full&location=us-central1&prettyPrint=false: API deadline too short

Location: us-central1
Job ID: ab26994a-4d77-4af3-9aff-37ab0826d3a1



#### Create staging table

In [None]:
%%bigquery
-- Staging table for facility-level GHG emissions.
-- industry_sector is split on ',' once per row in the CTE; the outer query exposes the
-- first three parts as separate columns (NULL when a part is absent, via SAFE_OFFSET).
-- All other listed columns are carried over unchanged.
CREATE OR REPLACE TABLE us_climate_stg.facility_ghg_emissions AS
  WITH facilities AS (
    SELECT
      facility_id,
      facility_name,
      city,
      state,
      naics_code,
      year,
      SPLIT(industry_sector, ',') AS sector_parts,
      max_rated_heat_input_capacity,
      carbon_dioxide_emissions,
      methane_emissions,
      nitrous_oxide_emissions,
      biogenic_co2_emissions,
      _data_source,
      _load_time
    FROM us_climate_raw.facility_ghg_emissions
  )
  SELECT
    facility_id, facility_name, city, state, naics_code, year,
    sector_parts[SAFE_OFFSET(0)] AS industry_sector1,
    sector_parts[SAFE_OFFSET(1)] AS industry_sector2,
    sector_parts[SAFE_OFFSET(2)] AS industry_sector3,
    max_rated_heat_input_capacity,
    carbon_dioxide_emissions, methane_emissions, nitrous_oxide_emissions, biogenic_co2_emissions,
    _data_source, _load_time
  FROM facilities;

Query is running:   0%|          |

## `state_disasters` table

#### Split the cost ranges into min and max

In [None]:
%%bigquery
-- Preview of the cost-range split.
-- Each *_cost_range string is split on '-' once in the CTE; element 0 becomes the
-- *_cost_min column and element 1 the *_cost_max column. SAFE_OFFSET and SAFE_CAST
-- return NULL (rather than failing) for a missing element or a non-integer value.
-- Note: the source column for winter storms is spelled winter_strom_cost_range;
-- the output columns use the winter_storm_ prefix.
WITH cost_bounds AS (
  SELECT
    state,
    year,
    drought_count,
    SPLIT(drought_cost_range, '-') AS drought_bounds,
    flooding_count,
    SPLIT(flooding_cost_range, '-') AS flooding_bounds,
    freeze_count,
    SPLIT(freeze_cost_range, '-') AS freeze_bounds,
    severe_storm_count,
    SPLIT(severe_storm_cost_range, '-') AS severe_storm_bounds,
    tropical_cyclone_count,
    SPLIT(tropical_cyclone_cost_range, '-') AS tropical_cyclone_bounds,
    wildfire_count,
    SPLIT(wildfire_cost_range, '-') AS wildfire_bounds,
    winter_storm_count,
    SPLIT(winter_strom_cost_range, '-') AS winter_storm_bounds,
    all_disasters_count,
    SPLIT(all_disasters_cost_range, '-') AS all_disasters_bounds
  FROM us_climate_raw.state_disasters
)
SELECT
  state,
  year,
  drought_count,
  SAFE_CAST(drought_bounds[SAFE_OFFSET(0)] AS INT64) AS drought_cost_min,
  SAFE_CAST(drought_bounds[SAFE_OFFSET(1)] AS INT64) AS drought_cost_max,
  flooding_count,
  SAFE_CAST(flooding_bounds[SAFE_OFFSET(0)] AS INT64) AS flooding_cost_min,
  SAFE_CAST(flooding_bounds[SAFE_OFFSET(1)] AS INT64) AS flooding_cost_max,
  freeze_count,
  SAFE_CAST(freeze_bounds[SAFE_OFFSET(0)] AS INT64) AS freeze_cost_min,
  SAFE_CAST(freeze_bounds[SAFE_OFFSET(1)] AS INT64) AS freeze_cost_max,
  severe_storm_count,
  SAFE_CAST(severe_storm_bounds[SAFE_OFFSET(0)] AS INT64) AS severe_storm_cost_min,
  SAFE_CAST(severe_storm_bounds[SAFE_OFFSET(1)] AS INT64) AS severe_storm_cost_max,
  tropical_cyclone_count,
  SAFE_CAST(tropical_cyclone_bounds[SAFE_OFFSET(0)] AS INT64) AS tropical_cyclone_cost_min,
  SAFE_CAST(tropical_cyclone_bounds[SAFE_OFFSET(1)] AS INT64) AS tropical_cyclone_cost_max,
  wildfire_count,
  SAFE_CAST(wildfire_bounds[SAFE_OFFSET(0)] AS INT64) AS wildfire_cost_min,
  SAFE_CAST(wildfire_bounds[SAFE_OFFSET(1)] AS INT64) AS wildfire_cost_max,
  winter_storm_count,
  SAFE_CAST(winter_storm_bounds[SAFE_OFFSET(0)] AS INT64) AS winter_storm_cost_min,
  SAFE_CAST(winter_storm_bounds[SAFE_OFFSET(1)] AS INT64) AS winter_storm_cost_max,
  all_disasters_count,
  SAFE_CAST(all_disasters_bounds[SAFE_OFFSET(0)] AS INT64) AS all_disasters_cost_min,
  SAFE_CAST(all_disasters_bounds[SAFE_OFFSET(1)] AS INT64) AS all_disasters_cost_max
FROM cost_bounds;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,state,year,drought_count,drought_cost_min,drought_cost_max,flooding_count,flooding_cost_min,flooding_cost_max,freeze_count,freeze_cost_min,...,tropical_cyclone_cost_max,wildfire_count,wildfire_cost_min,wildfire_cost_max,winter_storm_count,winter_storm_cost_min,winter_storm_cost_max,all_disasters_count,all_disasters_cost_min,all_disasters_cost_max
0,AL,1980,1,1000,2000,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1000,2000
1,AL,1981,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,250,500
2,AL,1982,0,0,0,0,0,0,0,0,...,0,0,0,0,1,5,100,2,5,100
3,AL,1983,1,250,500,1,100,250,1,100,...,0,0,0,0,0,0,0,3,500,1000
4,AL,1984,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2245,AK,2020,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2246,AK,2021,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2247,AK,2022,0,0,0,0,0,0,0,0,...,0,1,5,100,0,0,0,1,5,100
2248,AK,2023,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Create staging table

In [None]:
%%bigquery
-- Staging table for state disasters.
-- Each *_cost_range string is split on '-' once in the CTE; element 0 becomes the
-- *_cost_min column and element 1 the *_cost_max column. SAFE_OFFSET and SAFE_CAST
-- return NULL (rather than failing) for a missing element or a non-integer value.
-- Note: the source column for winter storms is spelled winter_strom_cost_range;
-- the staging columns use the winter_storm_ prefix.
CREATE OR REPLACE TABLE us_climate_stg.state_disasters AS
  WITH cost_bounds AS (
    SELECT
      state,
      year,
      drought_count,
      SPLIT(drought_cost_range, '-') AS drought_bounds,
      flooding_count,
      SPLIT(flooding_cost_range, '-') AS flooding_bounds,
      freeze_count,
      SPLIT(freeze_cost_range, '-') AS freeze_bounds,
      severe_storm_count,
      SPLIT(severe_storm_cost_range, '-') AS severe_storm_bounds,
      tropical_cyclone_count,
      SPLIT(tropical_cyclone_cost_range, '-') AS tropical_cyclone_bounds,
      wildfire_count,
      SPLIT(wildfire_cost_range, '-') AS wildfire_bounds,
      winter_storm_count,
      SPLIT(winter_strom_cost_range, '-') AS winter_storm_bounds,
      all_disasters_count,
      SPLIT(all_disasters_cost_range, '-') AS all_disasters_bounds,
      _data_source,
      _load_time
    FROM us_climate_raw.state_disasters
  )
  SELECT
    state,
    year,
    drought_count,
    SAFE_CAST(drought_bounds[SAFE_OFFSET(0)] AS INT64) AS drought_cost_min,
    SAFE_CAST(drought_bounds[SAFE_OFFSET(1)] AS INT64) AS drought_cost_max,
    flooding_count,
    SAFE_CAST(flooding_bounds[SAFE_OFFSET(0)] AS INT64) AS flooding_cost_min,
    SAFE_CAST(flooding_bounds[SAFE_OFFSET(1)] AS INT64) AS flooding_cost_max,
    freeze_count,
    SAFE_CAST(freeze_bounds[SAFE_OFFSET(0)] AS INT64) AS freeze_cost_min,
    SAFE_CAST(freeze_bounds[SAFE_OFFSET(1)] AS INT64) AS freeze_cost_max,
    severe_storm_count,
    SAFE_CAST(severe_storm_bounds[SAFE_OFFSET(0)] AS INT64) AS severe_storm_cost_min,
    SAFE_CAST(severe_storm_bounds[SAFE_OFFSET(1)] AS INT64) AS severe_storm_cost_max,
    tropical_cyclone_count,
    SAFE_CAST(tropical_cyclone_bounds[SAFE_OFFSET(0)] AS INT64) AS tropical_cyclone_cost_min,
    SAFE_CAST(tropical_cyclone_bounds[SAFE_OFFSET(1)] AS INT64) AS tropical_cyclone_cost_max,
    wildfire_count,
    SAFE_CAST(wildfire_bounds[SAFE_OFFSET(0)] AS INT64) AS wildfire_cost_min,
    SAFE_CAST(wildfire_bounds[SAFE_OFFSET(1)] AS INT64) AS wildfire_cost_max,
    winter_storm_count,
    SAFE_CAST(winter_storm_bounds[SAFE_OFFSET(0)] AS INT64) AS winter_storm_cost_min,
    SAFE_CAST(winter_storm_bounds[SAFE_OFFSET(1)] AS INT64) AS winter_storm_cost_max,
    all_disasters_count,
    SAFE_CAST(all_disasters_bounds[SAFE_OFFSET(0)] AS INT64) AS all_disasters_cost_min,
    SAFE_CAST(all_disasters_bounds[SAFE_OFFSET(1)] AS INT64) AS all_disasters_cost_max,
    _data_source,
    _load_time
  FROM cost_bounds;

Query is running:   0%|          |

## `climate_risk_projections` table
#### Rename fields and create staging table

In [None]:
%%bigquery
-- Staging table for climate risk projections.
-- Two columns are renamed; every other listed column is carried over unchanged:
--   geo_id          -> county_code
--   low-lying_roads -> low_lying_roads (the raw name contains a hyphen, so it must be back-quoted)
CREATE OR REPLACE TABLE us_climate_stg.climate_risk_projections AS
  SELECT
    geo_id AS county_code,
    temp_change, precipitation_change,
    extreme_precipitation, extreme_cold, extreme_heat,
    dry_change,
    impervious_surface, housing_density, population_estimate,
    low_lying_houses,
    `low-lying_roads` AS low_lying_roads,
    hazard, exposure, vulnerability, risk_percentage,
    _data_source, _load_time
  FROM us_climate_raw.climate_risk_projections;

Query is running:   0%|          |

## `county_codes` table

In [None]:
%%bigquery
-- Staging copy of the raw county codes table; the listed columns are carried over unchanged.
CREATE OR REPLACE TABLE us_climate_stg.county_codes AS
  SELECT
    state, county,
    fips_state, fips_county, county_code,
    _data_source, _load_time
  FROM us_climate_raw.county_codes;

Query is running:   0%|          |

## `state_average_temperature` table
#### Create staging table

In [None]:
%%bigquery
-- Staging copy of the raw state average temperature table; the listed columns are carried over unchanged.
CREATE OR REPLACE TABLE us_climate_stg.state_average_temperature AS
  SELECT
    month, year, state,
    average_temp,
    monthly_mean_from_1901_to_2000,
    _data_source, _load_time
  FROM us_climate_raw.state_average_temperature;

Query is running:   0%|          |

## `state_electricity_generation_fuel_shares` table
#### Create staging table

In [None]:
%%bigquery
-- Staging copy of the raw electricity generation fuel shares table:
-- state, one column per fuel source, then the load metadata columns. No values are transformed.
CREATE OR REPLACE TABLE us_climate_stg.state_electricity_generation_fuel_shares AS
  SELECT
    state,
    nuclear, coal, natural_gas, petroleum,
    hydro, geothermal, solar_power, wind,
    biomass_and_others,
    _data_source, _load_time
  FROM us_climate_raw.state_electricity_generation_fuel_shares;

Query is running:   0%|          |

## `state_ghg_emissions` table
#### Rename `fuel1` and create staging table

In [None]:
%%bigquery
-- Staging table for state-level GHG emissions.
-- The only change from the raw table is the rename fuel1 -> fuel; the table keeps its
-- wide layout with one column per year, Y1990 through Y2022.
CREATE OR REPLACE TABLE us_climate_stg.state_ghg_emissions AS
  SELECT
    econ_sector, econ_subsector,
    sector, subsector,
    category, sub_category_1, sub_category_2, sub_category_3,
    fuel1 AS fuel,
    geo_ref,
    ghg,
    Y1990, Y1991, Y1992, Y1993, Y1994, Y1995, Y1996, Y1997, Y1998, Y1999, Y2000,
    Y2001, Y2002, Y2003, Y2004, Y2005, Y2006, Y2007, Y2008, Y2009, Y2010, Y2011,
    Y2012, Y2013, Y2014, Y2015, Y2016, Y2017, Y2018, Y2019, Y2020, Y2021, Y2022,
    _data_source, _load_time
  FROM us_climate_raw.state_ghg_emissions;

Query is running:   0%|          |