## Create the mart layer for the US Climate warehouse
### Declare common variables

In [2]:
# Shared configuration for the cells below. Each name is assigned exactly once
# (the original cell assigned `region` twice with the same value).
project_id = "kiaraerica"             # project in which the mart dataset is created
region = "us-central1"                # location assigned to the mart dataset
model_name = "gemini-2.0-flash-001"   # not referenced by any cell visible in this section; kept for compatibility
dataset = "us_climate_mrt"            # name of the mart-layer dataset

In [3]:
from google.cloud import bigquery

# Bind the client to the configured project so the dataset is created in
# `project_id` rather than in whatever default project the ambient
# credentials happen to resolve to.
bq_client = bigquery.Client(project=project_id)

# Describe the mart dataset and pin its location before creating it.
dataset_id = bigquery.Dataset(f"{project_id}.{dataset}")
dataset_id.location = region

# exists_ok=True keeps this cell safe to re-run when the dataset already exists.
resp = bq_client.create_dataset(dataset_id, exists_ok=True)

# Report the project of the dataset returned by the API, not the client's default.
print("Created dataset {}.{}".format(resp.project, resp.dataset_id))

Created dataset kiaraerica.us_climate_mrt


## Question 1: Which industry sectors, facilities, and states contribute the most to greenhouse gases from facility and facility GHG emissions data?


Identify top carbon-dioxide emitting facilities

In [39]:
%%bigquery
-- Preview: the ten facilities with the largest summed CO2 emissions.
-- Emission rows are matched to facility rows on id, name and year, then
-- totalled per (facility_id, facility_name, state).
SELECT
    fac.facility_id,
    fac.facility_name,
    fac.state,
    SUM(em.carbon_dioxide_emissions) AS facility_co2_emissions,
    SUM(em.methane_emissions)        AS facility_methane_emissions,
    SUM(em.nitrous_oxide_emissions)  AS facility_n2o_emissions
FROM us_climate_int.Facility_GHG_Emissions AS em
INNER JOIN us_climate_int.Facility AS fac
    ON  em.facility_id   = fac.facility_id
    AND em.facility_name = fac.facility_name
    AND em.year          = fac.year
GROUP BY
    fac.facility_id,
    fac.facility_name,
    fac.state
ORDER BY facility_co2_emissions DESC
LIMIT 10;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,facility_id,facility_name,state,facility_co2_emissions,facility_methane_emissions,facility_n2o_emissions
0,1007227,James H Miller Jr,AL,254330979.0,735088.5,1285417.04
1,1001051,Scherer,GA,198635088.4,574870.0,1004715.94
2,1000192,Labadie,MO,195533785.5,565148.25,988194.82
3,1000676,Monroe,MI,190574125.8,514305.25,962128.76
4,1001194,Gen J M Gavin,OH,183613136.3,538419.2,941390.344
5,1007504,Martin Lake,TX,181959935.4,382751.0,889287.726
6,1006868,W A Parish,TX,181158407.1,511740.925,892282.924
7,1001030,Gibson,IN,174606269.5,516934.75,903697.814
8,1001020,Colstrip,MT,166863912.9,471507.75,824571.96
9,1007505,Oak Grove,TX,159801402.8,350960.75,768709.476


Create mart `top_polluting_facility` to identify the highest-emitting facilities, analyze their contribution to state-wide greenhouse gas emissions, and track pollution trends for regulatory and sustainability efforts.

In [40]:
%%bigquery
-- Mart: greenhouse-gas totals per facility
-- (one row per facility_id / facility_name / state).
-- Emission rows are matched to facility rows on id, name and year.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY facility_co2_emissions DESC when reading the table.
CREATE OR REPLACE TABLE us_climate_mrt.top_polluting_facility AS
SELECT
    f.facility_id,
    f.facility_name,
    f.state,
    SUM(e.carbon_dioxide_emissions) AS facility_co2_emissions,
    SUM(e.methane_emissions) AS facility_methane_emissions,
    SUM(e.nitrous_oxide_emissions) AS facility_n2o_emissions
FROM us_climate_int.Facility_GHG_Emissions e
JOIN us_climate_int.Facility f
    ON e.facility_id = f.facility_id
    AND e.facility_name = f.facility_name
    AND e.year = f.year
GROUP BY f.facility_id, f.facility_name, f.state

Query is running:   0%|          |

Identify top polluting industries

In [41]:
%%bigquery
-- Preview: the ten industry sectors (Facility.industry_sector1) with the
-- largest summed CO2 emissions across all matched facility/year rows.
SELECT
    fac.industry_sector1             AS industry_sector,
    SUM(em.carbon_dioxide_emissions) AS industry_co2_emissions,
    SUM(em.methane_emissions)        AS industry_methane_emissions,
    SUM(em.nitrous_oxide_emissions)  AS industry_n2o_emissions
FROM us_climate_int.Facility_GHG_Emissions AS em
INNER JOIN us_climate_int.Facility AS fac
    ON  em.facility_id   = fac.facility_id
    AND em.facility_name = fac.facility_name
    AND em.year          = fac.year
GROUP BY fac.industry_sector1
ORDER BY industry_co2_emissions DESC
LIMIT 10;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,industry_sector,industry_co2_emissions,industry_methane_emissions,industry_n2o_emissions
0,Power Plants,23162020000.0,39940390.0,84378210.0
1,Chemicals,2066101000.0,2430625.0,5174402.0
2,Other,1276725000.0,1840696.0,3377009.0
3,Petroleum and Natural Gas Systems,1015249000.0,579385.5,789690.9
4,Metals,796671100.0,399443.0,695779.0
5,Petroleum Product Suppliers,622260200.0,884596.0,2036118.0
6,Pulp and Paper,473443500.0,4370294.0,10768010.0
7,Minerals,375855400.0,618395.1,1039588.0
8,Natural Gas and Natural Gas Liquids Suppliers,159074000.0,88050.11,124001.0
9,Injection of CO2,152166000.0,343273.8,590301.4


Create mart `top_polluting_industry` to identify the industries contributing the most to greenhouse gas emissions, allowing for trend analysis, regulatory assessments, and targeted mitigation strategies.

In [42]:
%%bigquery
-- Mart: greenhouse-gas totals per industry sector (Facility.industry_sector1),
-- one row per sector. Emission rows are matched to facility rows on id, name
-- and year.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY industry_co2_emissions DESC when reading the table.
CREATE OR REPLACE TABLE us_climate_mrt.top_polluting_industry AS
SELECT
    f.industry_sector1 AS industry_sector,
    SUM(e.carbon_dioxide_emissions) AS industry_co2_emissions,
    SUM(e.methane_emissions) AS industry_methane_emissions,
    SUM(e.nitrous_oxide_emissions) AS industry_n2o_emissions
FROM us_climate_int.Facility_GHG_Emissions e
JOIN us_climate_int.Facility f
    ON e.facility_id = f.facility_id
    AND e.facility_name = f.facility_name
    AND e.year = f.year
GROUP BY f.industry_sector1

Query is running:   0%|          |

## Question 2: Which states and sectors contribute the most to greenhouse gas emissions from state GHG emission data?

Identify top polluting states

In [43]:
%%bigquery
-- Preview: the ten states (geo_ref) with the largest CO2 total over 1990-2022.
--
-- UNPIVOT turns the Y1990..Y2022 columns into one (column_name, value) row per
-- year; with UNPIVOT's default null handling, rows whose value is NULL are
-- excluded. Each gas is then summed conditionally, so a single GROUP BY yields
-- one row per state. The original wrapped this in a second SUM/GROUP BY over
-- the same key, which re-aggregated rows that were already unique per state.
-- A gas with no matching rows for a state yields NULL rather than 0.
SELECT
    geo_ref AS state,
    SUM(IF(ghg = 'Carbon Dioxide', value, NULL)) AS state_co2_emissions,
    SUM(IF(ghg = 'Methane', value, NULL)) AS state_methane_emissions,
    SUM(IF(ghg = 'Nitrous Oxide', value, NULL)) AS state_n2o_emissions
FROM us_climate_int.State_GHG_Emission
UNPIVOT(value FOR column_name IN (
    Y1990, Y1991, Y1992, Y1993, Y1994, Y1995, Y1996, Y1997, Y1998, Y1999,
    Y2000, Y2001, Y2002, Y2003, Y2004, Y2005, Y2006, Y2007, Y2008, Y2009,
    Y2010, Y2011, Y2012, Y2013, Y2014, Y2015, Y2016, Y2017, Y2018, Y2019,
    Y2020, Y2021, Y2022
))
GROUP BY state
ORDER BY state_co2_emissions DESC
LIMIT 10;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,state,state_co2_emissions,state_methane_emissions,state_n2o_emissions
0,TX,20276.344158,3566.223164,832.964776
1,CA,11620.099935,1832.100356,488.592865
2,OH,7928.422995,754.97839,191.812165
3,PA,7918.459394,1403.443944,164.817571
4,FL,7373.591707,647.247545,304.546563
5,IL,7198.19778,757.478619,218.592724
6,IN,7072.69052,472.032244,362.610114
7,LA,6097.043467,798.378517,219.052539
8,NY,6030.675224,507.210732,145.993372
9,MI,5978.896008,588.705836,154.919254


Create mart `top_polluting_state` to identify the states with the highest total greenhouse gas (GHG) emissions over the entire recorded period, allowing for comparative analysis of state-level pollution contributions and policy effectiveness.

In [44]:
%%bigquery
-- Mart: CO2 / methane / N2O totals per state (geo_ref) over 1990-2022,
-- one row per state.
--
-- UNPIVOT turns the Y1990..Y2022 columns into one (column_name, value) row per
-- year; with UNPIVOT's default null handling, rows whose value is NULL are
-- excluded. A single GROUP BY is enough: the original's outer SUM/GROUP BY
-- re-aggregated rows that were already unique per state.
-- A gas with no matching rows for a state yields NULL rather than 0.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table. Apply ORDER BY state_co2_emissions DESC when reading the table.
CREATE OR REPLACE TABLE us_climate_mrt.top_polluting_state AS
SELECT
    geo_ref AS state,
    SUM(IF(ghg = 'Carbon Dioxide', value, NULL)) AS state_co2_emissions,
    SUM(IF(ghg = 'Methane', value, NULL)) AS state_methane_emissions,
    SUM(IF(ghg = 'Nitrous Oxide', value, NULL)) AS state_n2o_emissions
FROM us_climate_int.State_GHG_Emission
UNPIVOT(value FOR column_name IN (
    Y1990, Y1991, Y1992, Y1993, Y1994, Y1995, Y1996, Y1997, Y1998, Y1999,
    Y2000, Y2001, Y2002, Y2003, Y2004, Y2005, Y2006, Y2007, Y2008, Y2009,
    Y2010, Y2011, Y2012, Y2013, Y2014, Y2015, Y2016, Y2017, Y2018, Y2019,
    Y2020, Y2021, Y2022
))
GROUP BY state

Query is running:   0%|          |

Identify top polluting sectors

In [45]:
%%bigquery
-- Preview: sectors ranked by CO2 total over 1990-2022 (at most ten rows).
--
-- UNPIVOT turns the Y1990..Y2022 columns into one (column_name, value) row per
-- year; with UNPIVOT's default null handling, rows whose value is NULL are
-- excluded. A single GROUP BY yields one row per sector: the original's outer
-- SUM/GROUP BY re-aggregated rows that were already unique per sector.
-- A gas with no matching rows for a sector yields NULL rather than 0
-- (see the empty CO2 cell for Waste in the saved output).
SELECT
    sector,
    SUM(IF(ghg = 'Carbon Dioxide', value, NULL)) AS sector_co2_emissions,
    SUM(IF(ghg = 'Methane', value, NULL)) AS sector_methane_emissions,
    SUM(IF(ghg = 'Nitrous Oxide', value, NULL)) AS sector_n2o_emissions
FROM us_climate_int.State_GHG_Emission
UNPIVOT(value FOR column_name IN (
    Y1990, Y1991, Y1992, Y1993, Y1994, Y1995, Y1996, Y1997, Y1998, Y1999,
    Y2000, Y2001, Y2002, Y2003, Y2004, Y2005, Y2006, Y2007, Y2008, Y2009,
    Y2010, Y2011, Y2012, Y2013, Y2014, Y2015, Y2016, Y2017, Y2018, Y2019,
    Y2020, Y2021, Y2022
))
GROUP BY sector
ORDER BY sector_co2_emissions DESC
LIMIT 10;

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,sector,sector_co2_emissions,sector_methane_emissions,sector_n2o_emissions
0,Energy,176107.357312,11713.574533,1831.934487
1,Industrial Processes and Product Use,6306.038263,31.184355,884.239205
2,Agriculture,256.346786,8760.61727,4787.764963
3,"Land Use, Land-Use Change and Forestry",-10634.210909,708.445252,213.22687
4,Waste,,5803.781506,649.489882


Create mart `top_polluting_sector` to identify the highest emitting sectors by finding total greenhouse gas emissions from the `State_GHG_Emission` table.

In [46]:
%%bigquery
-- Mart: CO2 / methane / N2O totals per sector over 1990-2022,
-- one row per sector.
--
-- UNPIVOT turns the Y1990..Y2022 columns into one (column_name, value) row per
-- year; with UNPIVOT's default null handling, rows whose value is NULL are
-- excluded. A single GROUP BY is enough: the original's outer SUM/GROUP BY
-- re-aggregated rows that were already unique per sector.
-- A gas with no matching rows for a sector yields NULL rather than 0.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table. Apply ORDER BY sector_co2_emissions DESC when reading the table.
CREATE OR REPLACE TABLE us_climate_mrt.top_polluting_sector AS
SELECT
    sector,
    SUM(IF(ghg = 'Carbon Dioxide', value, NULL)) AS sector_co2_emissions,
    SUM(IF(ghg = 'Methane', value, NULL)) AS sector_methane_emissions,
    SUM(IF(ghg = 'Nitrous Oxide', value, NULL)) AS sector_n2o_emissions
FROM us_climate_int.State_GHG_Emission
UNPIVOT(value FOR column_name IN (
    Y1990, Y1991, Y1992, Y1993, Y1994, Y1995, Y1996, Y1997, Y1998, Y1999,
    Y2000, Y2001, Y2002, Y2003, Y2004, Y2005, Y2006, Y2007, Y2008, Y2009,
    Y2010, Y2011, Y2012, Y2013, Y2014, Y2015, Y2016, Y2017, Y2018, Y2019,
    Y2020, Y2021, Y2022
))
GROUP BY sector

Query is running:   0%|          |

## Question 3: How has the average temperature changed per state throughout the years from the state average temperatures data? Is there a relationship between the highest and lowest average temperatures and state disasters from the state disasters data?
First find the highest average temperature per year per state to see how it has increased or decreased across the years

In [None]:
%%bigquery
-- Preview: highest average_temp per state and year, first ten rows in
-- state/year order. State ids are mapped to names through Geo_References.
SELECT
    geo.name                AS state,
    temps.year,
    MAX(temps.average_temp) AS max_temp
FROM us_climate_int.State_Average_Temperature AS temps
INNER JOIN us_climate_int.Geo_References AS geo
    ON temps.state = geo.geo_id
GROUP BY geo.name, temps.year
ORDER BY geo.name, temps.year
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,state,year,max_temp
0,Alabama,1950,77.6
1,Alabama,1951,81.6
2,Alabama,1952,82.2
3,Alabama,1953,80.1
4,Alabama,1954,82.9
5,Alabama,1955,80.0
6,Alabama,1956,79.8
7,Alabama,1957,79.6
8,Alabama,1958,78.9
9,Alabama,1959,80.0


Create mart table from previous query

In [None]:
%%bigquery
-- Mart: highest average_temp per state name and year
-- (one row per state / year). State ids are mapped to names via Geo_References.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY state, year when reading the table.
create or replace table us_climate_mrt.highest_avg_temps_by_state_by_year as
  select s.name as state, t.year, max(t.average_temp) as max_temp
  from us_climate_int.State_Average_Temperature t
  join us_climate_int.Geo_References s
  on t.state = s.geo_id
  group by s.name, t.year

Query is running:   0%|          |

Now find the lowest average temperature per state per year

In [None]:
%%bigquery
-- Preview: lowest average_temp per state and year, first ten rows in
-- state/year order. State ids are mapped to names through Geo_References.
SELECT
    geo.name                AS state,
    temps.year,
    MIN(temps.average_temp) AS min_temp
FROM us_climate_int.State_Average_Temperature AS temps
INNER JOIN us_climate_int.Geo_References AS geo
    ON temps.state = geo.geo_id
GROUP BY geo.name, temps.year
ORDER BY geo.name, temps.year
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,state,year,min_temp
0,Alabama,1950,44.9
1,Alabama,1951,44.0
2,Alabama,1952,48.6
3,Alabama,1953,47.2
4,Alabama,1954,45.6
5,Alabama,1955,44.3
6,Alabama,1956,44.1
7,Alabama,1957,51.2
8,Alabama,1958,39.0
9,Alabama,1959,42.8


Create mart table from previous query

In [None]:
%%bigquery
-- Mart: lowest average_temp per state name and year
-- (one row per state / year). State ids are mapped to names via Geo_References.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY state, year when reading the table.
create or replace table us_climate_mrt.lowest_avg_temps_by_state_by_year as
  select s.name as state, t.year, min(t.average_temp) as min_temp
  from us_climate_int.State_Average_Temperature t
  join us_climate_int.Geo_References s
  on t.state = s.geo_id
  group by s.name, t.year

Query is running:   0%|          |

Is there a relationship between the highest and lowest average temperatures and state disasters?

In [None]:
%%bigquery
-- Preview: yearly min/max temperature per state next to that year's disaster
-- counts (first ten rows in state/year order).
-- The two temperature marts are paired on (state, year); the state name is
-- mapped back to geo_id through Geo_References to reach State_Disaster.
-- All joins are inner, so state/years without a State_Disaster row drop out.
-- The *_count columns are left unqualified as in the original; presumably they
-- come from State_Disaster -- confirm against the schema.
SELECT
    hi.state,
    hi.year,
    lo.min_temp,
    hi.max_temp,
    drought_count,
    flooding_count,
    freeze_count,
    severe_storm_count,
    tropical_cyclone_count,
    wildfire_count,
    winter_storm_count
FROM us_climate_mrt.highest_avg_temps_by_state_by_year AS hi
INNER JOIN us_climate_mrt.lowest_avg_temps_by_state_by_year AS lo
    ON hi.state = lo.state AND hi.year = lo.year
INNER JOIN us_climate_int.Geo_References AS geo
    ON hi.state = geo.name
INNER JOIN us_climate_int.State_Disaster AS dis
    ON geo.geo_id = dis.state AND hi.year = dis.year
ORDER BY hi.state, hi.year
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,state,year,min_temp,max_temp,drought_count,flooding_count,freeze_count,severe_storm_count,tropical_cyclone_count,wildfire_count,winter_storm_count
0,Alabama,1980,44.7,82.0,1,0,0,0,0,0,0
1,Alabama,1981,42.2,80.6,0,0,0,1,0,0,0
2,Alabama,1982,43.0,79.0,0,0,0,1,0,0,1
3,Alabama,1983,44.4,80.8,1,1,1,0,0,0,0
4,Alabama,1984,41.0,77.8,0,0,0,0,0,0,0
5,Alabama,1985,41.4,78.4,0,0,0,0,2,0,1
6,Alabama,1986,42.4,80.6,1,0,0,0,0,0,0
7,Alabama,1987,44.4,80.8,0,0,0,0,0,0,0
8,Alabama,1988,43.0,79.6,1,0,0,0,0,0,0
9,Alabama,1989,46.2,78.6,0,0,0,0,0,0,1


Create mart from previous query

In [None]:
%%bigquery
-- Mart: yearly min/max temperature per state joined to that year's disaster
-- counts. The two temperature marts are paired on (state, year); the state
-- name is mapped back to geo_id through Geo_References to reach State_Disaster.
-- All joins are inner, so state/years without a State_Disaster row drop out.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY state, year when reading the table.
create or replace table us_climate_mrt.high_and_low_avg_temps_and_disasters_by_state as
  select h.state, h.year, l.min_temp, h.max_temp, drought_count, flooding_count,
       freeze_count, severe_storm_count, tropical_cyclone_count,
       wildfire_count, winter_storm_count
  from us_climate_mrt.highest_avg_temps_by_state_by_year h
  join us_climate_mrt.lowest_avg_temps_by_state_by_year l on (h.state = l.state and h.year = l.year)
  join us_climate_int.Geo_References g on h.state = g.name
  join us_climate_int.State_Disaster s on (g.geo_id = s.state and h.year = s.year)

Query is running:   0%|          |

## Question 4: What are the most common policy categories in each state from the policies data?

In [None]:
%%bigquery
-- Preview: number of policies per (category, state), largest counts first.
-- p.category is added as a final sort key: without it, categories that tie on
-- count within a state come back in arbitrary order, so the LIMIT 10 preview
-- could differ from run to run.
select p.category, g.name as state, count(*) as count
from us_climate_int.Policies p
join us_climate_int.State_Climate_Policy s
on s.policy = p.policy
join us_climate_int.Geo_References g
on s.state = g.geo_id
group by p.category, g.name
order by count(*) desc, g.name, p.category
limit 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,category,state,count
0,Building Efficiency and Standards,Alabama,6
1,Environmental Justice and Equity,Alabama,6
2,Environmental Justice and Equity,Alaska,6
3,Building Efficiency and Standards,Alaska,6
4,Building Efficiency and Standards,Arizona,6
5,Environmental Justice and Equity,Arizona,6
6,Environmental Justice and Equity,Arkansas,6
7,Building Efficiency and Standards,Arkansas,6
8,Building Efficiency and Standards,California,6
9,Environmental Justice and Equity,California,6


In [None]:
%%bigquery
-- Mart: number of policies per (category, state), one row per pair.
-- Policies are linked to states through State_Climate_Policy, and state ids
-- are mapped to names through Geo_References.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY count DESC, state, category when reading the table.
create or replace table us_climate_mrt.top_policy_categories_by_state as
  select p.category, g.name as state, count(*) as count
  from us_climate_int.Policies p
  join us_climate_int.State_Climate_Policy s
  on s.policy = p.policy
  join us_climate_int.Geo_References g
  on s.state = g.geo_id
  group by p.category, g.name

Query is running:   0%|          |

## Question 5: Which organizations have the most facilities capturing, storing, and transporting CO2 and how much do they contribute to CO2 emissions from carbon capture facility and categories data?
First looking at which organizations have the most facilities in each category

In [None]:
%%bigquery
-- Preview: number of carbon-capture facility/category rows per
-- (organization, category), largest counts first (ten rows).
-- NOTE(review): the saved output lists Storage before Capture within ties,
-- which is not ascending category order -- check the category values for
-- leading whitespace, or re-run the cell to refresh the output.
SELECT
    fac.organization,
    cat.category,
    COUNT(*) AS count
FROM us_climate_int.Carbon_Capture_Facility AS fac
INNER JOIN us_climate_int.Carbon_Capture_Categories AS cat
    ON fac.id = cat.facility_id
GROUP BY fac.organization, cat.category
ORDER BY COUNT(*) DESC, fac.organization, cat.category
LIMIT 10

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,organization,category,count
0,Summit Carbon Solutions,Storage,12
1,Summit Carbon Solutions,Capture,12
2,Summit Carbon Solutions,Transport,9
3,Open,Storage,3
4,Open,Capture,3
5,Ash Grove Cement Plant,Storage,2
6,Ash Grove Cement Plant,Capture,2
7,Carbon Management Canada,Storage,2
8,Carbon Management Canada,Capture,2
9,Coming,Storage,2


Create mart from previous query

In [None]:
%%bigquery
-- Mart: number of carbon-capture facility/category rows per
-- (organization, category), one row per pair.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table, so sorting inside CREATE TABLE ... AS SELECT only adds a sort step.
-- Apply ORDER BY count DESC, organization, category when reading the table.
create or replace table us_climate_mrt.carbon_category_count_by_organization as
  select f.organization, c.category, count(*) as count from us_climate_int.Carbon_Capture_Facility f
  join us_climate_int.Carbon_Capture_Categories c on f.id = c.facility_id
  group by f.organization, c.category

Query is running:   0%|          |

See which organizations from carbon capture data contribute to CO2 emissions from facility GHG emissions data. (Note: Not all organizations from carbon capture data are in facility data)

In [None]:
%%bigquery
-- Total CO2 emissions (from facility GHG data) for organizations that also
-- appear in the carbon-capture data.
--
-- Carbon_Capture_Facility is used only as a membership filter. Joining it
-- directly on organization repeats every Facility/emission row once per
-- capture-facility row of that organization, so an organization with k capture
-- facilities had its emissions counted k times. The IN filter keeps each
-- emission row exactly once. Rows with a NULL organization are excluded, as
-- they were by the original equality join.
select f.organization, sum(e.carbon_dioxide_emissions) as total_co2_emissions
from us_climate_int.Facility f
join us_climate_int.Facility_GHG_Emissions e on
(f.year = e.year and f.facility_id = e.facility_id and f.facility_name = e.facility_name)
where f.organization in (
  select organization from us_climate_int.Carbon_Capture_Facility
)
group by f.organization
order by total_co2_emissions desc

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,organization,total_co2_emissions
0,ExxonMobil,257216000.0
1,Shell,37404750.0
2,Arizona Public Service,10722890.0
3,Air Products,9495968.0
4,Occidental Petroleum Corporation,979294.0
5,Plymouth Energy,793681.6
6,Kentucky Utilities Company,653626.2


Create mart from this table

In [None]:
%%bigquery
-- Mart: total CO2 emissions (from facility GHG data) per organization that
-- also appears in the carbon-capture data; one row per organization.
--
-- Carbon_Capture_Facility is used only as a membership filter. Joining it
-- directly on organization repeats every Facility/emission row once per
-- capture-facility row of that organization, so an organization with k capture
-- facilities had its emissions counted k times. The IN filter keeps each
-- emission row exactly once. Rows with a NULL organization are excluded, as
-- they were by the original equality join.
--
-- No ORDER BY here: BigQuery does not guarantee the row order of a stored
-- table. Apply ORDER BY total_co2_emissions DESC when reading the table.
create or replace table us_climate_mrt.carbon_organizations_co2_emissions as
  select f.organization, sum(e.carbon_dioxide_emissions) as total_co2_emissions
  from us_climate_int.Facility f
  join us_climate_int.Facility_GHG_Emissions e on
  (f.year = e.year and f.facility_id = e.facility_id and f.facility_name = e.facility_name)
  where f.organization in (
    select organization from us_climate_int.Carbon_Capture_Facility
  )
  group by f.organization

Query is running:   0%|          |