**How to Query the US Forest Service (USFS) Forest Inventory and Analysis (FIA) Program Data
(BigQuery Dataset)**

In [1]:
import bq_helper
from bq_helper import BigQueryHelper
# https://www.kaggle.com/sohier/introduction-to-the-bq-helper-package
usfs = bq_helper.BigQueryHelper(active_project="bigquery-public-data",
                                   dataset_name="usfs_fia")

In [2]:
bq_assistant = BigQueryHelper("bigquery-public-data", "usfs_fia")
bq_assistant.list_tables()

['condition',
 'estimated_forestland_acres',
 'estimated_timberland_acres',
 'plot',
 'plot_tree',
 'population',
 'population_estimation_unit',
 'population_evaluation',
 'population_evaluation_group',
 'population_evaluation_type',
 'population_stratum',
 'population_stratum_assign',
 'tree']

In [3]:
bq_assistant.head("plot_tree", num_rows=15)

Unnamed: 0,plot_sequence_number,survey_sequence_number,county_sequence_number,previous_plot_sequence_number,plot_inventory_year,plot_state_code,plot_state_code_name,plot_survey_unit_code,plot_county_code,plot_phase_2_plot_number,...,actual_centroid_diameter_height,upper_stem_diameter,upper_stem_diameter_height,sound_cubicfoot_volume_sawlog_portion,dry_biomass_sawlog_portion_sawtimber_tree,damage_agent_code_1_southern_research_station,damage_agent_code_2_southern_research_station,damage_agent_code_3_southern_research_station,aboveground_dry_biomass,unique_tree
0,10669715010497,36857999010497,13005243000666,,2001,2,Alaska,1,122,42052,...,,,,,,,,,,
1,5244702010661,5234631010661,650010661,,1983,31,Nebraska,2,45,248,...,,,,,,,,,,
2,22599852010661,22473732010661,527010661,,1989,29,Missouri,3,161,23,...,,,,,,,,,,
3,5428119010661,5415713010661,730010661,,1995,46,South Dakota,1,41,866,...,,,,,,,,,,
4,259076729010854,172272516010854,10140119000333,,2011,48,Texas,7,229,180,...,,,,,,,,,,
5,37277412010690,37258316010690,194010690,,2010,32,Nevada,1,31,84203,...,,,,,,,,,,
6,220874175010661,220870811010661,824010661,102486300000000.0,2011,55,Wisconsin,5,71,20056,...,,,,,,,,,,
7,40422651010690,40409733010690,278010690,,2015,56,Wyoming,1,39,88237,...,,,,,,,,,,
8,35379436010690,35368339010690,184010690,,2007,32,Nevada,1,9,88159,...,,,,,,,,,,
9,238242410010854,158050680010854,516010478,,1978,5,Arkansas,5,145,90045,...,,,,,,,,,,


In [4]:
bq_assistant.table_schema("plot_tree")

[SchemaField('plot_sequence_number', 'INTEGER', 'NULLABLE', 'Foreign key linking the subplot record to the plot record', ()),
 SchemaField('survey_sequence_number', 'INTEGER', 'NULLABLE', 'Foreign key linking the plot record to the survey record.', ()),
 SchemaField('county_sequence_number', 'INTEGER', 'NULLABLE', 'Foreign key linking the snapshot record to the county record.', ()),
 SchemaField('previous_plot_sequence_number', 'INTEGER', 'NULLABLE', "Foreign key linking the plot record to the previous inventory's plot record for this location. Only populated on remeasurement plots.", ()),
 SchemaField('plot_inventory_year', 'INTEGER', 'NULLABLE', 'The year that best represents when the inventory data were collected. Under the annual inventory system, a group of plots is selected each year for sampling. The selection is based on a panel system. Inventory year is the year in which the majority of plots in that group were collected (plots in the group have the same panel and, if applicab

What information does this table have about trees and plots in King County, Washington?



In [5]:
# Note: State and county are FIPS state codes.
query1 = """
SELECT
    plot_sequence_number,
    plot_state_code,
    plot_county_code,
    measurement_year,
    latitude,
    longitude,
    tree_sequence_number,
    species_code,
    current_diameter
FROM
    `bigquery-public-data.usfs_fia.plot_tree`
WHERE
    plot_state_code = 53
    AND plot_county_code = 33
;
        """
response1 = usfs.query_to_pandas_safe(query1, max_gb_scanned=10)
response1.head(20)

Unnamed: 0,plot_sequence_number,plot_state_code,plot_county_code,measurement_year,latitude,longitude,tree_sequence_number,species_code,current_diameter
0,22954536010497,53,33,2007,47.276656,-122.139196,,,
1,40220440010497,53,33,2010,47.400692,-122.068314,,,
2,48205592010497,53,33,2013,47.323984,-121.862422,,,
3,22956152010497,53,33,2007,47.326645,-122.005285,,,
4,22954560010497,53,33,2007,47.430085,-122.15241,,,
5,22956164010497,53,33,2007,47.473993,-122.294025,,,
6,48204480010497,53,33,2011,47.441714,-122.027084,,,
7,15342728010497,53,33,2005,47.622465,-121.929014,,,
8,13049424010497,53,33,2005,47.326971,-122.078664,,,
9,22933072010497,53,33,2008,47.576598,-122.150858,,,


What other interesting information does this table have about trees in King County, Washington?


In [6]:
query2 = """
SELECT
    plot_sequence_number,
    plot_state_code,
    plot_state_code_name
    plot_county_code,
    measurement_year,
    latitude,
    longitude,
    tree_sequence_number,
    species_code,
    species_common_name,
    species_scientific_name,
    current_diameter
FROM
    `bigquery-public-data.usfs_fia.plot_tree`
WHERE
    plot_state_code = 53
    AND plot_county_code = 33
;
        """
response2 = usfs.query_to_pandas_safe(query2, max_gb_scanned=10)
response2.head(10)

Unnamed: 0,plot_sequence_number,plot_state_code,plot_county_code,measurement_year,latitude,longitude,tree_sequence_number,species_code,species_common_name,species_scientific_name,current_diameter
0,22954536010497,53,Washington,2007,47.276656,-122.139196,,,,,
1,40220440010497,53,Washington,2010,47.400692,-122.068314,,,,,
2,48205592010497,53,Washington,2013,47.323984,-121.862422,,,,,
3,22956152010497,53,Washington,2007,47.326645,-122.005285,,,,,
4,22954560010497,53,Washington,2007,47.430085,-122.15241,,,,,
5,22956164010497,53,Washington,2007,47.473993,-122.294025,,,,,
6,48204480010497,53,Washington,2011,47.441714,-122.027084,,,,,
7,15342728010497,53,Washington,2005,47.622465,-121.929014,,,,,
8,13049424010497,53,Washington,2005,47.326971,-122.078664,,,,,
9,22933072010497,53,Washington,2008,47.576598,-122.150858,,,,,


What information is there in this table about timberland?

In [7]:
query5 = """
Select  
 pt.plot_sequence_number as plot_sequence_number,
 p.evaluation_type evaluation_type,
 p.evaluation_group as evaluation_group,
 p.evaluation_description as evaluation_description,
 pt.plot_state_code_name as state_name,
 p.inventory_year as inventory_year,
 p.state_code as state_code, 
 #calculate area - this replaces the "decode" logic in example from Oracle
 CASE
  WHEN c.proportion_basis = 'MACR' and p.adjustment_factor_for_the_macroplot > 0
  THEN
    (p.expansion_factor * c.condition_proportion_unadjusted * p.adjustment_factor_for_the_macroplot) 
  ELSE 0
 END as macroplot_acres,
 CASE
  WHEN c.proportion_basis = 'SUBP' and p.adjustment_factor_for_the_subplot > 0
  THEN
    (p.expansion_factor * c.condition_proportion_unadjusted * p.adjustment_factor_for_the_subplot) 
  ELSE 0
 END as subplot_acres
FROM 
  `bigquery-public-data.usfs_fia.condition`  c
JOIN 
  `bigquery-public-data.usfs_fia.plot_tree`  pt
        ON pt.plot_sequence_number = c.plot_sequence_number
JOIN 
  `bigquery-public-data.usfs_fia.population`  p
      ON p.plot_sequence_number = pt.plot_sequence_number
WHERE 
  p.evaluation_type = 'EXPCURR'
  AND c.condition_status_code = 1
GROUP BY 
 plot_sequence_number,
 evaluation_type,
 evaluation_group,
 evaluation_description,
 macroplot_acres,
 subplot_acres,
 inventory_year,
 state_code,
 state_name
;
        """
response5 = usfs.query_to_pandas_safe(query5, max_gb_scanned=50)
response5.head(50)

Unnamed: 0,plot_sequence_number,evaluation_type,evaluation_group,evaluation_description,state_name,inventory_year,state_code,macroplot_acres,subplot_acres
0,10670274010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,7415.065036
1,3084404010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
2,5244708010661,EXPCURR,311983,NEBRASKA 1983: CURRENT AREA (PERIODIC),Nebraska,1983,31,0.0,0.0
3,10671197010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,2007.783425
4,3083934010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
5,10671137010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,7078.577172
6,3069512010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
7,3012191010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
8,10670455010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,5969.622073
9,9173008010661,EXPCURR,91985,CONNECTICUT 1985: CURRENT AREA (PERIODIC),Connecticut,1985,9,0.0,0.0


What information is there in this table about forestland?

In [8]:
query6 = """
Select  
 pt.plot_sequence_number as plot_sequence_number,
 p.evaluation_type evaluation_type,
 p.evaluation_group as evaluation_group,
 p.evaluation_description as evaluation_description,
 pt.plot_state_code_name as state_name,
 p.inventory_year as inventory_year,
 p.state_code as state_code, 
 #calculate area - this replaces the "decode" logic in example from Oracle
 CASE
  WHEN c.proportion_basis = 'MACR' and p.adjustment_factor_for_the_macroplot > 0
  THEN
    (p.expansion_factor * c.condition_proportion_unadjusted * p.adjustment_factor_for_the_macroplot) 
  ELSE 0
 END as macroplot_acres,
 CASE
  WHEN c.proportion_basis = 'SUBP' and p.adjustment_factor_for_the_subplot > 0
  THEN
    (p.expansion_factor * c.condition_proportion_unadjusted * p.adjustment_factor_for_the_subplot) 
  ELSE 0
 END as subplot_acres
FROM 
  `bigquery-public-data.usfs_fia.condition`  c
JOIN 
  `bigquery-public-data.usfs_fia.plot_tree`  pt
        ON pt.plot_sequence_number = c.plot_sequence_number
JOIN 
  `bigquery-public-data.usfs_fia.population`  p
      ON p.plot_sequence_number = pt.plot_sequence_number
WHERE 
  p.evaluation_type = 'EXPCURR'
  AND c.condition_status_code = 1
GROUP BY 
 plot_sequence_number,
 evaluation_type,
 evaluation_group,
 evaluation_description,
 macroplot_acres,
 subplot_acres,
 inventory_year,
 state_code,
 state_name;
        """
response6 = usfs.query_to_pandas_safe(query6, max_gb_scanned=50)
response6.head(50)

Unnamed: 0,plot_sequence_number,evaluation_type,evaluation_group,evaluation_description,state_name,inventory_year,state_code,macroplot_acres,subplot_acres
0,3084404010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
1,10670274010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,7415.065036
2,5244708010661,EXPCURR,311983,NEBRASKA 1983: CURRENT AREA (PERIODIC),Nebraska,1983,31,0.0,0.0
3,10671197010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,2007.783425
4,3083934010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
5,10671137010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,7078.577172
6,3069512010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
7,3012191010690,EXPCURR,561984,Wyoming: 1984 area,Wyoming,1984,56,0.0,0.0
8,10670455010497,EXPCURR,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,1996,2,0.0,5969.622073
9,9173008010661,EXPCURR,91985,CONNECTICUT 1985: CURRENT AREA (PERIODIC),Connecticut,1985,9,0.0,0.0


What is the approximate amount of timberland acres by state?



In [9]:
query3 = """
#standardSQL

SELECT
state_code,
evaluation_group,
evaluation_description,
state_name,
sum(macroplot_acres) + sum(subplot_acres) as total_acres,
latest
FROM (SELECT
        state_code,
        evaluation_group,
        evaluation_description,
        state_name,
        macroplot_acres,
        subplot_acres,
        MAX(evaluation_group) OVER (PARTITION By state_code) as latest                   
        FROM `bigquery-public-data.usfs_fia.estimated_timberland_acres` )
WHERE evaluation_group = latest
GROUP by state_code, state_name, evaluation_description, evaluation_group, latest
order by state_name
;
        """
response3 = usfs.query_to_pandas_safe(query3, max_gb_scanned=10)
response3.head(50)

Unnamed: 0,state_code,evaluation_group,evaluation_description,state_name,total_acres,latest
0,1,12017,"ALABAMA 2017: 2009-2017: CURRENT AREA, CURRENT...",Alabama,22649680.0,12017
1,2,22015,"ALASKA 2015: 2004-2015: CURRENT AREA, CURRENT ...",Alaska,6122441.0,22015
2,60,602012,"AMERICAN SAMOA 2012: CURRENT AREA, CURRENT VOLUME",American Samoa,32918.72,602012
3,4,42016,"ARIZONA 2016: 2007-2016: CURRENT AREA, CURRENT...",Arizona,2939672.0,42016
4,5,52016,"ARKANSAS 2016: 2012-2016: CURRENT AREA, CURREN...",Arkansas,18210910.0,52016
5,6,62016,"CALIFORNIA 2016: 2005-2016: CURRENT AREA, CURR...",California,16458380.0,62016
6,8,82016,"COLORADO 2016: 2007-2016: CURRENT AREA, CURREN...",Colorado,10437970.0,82016
7,9,92016,"CONNECTICUT 2016: 2011-2016: CURRENT AREA, CUR...",Connecticut,1750730.0,92016
8,10,102016,"DELAWARE 2016: 2011-2016: CURRENT AREA, CURREN...",Delaware,333937.9,102016
9,12,122015,"FLORIDA 2015: 2011-2015: CURRENT AREA, CURRENT...",Florida,15082740.0,122015


What is the approximate amount of forestland acres by state?


In [10]:
query4 = """
SELECT
state_code,
evaluation_group,
evaluation_description,
state_name,
sum(macroplot_acres) + sum(subplot_acres) as total_acres,
latest
FROM (SELECT
        state_code,
        evaluation_group,
        evaluation_description,
        state_name,
        macroplot_acres,
        subplot_acres,
        MAX(evaluation_group) OVER (PARTITION By state_code) as latest                   
        FROM `bigquery-public-data.usfs_fia.estimated_forestland_acres` )
WHERE evaluation_group = latest
GROUP by state_code, state_name, evaluation_description, evaluation_group, latest
order by state_name
;
        """
response4 = usfs.query_to_pandas_safe(query4, max_gb_scanned=10)
response4.head(50)

Unnamed: 0,state_code,evaluation_group,evaluation_description,state_name,total_acres,latest
0,1,12017,"ALABAMA 2017: 2009-2017: CURRENT AREA, CURRENT...",Alabama,22744610.0,12017
1,2,29503,"Alaska 1995-2003 Periodic, Excludes National F...",Alaska,10620390.0,29503
2,60,602012,"AMERICAN SAMOA 2012: CURRENT AREA, CURRENT VOLUME",American Samoa,39155.85,602012
3,4,42016,"ARIZONA 2016: 2007-2016: CURRENT AREA, CURRENT...",Arizona,18352360.0,42016
4,5,52016,"ARKANSAS 2016: 2012-2016: CURRENT AREA, CURREN...",Arkansas,18743430.0,52016
5,6,62016,"CALIFORNIA 2016: 2005-2016: CURRENT AREA, CURR...",California,31864230.0,62016
6,8,82016,"COLORADO 2016: 2007-2016: CURRENT AREA, CURREN...",Colorado,22694550.0,82016
7,9,92016,"CONNECTICUT 2016: 2011-2016: CURRENT AREA, CUR...",Connecticut,1786300.0,92016
8,10,102016,"DELAWARE 2016: 2011-2016: CURRENT AREA, CURREN...",Delaware,348795.2,102016
9,12,122015,"FLORIDA 2015: 2011-2015: CURRENT AREA, CURRENT...",Florida,16922120.0,122015


![](https://cloud.google.com/blog/big-data/2017/10/images/4728824346443776/forest-data-4.png)
https://cloud.google.com/blog/big-data/2017/10/images/4728824346443776/forest-data-4.png
