## Great Lakes Market

### Biomass

In [None]:
# Read the Great Lakes merchantable-biomass workbook (first sheet), exported
# from the FS FIA National Forest Inventory, and reshape it to one row per
# record and size class.
biomassNorth = pd.read_excel('../data/Merch Bio GLakes by spp 08-28-2024.xlsx', sheet_name=0)

# Missing cells become 0. This applies to every column, not only the
# volume columns.
biomassNorth = biomassNorth.fillna(0)

# Wide -> long. The first 13 columns identify the record; the next 16 columns
# (positions 13 through 28) each hold the timber volume for one size class
# (cubic feet, per the original notes). Each header combines a size class
# code with a diameter range in inches, e.g. code 0003 with range 5.0-6.9.
idColumns = list(biomassNorth.columns[:13])
sizeClassColumns = list(biomassNorth.columns[13:29])
biomassNorth = pd.melt(
    biomassNorth,
    id_vars=idColumns,
    value_vars=sizeClassColumns,
    var_name='size_class',
    value_name='volume',
)

# Split the header text at its first space into code and range.
sizeParts = biomassNorth['size_class'].str.split(' ', n=1, expand=True)
biomassNorth[['size_class_code', 'size_class_range']] = sizeParts
# Trim the wrapper characters: two leading characters off the code ...
biomassNorth['size_class_code'] = biomassNorth['size_class_code'].str.slice(start=2)
# ... and one trailing character off the range.
# NOTE(review): this assumes every header carries those wrapper characters;
# confirm against the workbook if the export format changes.
biomassNorth['size_class_range'] = biomassNorth['size_class_range'].str.slice(stop=-1)

# EVALID as a zero-padded 6-character string; characters 3-4 are the
# two-digit year, which is turned into a four-digit integer.
evalidText = biomassNorth['EVALID'].astype(str).str.zfill(6)
biomassNorth['EVALID'] = evalidText
biomassNorth['year'] = 2000 + evalidText.str[2:4].astype(int)

# Zero-pad the code columns to fixed widths: state (2), county (3) and
# survey unit (2). The county FIPS code is state + county.
for codeColumn, codeWidth in (('STATECD', 2), ('COUNTYCD', 3), ('UNITCD', 2)):
    biomassNorth[codeColumn] = biomassNorth[codeColumn].astype(str).str.zfill(codeWidth)
biomassNorth['fips'] = biomassNorth['STATECD'] + biomassNorth['COUNTYCD']

# Lowercase every column name.
biomassNorth.columns = biomassNorth.columns.str.lower()

# Keep only the columns used downstream. 'year' and 'evalid' are not among
# them, so they are dropped here.
keepColumns = [
    'statenm', 'statecd', 'fips', 'unitcd', 'spcd', 'scientific_name',
    'spgrpcd', 'spclass', 'size_class_code', 'size_class_range', 'volume',
]
biomassNorth = biomassNorth[keepColumns]

# Report how many distinct species codes each state has.
speciesPerState = biomassNorth.groupby('statecd')['spcd'].nunique()
print("Unique species by state ", speciesPerState)


Unique species by state  statecd
26    88
27    66
55    69
Name: spcd, dtype: int64


### Great Lakes Cut Species

In [None]:
# import openpyxl

# # load harvest species list
# harvestSpeciesGL = pd.read_excel('../data/GLakes harvested tree species V2.xlsx', sheet_name='GL harvested spp')

# # drop if ESTIMATE is 0
# harvestSpeciesGL = harvestSpeciesGL[harvestSpeciesGL['Harvest removals, in trees, at least 5in, forestland'] != 0]

# # drop if Estimate is NaN
# harvestSpeciesGL = harvestSpeciesGL.dropna(subset=['Harvest removals, in trees, at least 5in, forestland'])


# # spatial id in "EVALUATION" looks like "`0055 552101 Wisconsin 2021"
# # where "552101" is statecd + two digit year + evalid
# # extract the state code from the spatial id
# harvestSpeciesGL['statecd'] = harvestSpeciesGL['EVALUATION'].str[6:8]

# # species information in "SPECIES" looks like "`0012 SPCD 0012 - balsam fir (Abies balsamea)"
# # where "SPCD 0012" is the species code
# # extract the second part of species code from the species information
# harvestSpeciesGL['spcd'] = harvestSpeciesGL['SPECIES'].str.split(' ').str[2]

# # convert spcd to int64
# harvestSpeciesGL['spcd'] = harvestSpeciesGL['spcd'].astype('int64')

# # rename 4th column to volume
# harvestSpeciesGL.rename(columns={'Harvest removals, in trees, at least 5in, forestland': 'volume'}, inplace=True)


# # keep only the columns we need
# harvestSpeciesGL = harvestSpeciesGL[['statecd', 'spcd']]


# # sort and print the unique speciesGL
# speciesGL = harvestSpeciesGL.drop_duplicates()
# speciesGL = speciesGL.sort_values('spcd')
# print(speciesGL['spcd'].unique())


In [1]:
# Species codes (`spcd`) treated as marketable in the Great Lakes market.
# biomassNorth is restricted to rows whose spcd appears in this list.
marketSpeciesGL = [
    12, 86, 71, 91, 94, 95, 105, 110,
    111, 121, 125, 126, 129, 130, 131,
    132, 221, 313, 314, 316, 318, 371,
    375, 409, 402, 403, 404, 405, 407,
    462, 531, 541, 543, 544, 546, 601,
    602, 611, 621, 651, 652, 653, 742,
    743, 746, 762, 802, 804, 809, 812,
    822, 823, 826, 830, 832, 833, 837,
    951, 972, 977
    ]

# Boolean mask of marketable rows, applied as a row filter.
isMarketable = biomassNorth['spcd'].isin(marketSpeciesGL)
biomassNorth = biomassNorth.loc[isMarketable]

# Distinct marketable species codes remaining in each state.
print(biomassNorth.groupby('statecd')['spcd'].nunique())

NameError: name 'biomassNorth' is not defined

### Great Lakes Stumpage Prices

In [None]:
# Build pricesNorth: the mean stumpage price, in $ per cubic foot, for each
# state, price region and merged species, with one column per product
# (Pulpwood, Sawtimber). Prices are averaged over every year in the file.

# read in northern price data from excel file
# NOTE(review): openpyxl is not referenced by name in this cell; the import is
# kept as in the original (presumably to fail early if the Excel engine is
# missing - confirm).
import openpyxl

pricesNorth = pd.read_excel('../data/Timber Prices/TMN/TMN_Price_Series_June2023.xlsx')


# drop all rows where Region has exactly 2 characters
# these are state mean prices
pricesNorth = pricesNorth[pricesNorth['Region'].str.len() != 2]

# filter for Market = 'Stumpage'
# .copy() so the column assignments below act on an independent frame instead
# of a slice of the frame read from disk (avoids SettingWithCopyWarning)
pricesNorth = pricesNorth[pricesNorth['Market'] == 'Stumpage'].copy()

# convert 'Period End Date' to datetime; unparseable dates become NaT
pricesNorth['Period End Date'] = pd.to_datetime(pricesNorth['Period End Date'],
                                                errors='coerce')

# create a year variable from column "Period End Date"
# (only used below to drop rows without a usable date)
pricesNorth['year'] = pricesNorth['Period End Date'].dt.year

# split the Region column into two columns on the first '-'
# and zero-pad the region part to two characters
pricesNorth[['state_abbr', 'priceRegion']] = pricesNorth['Region'].str.split('-', n=1, expand=True)
pricesNorth['priceRegion'] = pricesNorth['priceRegion'].str.zfill(2)

# add a column for the state fips code
# first, create a dictionary of state abbreviations and fips codes for MN, WI, MI
# (any other abbreviation maps to NaN and is dropped by the groupby below)
state_fips = {'MN': '27', 'WI': '55', 'MI': '26'}
pricesNorth['statecd'] = pricesNorth['state_abbr'].map(state_fips)


# select only the columns we need
# year, statecd, priceRegion, Species, Product, $ Per Unit, Units
pricesNorth = pricesNorth[['year', 'statecd', 'priceRegion', 'Species',
                            'Product', '$ Per Unit', 'Units']]

# drop if $ Per Unit is NaN or year is NaN
pricesNorth = pricesNorth.dropna(subset=['$ Per Unit', 'year'])

# convert every price to $ per cubic foot
# conversion factors: 1 cord = 128 cubic feet
#                     1 MBF = 1000 board feet and 12 board feet = 1 cubic foot,
#                     so 1 MBF = 1000 / 12 cubic feet
cuftPerCord = 128
cuftPerMbf = 1000 / 12

pricesNorth['$ Per Unit'] = pricesNorth['$ Per Unit'].astype(float)

# compare units case-insensitively and accept both 'cord' and 'cords', so that
# 'Cords' / 'MBF' are converted just like 'cord' / 'mbf'
unitsNormalized = pricesNorth['Units'].astype(str).str.strip().str.lower()
isCord = unitsNormalized.isin(['cord', 'cords'])
isMbf = unitsNormalized == 'mbf'

# rows with any other unit keep their reported price unchanged
pricesNorth['cuftPrice'] = pricesNorth['$ Per Unit']
pricesNorth.loc[isCord, 'cuftPrice'] = pricesNorth['$ Per Unit'] / cuftPerCord
pricesNorth.loc[isMbf, 'cuftPrice'] = pricesNorth['$ Per Unit'] / cuftPerMbf

# drop the $ Per Unit and Units columns
pricesNorth = pricesNorth.drop(columns=['$ Per Unit', 'Units'])

# rename variables
pricesNorth = pricesNorth.rename(columns={'Species': 'priceSpecies'})

# reduce pricesNorth to the mean price by statecd, priceRegion, priceSpecies,
# Product (year is not a grouping key, so this averages over all years)
pricesNorth = pricesNorth.groupby(
    ['statecd', 'priceRegion', 'priceSpecies', 'Product']
    )['cuftPrice'].mean().reset_index()

# pivot the table so that each row is a unique statecd, priceRegion,
# priceSpecies and the columns are the products
pricesNorth = pricesNorth.pivot(
    index=['statecd', 'priceRegion', 'priceSpecies'],
    columns='Product',
    values='cuftPrice').reset_index()

# some prices are not reported for all products; they stay NaN for now so the
# species-level mean below skips them instead of averaging in a price of 0

# add a new variable called "mergedSpecies" to pricesNorth.
# using a specific dictionary, map the priceSpecies to the mergedSpecies
# NOTE(review): 'Other Sfwd' may be a misspelling of 'Other Sftwd', and
# 'Other Softwood' maps to 'Softwood' while the other softwood groups map to
# 'Pine' - confirm both against the labels in the price file.
speciesCrosswalk = {'Maple Unspecified': 'Maple',
                    'Mixed Hdwd': 'Hardwood',
                    'Mixed Sftwd': 'Pine',
                    'Other Hdwd': 'Hardwood',
                    'Other Sfwd': 'Pine',
                    'Oak Unspecified': 'Oak',
                    'Other Hardwood': 'Hardwood',
                    'Other Softwood': 'Softwood',
                    'Pine Unspecified': 'Pine',
                    'Spruce Unspecified': 'Spruce',
                    'Spruce/Fir': 'Spruce',
                    'White Birch': 'Paper Birch',
                    'Scrub Oak': 'Oak'}

pricesNorth['mergedSpecies'] = pricesNorth['priceSpecies'].map(speciesCrosswalk)

# if the mergedSpecies is null, set it to the priceSpecies
pricesNorth['mergedSpecies'] = pricesNorth['mergedSpecies'].fillna(pricesNorth['priceSpecies'])

# drop the priceSpecies column
pricesNorth = pricesNorth.drop(columns='priceSpecies')

# use agg() to reduce pricesNorth to the mean price by statecd, priceRegion,
# mergedSpecies (mean() ignores NaN, i.e. unreported prices)
pricesNorth = pricesNorth.groupby(['statecd', 'priceRegion', 'mergedSpecies']).agg({
    'Pulpwood': 'mean',
    'Sawtimber': 'mean'
}).reset_index()

# a product that is not reported at all for a region/species gets price 0
priceColumns = ['Pulpwood', 'Sawtimber']
pricesNorth[priceColumns] = pricesNorth[priceColumns].fillna(0)

# convert mergedSpecies to lowercase
pricesNorth['mergedSpecies'] = pricesNorth['mergedSpecies'].str.lower()


In [None]:
# Create a crosswalk dataframe that assigns each species code (spcd) the
# price-species label (mergedSpecies) used to look up its price.
# The pairs are written side by side so a code and its label cannot drift
# apart; row order matches the original lists.
# spcd 375 is labelled 'paper birch' (not 'white birch') because the price
# table maps 'White Birch' to 'Paper Birch' before lowercasing, so
# 'white birch' would never find a price in the later merge.
speciesBySpcd = [
    (129, 'white pine'), (131, 'pine'), (314, 'hard maple'),
    (318, 'hard maple'), (402, 'hickory'), (403, 'hickory'),
    (405, 'hickory'), (407, 'hickory'), (409, 'hickory'),
    (541, 'white ash'), (544, 'ash'), (601, 'hickory'),
    (602, 'black walnut'), (621, 'hardwood'), (762, 'black cherry'),
    (802, 'white oak'), (804, 'oak'), (823, 'oak'), (830, 'oak'),
    (837, 'oak'), (12, 'spruce'), (71, 'pine'), (91, 'spruce'),
    (94, 'white spruce'), (95, 'spruce'), (105, 'jack pine'),
    (125, 'red pine'), (130, 'pine'), (313, 'soft maple'),
    (316, 'soft maple'), (371, 'yellow birch'), (375, 'paper birch'),
    (462, 'elm'), (531, 'beech'), (543, 'black ash'), (742, 'hardwood'),
    (743, 'aspen'), (746, 'aspen'), (809, 'oak'), (826, 'oak'),
    (833, 'red oak'), (951, 'basswood'), (972, 'elm'), (977, 'elm'),
]
crosswalk = pd.DataFrame({
    'mergedSpecies': [species for _, species in speciesBySpcd],
    'spcd': [spcd for spcd, _ in speciesBySpcd],
})

# Merge the crosswalk dataframe with the biomassNorth dataframe.
# - any mergedSpecies column left by an earlier run of this cell is dropped
#   first, so re-running does not create mergedSpecies_x / mergedSpecies_y
# - how='left' keeps species codes that have no crosswalk entry (their
#   mergedSpecies is NaN)
# - validate='many_to_one' raises if a spcd is ever listed twice above, which
#   would otherwise silently duplicate biomass rows
biomassNorth = biomassNorth.drop(columns='mergedSpecies', errors='ignore')
biomassNorth = biomassNorth.merge(crosswalk, on='spcd', how='left',
                                  validate='many_to_one')


In [None]:
# Value the standing volume: join prices to the biomass rows, then total
# volume and value per state, species class and species, separately for
# pulpwood-size and sawtimber-size trees.


def _sum_by_species(table, size_codes, volume_name, value_name):
    """Total volume and value per statecd, spclass and species.

    Args:
        table: frame with columns statecd, spclass, mergedSpecies,
            size_class_code, volume and ``value_name``.
        size_codes: size_class_code values to include.
        volume_name: name given to the summed volume column.
        value_name: name of the value column to sum.

    Returns:
        A new frame with columns statecd, spclass, species, ``volume_name``
        and ``value_name``. Rows without a mergedSpecies are left out,
        because groupby drops NaN keys.
    """
    inSizeClass = table['size_class_code'].isin(size_codes)
    subset = table.loc[
        inSizeClass,
        ['statecd', 'spclass', 'mergedSpecies', 'volume', value_name],
    ]
    # rename() returns a new frame, so no slice of `table` is modified in place
    subset = subset.rename(columns={'mergedSpecies': 'species',
                                    'volume': volume_name})
    return subset.groupby(['statecd', 'spclass', 'species']).agg({
        volume_name: 'sum',
        value_name: 'sum'
    }).reset_index()


# merge pricesNorth with biomassNorth
# pricesNorth has one row per priceRegion, so every biomass row appears here
# once per price region of its state (left join: rows without a price stay)
tableNorth = pd.merge(biomassNorth, pricesNorth,
                        how='left', on=['statecd', 'mergedSpecies'])

# calculate total value as volume * price (per biomass row and price region)
tableNorth['pwValue'] = tableNorth['volume'] * tableNorth['Pulpwood']
tableNorth['stValue'] = tableNorth['volume'] * tableNorth['Sawtimber']

# The state totals below must count each biomass row once. Summing tableNorth
# directly would multiply volume and value by the number of price regions, so
# prices are first averaged over the regions of each state and joined
# one-to-many instead.
statePrices = pricesNorth.groupby(
    ['statecd', 'mergedSpecies']
    )[['Pulpwood', 'Sawtimber']].mean().reset_index()

# validate guards against the row duplication described above
stateTable = pd.merge(biomassNorth, statePrices,
                      how='left', on=['statecd', 'mergedSpecies'],
                      validate='many_to_one')
stateTable['pwValue'] = stateTable['volume'] * stateTable['Pulpwood']
stateTable['stValue'] = stateTable['volume'] * stateTable['Sawtimber']

# size_class_code '0003'-'0006' is valued as pulpwood
pulpwoodCodes = [f'{code:04d}' for code in range(3, 7)]
pwTable = _sum_by_species(stateTable, pulpwoodCodes, 'pwVolume', 'pwValue')

# size_class_code '0007'-'0018' is valued as sawtimber
sawtimberCodes = [f'{code:04d}' for code in range(7, 19)]
stTable = _sum_by_species(stateTable, sawtimberCodes, 'stVolume', 'stValue')

# one row per statecd, spclass, species with both product totals
# (inner join: a key must be present in both tables)
tableGL = pd.merge(pwTable, stTable, on=['statecd', 'spclass', 'species'])

