In [12]:
# Identity with the relevant layers:

# See "Analysis" map
# CFLB (from Jordan) - dissolved 
# THLB (using raster analysis might help here with the percentages. Rasterize the THLB on the grid using the THLB PCT field, then do a "count")
# Protected area (AllProtectedAreas)
# BurnSeverity2023
# Burn2024 (Note - lots of these are "holdover fires". Could report areas that are 2024 and not 2023 in output if desired)
# VRIAgeRange (calculated from VRI. Report the intersection with the burns)
# WHA - Timber Harvest field
# UWR - Timber Harvest field
# Schedule K (Note - check how she wants these filtered - "Approved" (data is old))
# Approved Woodlots


# Set the parameters to add a tolerance of 100m or so and to snap to other layers.

#I believe this needs to be a Union - since Identity will just add a 
gdb = r'\\spatialfiles2.bcgov\work\FOR\RNI\DPC\General_User_Data\nross\BRFN_NE_LUPCE_Analysis\BRFNIdentityLayers.gdb'
inFeatures = ['BRFN_AnalysisAreas_Dissolve', 'ApprovedUWR', 'ApprovedWHA', 'Burn2024', 'BurnSeverity2023', 'VRIAgeRange1', 'aflb_ne_redo', 'thlb_ne_redo']
outFeatures = 'BRFN_AnalysisAreas_Union'

with arcpy.EnvManager(XYTolerance="100 Unknown"):
    arcpy.analysis.Union(inFeatures, outFeatures, "NO_FID")
    
    


ExecuteError: ERROR 160196: Invalid Topology
Failed to execute (Union).


In [1]:
import geopandas as gpd
import pandas as pd
import fiona 
import os
# Create output dataframe

# Read final polygon after union operations
gdb = r'\\spatialfiles2.bcgov\work\FOR\RNI\DPC\General_User_Data\nross\BRFN_NE_LUPCE_Analysis\BRFNLupce.gdb'

layer_name = 'BRFN_AnalysisAreas_V2_Dissolve'

df = gpd.read_file(gdb, layer=layer_name)

df['AreaHa'] = df['Shape_Area']/10000
df.loc[df['FIRE_NUMBER'] != '', 'DonnieCreek'] = 1

# calculate AFLB and THLB areas
df['aflb_ha'] = df['AreaHa'] * df['aflb_fact']
df['thlb_ha'] = df['AreaHa'] * df['thlb_fact']
df = df.drop(columns=['FIRE_NUMBER', 'aflb_fact', 'thlb_fact', 'Shape_Length', 'Shape_Area', 'geometry'])

print(df.columns)

Index(['HV1Name', 'HV1Zone_A_B_C', 'HV1ZoneLabel', 'Priority_WMB_NAME',
       'TRAPLINE_AREA_IDENTIFIER', 'Full_WMB_Name', 'FRRA',
       'WHA_TIMBER_HARVEST_CODE', 'FID_Burn2024', 'BURN_SEVERITY_RATING',
       'AgeRange', 'UWR_TIMBER_HARVEST_CODE', 'FID_SchK_Blocks',
       'FID_SchK_Woodlots', 'FID_AllProtectedAreas_redo', 'AreaHa',
       'DonnieCreek', 'aflb_ha', 'thlb_ha'],
      dtype='object')


In [4]:
# set up columns dictionary. You will need to edit this with your output column names
outColumnsList = [
    {
        'name': 'Gross Area',
        'mask': df['AreaHa'].notnull() # AKA entire area
    },
    {
         'name': 'AFLB',
         'mask': df['aflb_ha'] > 0,
         'sumfield': 'aflb_ha' # sum this field if specified
    },
    {
        'name': 'THLB',
        'mask': df['thlb_ha'] > 0,
        'sumfield': 'thlb_ha'
    },
    {
        'name': 'Protected',
        'mask': df['FID_AllProtectedAreas_redo'] > -1
    },
    {
        'name': 'Burn 2023 High',
        'mask': df['BURN_SEVERITY_RATING'] == 'High'
    },
    {
        'name': 'Burn 2023 Med',
        'mask': df['BURN_SEVERITY_RATING'] == 'Medium'
    },
    {
        'name': 'Burn 2023 Low',
        'mask': df['BURN_SEVERITY_RATING'] == 'Low'
    },
    {
        'name': 'New Burn 2024',
        'mask': ((df['FID_Burn2024'] > -1) & (df['BURN_SEVERITY_RATING'] == ''))
    },
    {
        'name': 'Old Forest: 250+',
        'mask': df['AgeRange'] == '250+'
    },
    {
        'name': 'Old Forest 250+ High Burn',
        'mask': ((df['AgeRange'] == '250+') & (df['BURN_SEVERITY_RATING'] == 'High'))
    },
    {
        'name': 'Old Forest 250+ Med Burn',
        'mask': ((df['AgeRange'] == '250+') & (df['BURN_SEVERITY_RATING'] == 'Medium'))  
    },
    {
        'name': 'Old Forest 250+ Low Burn',
        'mask': ((df['AgeRange'] == '250+') & (df['BURN_SEVERITY_RATING'] == 'Low')) 
    },
    {
        'name': 'Old Forest: 140-249',
        'mask': df['AgeRange'] == '140 - 249'
    },
    {
        'name': 'Old Forest 140 - 249 High Burn',
        'mask': ((df['AgeRange'] == '140 - 249') & (df['BURN_SEVERITY_RATING'] == 'High'))
    },
    {
        'name': 'Old Forest 140 - 249 Med Burn',
        'mask': ((df['AgeRange'] == '140 - 249') & (df['BURN_SEVERITY_RATING'] == 'Medium'))
    },
    {
        'name': 'Old Forest 140 - 249 Low Burn',
        'mask': ((df['AgeRange'] == '140 - 249') & (df['BURN_SEVERITY_RATING'] == 'Low'))
    },
    {
        'name': 'Old Forest: 100 - 139',
        'mask': df['AgeRange'] == '100 - 139'
    },
    {
        'name': 'Old Forest 100 - 139 High Burn',
        'mask': ((df['AgeRange'] == '100 - 139') & (df['BURN_SEVERITY_RATING'] == 'High'))
    },
    {          
        'name': 'Old Forest 100 - 139 Med Burn',
        'mask': ((df['AgeRange'] == '100 - 139') & (df['BURN_SEVERITY_RATING'] == 'Medium'))
    },
    {
        'name': 'Old Forest 100 - 139 Low Burn',
        'mask': ((df['AgeRange'] == '100 - 139') & (df['BURN_SEVERITY_RATING'] == 'Low'))
    },
    {
        'name': 'WHA (no harvest)',
        'mask': df['WHA_TIMBER_HARVEST_CODE'] == 'NO HARVEST ZONE'
    },
    {
        'name': 'WHA (conditional harvest)',
        'mask': df['WHA_TIMBER_HARVEST_CODE'] == 'CONDITIONAL HARVEST ZONE'
    },
    {
        'name': 'UWR (no harvest)',
        'mask': df['UWR_TIMBER_HARVEST_CODE'] == 'NO HARVEST ZONE'
    },
    {
        'name': 'UWR (conditional harvest)',
        'mask': df['UWR_TIMBER_HARVEST_CODE'] == 'CONDITIONAL HARVEST ZONE'
    },
    {
         'name': 'Schedule K Blocks',
         # Double check how she wants these filtered
         'mask': df['FID_SchK_Blocks'] > -1
     },
     {
         'name': 'Schedule K Woodlots',
         # Double check how she wants these filtered
         'mask': df['FID_SchK_Woodlots'] > -1 
     }
]

In [3]:
# Set up rows list of dictionaries
# The script will loop through these to get the "row chunks" for the output
outRowsList = [
    {
        'category': 'WMB - Priority and Cameron',
        'area': df.loc[df['Priority_WMB_NAME'] != ""], # This loc function selects all records where Priority WMB name is not an empty string
        'groupField': 'Priority_WMB_NAME'
    },
    { # Note that Cameron River isn't in the Priority WMB, so I added a new record in the category
        'category': 'WMB - Priority and Cameron',
        'area': df.loc[df['Full_WMB_Name'] == "Cameron River"],
        'groupField': 'Full_WMB_Name'
    },
    {
        'category': 'HV1 By Zone',
        'area': df.loc[df['HV1Zone_A_B_C'] != ""],
        'groupField': 'HV1Zone_A_B_C'
    },
    { # For this one, I group by HV1 Name since she wanted these individually reported
        'category': 'HV1 Individual Polygons - Gundy',
        'area': df.loc[df['HV1ZoneLabel'] == 'C - Plan 1'],
        'groupField': 'HV1Name'
    },
    { # WMB and HV1
        'category': 'WMB-HV1 Intersection including HV1 outside WMB',
        'area': df.loc[(df['HV1Zone_A_B_C'] != '')],
        'groupField': ['Full_WMB_Name', 'HV1Zone_A_B_C']
    },
    { # Traplines
        'category': 'Individual Traplines',
        'area': df.loc[df['TRAPLINE_AREA_IDENTIFIER'] != ''],
        'groupField': 'TRAPLINE_AREA_IDENTIFIER'
    },
    { # WMB Trapline intersection
        'category': 'WMB-Trapline Intesection including outside WMB',
        'area': df.loc[df['TRAPLINE_AREA_IDENTIFIER'] != ''],
        'groupField': ['Full_WMB_Name', 'TRAPLINE_AREA_IDENTIFIER']
    },
    { # FRRA
        'category': 'FRRA',
        'area': df.loc[df['FRRA'] == 1],
        'groupField': 'FRRA'
    },
    { # FRRA - WMB intersection
        'category': 'FRRA - WMB Intersection',
        'area': df.loc[df['FRRA'] == 1],
        'groupField': ['Full_WMB_Name', 'FRRA']
    },
    { # Donnie Creek
        'category': 'Donnie Creek',
        'area': df.loc[df['DonnieCreek'] == 1],
        'groupField': 'DonnieCreek'
    },
    { # Donnie Creek - WMB intersection
        'category': 'Donnie Creek - WMB Intersection',
        'area': df.loc[df['DonnieCreek'] == 1],
        'groupField': ['Full_WMB_Name', 'DonnieCreek']
    }
]


In [5]:
# Prepare the output data frame. This can be exported to an Excel

# First define the empty dataframe to be added onto later
outputdf = pd.DataFrame()

# Loop over the rows list
for r in outRowsList:
    # create a blank "row dataframe" that we will add all the columns to. This will be appended to the output df
    rdf = pd.DataFrame()
    group = r['groupField']
    # Loop over columns list
    for c in outColumnsList:
        # get some variables from the dict so it's easier to read:
        mask = c['mask']
        cname = c['name']
        
        # Create "Column dataframe" using a subset of the one defined in the row dictionary.
        cdf = r['area'].loc[mask]
        
        # Choose the "Sum Field", usually this is area except in some cases where I defined a sumfield key in the dict
        try:
            sumField = c['sumfield']
        except KeyError:
            sumField = 'AreaHa'
            
        # if it's a "two-field", concat them together:
        if type(group) == list:
            cdf['merged'] = cdf[group[0]].astype('str') + " - " + cdf[group[1]].astype('str')
            cdf = cdf[['merged', sumField]].groupby('merged').sum()
        else:
            # Remove all columns except group field and area, then preform group by and sum operations.
            cdf = cdf[[group, sumField]].groupby(group).sum()
    
        # Add category (from row dictionary)
        cdf['Category'] = r['category']
        # Set index to a multi index of Category and "Polygon" name
        cdf = cdf.set_index(['Category', cdf.index.rename('Polygon')])
        # Rename the area field to the column name for output
        cdf = cdf.rename(columns={'AreaHa': cname})
        
        # If rdf is not empty, set it to the "cdf"
        if len(rdf) == 0:
            rdf = cdf
        # otherwise, join the cdf on to the existing "rdf"
        else:
            rdf = rdf.join(cdf, how='outer')
    # Add the rdf to the output dataframe
    outputdf = pd.concat([outputdf, rdf])
            

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cdf['merged'] = cdf[group[0]].astype('str') + " - " + cdf[group[1]].astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cdf['merged'] = cdf[group[0]].astype('str') + " - " + cdf[group[1]].astype('str')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cdf['merged'] = cdf[group[0]].astype('str

In [6]:
# save as Excel with date
from datetime import datetime
now = datetime.now()
date = now.strftime("%Y-%m-%d")
wks = r'\\spatialfiles2.bcgov\work\FOR\RNI\DPC\General_User_Data\nross\BRFN_NE_LUPCE_Analysis'
outfile = os.path.join(wks, 'Deliverables', f'LUP Analysis Pandas {date}.xlsx')
outputdf.style.map(lambda v: "number-format: #,##0").to_excel(outfile)

In [95]:
# Other analysis:

# Amount of old in "protection"
old_df = df.loc[(df['FID_AllProtectedAreas_redo'] > -1)]
old_df = old_df[['AgeRange', 'AreaHa']]
old_df.groupby(['AgeRange']).sum()

Unnamed: 0_level_0,AreaHa
AgeRange,Unnamed: 1_level_1
,43736.951138
100 - 139,19978.415657
140 - 249,58232.791258
250+,720.116175


In [96]:
# Amount of old in Schedule K 
old_df = df.loc[(df['FID_SchK_Blocks'] > -1) | (df['FID_SchK_Woodlots'] > -1)]
old_df.loc[df['FID_SchK_Blocks'] > -1, 'SchK_Type'] = 'Blocks'
old_df.loc[df['FID_SchK_Woodlots'] > -1, 'SchK_Type'] = 'Woodlots'

old_df = old_df[['AgeRange', 'SchK_Type', 'AreaHa']]
old_df = old_df.groupby(['AgeRange', 'SchK_Type']).sum()
# old_df.reset_index()
old_df.reset_index().pivot(index = 'AgeRange', columns='SchK_Type', values='AreaHa')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_df.loc[df['FID_SchK_Blocks'] > -1, 'SchK_Type'] = 'Blocks'


SchK_Type,Blocks,Woodlots
AgeRange,Unnamed: 1_level_1,Unnamed: 2_level_1
,901.225375,1997.583071
100 - 139,2031.435725,1111.478693
140 - 249,1532.414945,1273.906786
250+,64.292193,


In [97]:
WMB

Unnamed: 0_level_0,AreaHa,Pct
AgeRange,Unnamed: 1_level_1,Unnamed: 2_level_1
,640812.046467,0.517151
100 - 139,398464.263518,0.32157
140 - 249,198750.650598,0.160397
250+,1093.622379,0.000883


In [98]:
# Age required for Trapline and WMB Old Growth Targets
# Trapline requires 33% old
# WMB required 25% old

trap = df.loc[df['TRAPLINE_AREA_IDENTIFIER'] != '']
trap = trap[['AgeRange', 'AreaHa']]
trap['Pct'] = trap['AreaHa'] / trap['AreaHa'].sum()
trap = trap.groupby(['AgeRange']).sum()

WMB = df.loc[(df['Priority_WMB_NAME'] != "") | (df['Full_WMB_Name'] == "Cameron River")]
WMB = WMB[['AgeRange', 'AreaHa']]
WMB['Pct'] = WMB['AreaHa'] / WMB['AreaHa'].sum()
WMB = WMB.groupby(['AgeRange']).sum()

x = trap.merge(WMB, on=['AgeRange'])
x

Unnamed: 0_level_0,AreaHa_x,Pct_x,AreaHa_y,Pct_y
AgeRange,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
,429110.801619,0.554015,640812.046467,0.517151
100 - 139,232935.91373,0.300738,398464.263518,0.32157
140 - 249,111335.763028,0.143743,198750.650598,0.160397
250+,1164.327119,0.001503,1093.622379,0.000883


In [99]:
df.to_excel('test.xlsx')