In [1]:
import pandas as pd
import os
import pathlib
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np

#### Data Preparation: Forest Cover Data
##### Convert forest acreage to percentage of county forest cover
##### Join 1995 and 2021 forest cover percentage data to Maine county shapefiles

In [40]:
# Anchor all data locations to the directory the notebook is running from
nb_path = pathlib.Path().resolve()
# <notebook dir>/data/forest_cover holds the forest cover tables
data_path = nb_path.joinpath('data', 'forest_cover')

In [41]:
# Echo the resolved forest cover data directory as the cell output
data_path

WindowsPath('C:/Users/roseh/Desktop/785/forest_comp/data/forest_cover')

In [31]:
def format_forest_cover_df(file, year, out_dir=None):
    """Convert species acreage to percent of total county acreage and write the result to a CSV file.

    Parameters
    ----------
    file : str or path-like
        CSV of forest acreage. It must have a 'County code and name' column
        (used as the index) and a 'Total' column. '-' entries are read as
        missing values and thousands separators (',') are stripped.
    year : str
        Label used in the output file name
        ('{year}_forest_cover_percentage.csv').
    out_dir : str or path-like, optional
        Directory the output CSV is written to. Defaults to the directory
        that contains `file`, so the notebook works from any checkout
        location instead of one hard-coded absolute path.

    Returns
    -------
    None
        The result is only written to disk.
    """
    # '-' marks missing data in the source table; county code/name becomes the index
    df = pd.read_csv(file, na_values='-', index_col='County code and name')
    # Strip thousands separators so every column can be converted to float
    df = df.replace(',', '', regex=True).astype('float')

    # Discard the unneeded first row and the empty last row. Slicing by
    # position (rather than dropping by index label) removes exactly those two
    # rows even when an index label is repeated or missing.
    df = df.iloc[1:-1].copy()

    # Every column after the first is converted to a percentage of 'Total'.
    # NOTE(review): this assumes 'Total' is the first data column - confirm
    # against the input tables.
    cols = list(df.columns[1:])
    df[cols] = df[cols].div(df['Total'], axis=0).multiply(100)

    # Keep only the text after the first 8 characters of the index label as the
    # county name (any leading whitespace is left in place here)
    df['COUNTY'] = df.index.str[8:]
    # Missing values are treated as 0
    df = df.fillna(0)

    if out_dir is None:
        out_dir = os.path.dirname(os.path.abspath(file))
    df.to_csv(os.path.join(out_dir, f'{year}_forest_cover_percentage.csv'))

In [33]:
# Convert the 1995 forest cover table to percentages; the result is written to CSV

file = str(data_path / '1995_forest_cover.csv')
format_forest_cover_df(file=file, year='1995')

In [35]:
# Convert the 2021 forest cover table to percentages; the result is written to CSV

file = str(data_path / '2021_forest_cover.csv')
format_forest_cover_df(file=file, year='2021')

1995_forest_cover.csv


In [44]:
def merge_county_w_forestcover(counties, forest, year, out_dir=None):
    """Merge forest percentage data onto the Maine county gdf and write it to a shapefile.

    Parameters
    ----------
    counties : geopandas.GeoDataFrame
        County polygons with a 'COUNTY' column to join on.
    forest : pandas.DataFrame
        Forest cover percentages with a 'COUNTY' column. It is not modified.
    year : str
        Label used in the output file name ('forest_cover_{year}.shp').
    out_dir : str or path-like, optional
        Directory the shapefile is written to. Defaults to
        '<current working directory>/data/forest_cover' so the notebook does
        not depend on one machine's absolute path.

    Returns
    -------
    None
        The merged data is only written to disk.
    """
    # Strip whitespace so names match the counties gdf. `assign` returns a new
    # frame, so the caller's dataframe is left untouched.
    forest = forest.assign(COUNTY=forest['COUNTY'].str.strip())

    # Left join keeps every county polygon even if it has no forest record
    df = counties.merge(forest, on='COUNTY', how='left')
    gdf_forestcover = gpd.GeoDataFrame(df)

    if out_dir is None:
        out_dir = os.path.join(os.getcwd(), 'data', 'forest_cover')

    # Write the merged gdf to a shapefile.
    # NOTE(review): the saved notebook output shows a warning raised from this
    # call - possibly shapefile field-name truncation; confirm the written
    # column names are as expected.
    gdf_forestcover.to_file(os.path.join(out_dir, f'forest_cover_{year}.shp'))

In [52]:
# Load the Maine county boundary shapefile and tidy it for the join

maine_data_path = nb_path.joinpath('data', 'maine')
file = str(maine_data_path / 'Maine_County_Boundary_Polygons_Dissolved_Feature.shp')

# Read the polygons and discard the columns that are not needed
counties = gpd.read_file(file).drop(
    columns=['OBJECTID', 'GlobalID', 'created_us',
             'created_da', 'last_edite', 'last_edi_1']
)

# Trim whitespace from the strings in the COUNTY column
counties['COUNTY'] = counties['COUNTY'].str.strip()

In [53]:
# Load the percentage tables for both survey years
forest_1995 = pd.read_csv(data_path / '1995_forest_cover_percentage.csv')
forest_2021 = pd.read_csv(data_path / '2021_forest_cover_percentage.csv')

In [54]:
# Join each year's percentages to the county polygons and write one shapefile per year
merge_county_w_forestcover(counties=counties, forest=forest_1995, year='1995')
merge_county_w_forestcover(counties=counties, forest=forest_2021, year='2021')

  gdf_forestcover.to_file(os.path.join(data_path,f'forest_cover_{year}.shp'))   # read merged gdf to shapefile
