# 1. Import the required libraries

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import shape as sp

# 2. Load the downloaded shapefiles and retain solely the required fields

## Regions in the Democratic Republic of Congo
There are two regions of interest in the Democratic Republic of Congo which we have selected, namely `Equateur` and `Orientale`.

**Data manipulation task:** The data manipulation required is as below - 
(i) Drop the columns which are not required (ii) Rename the columns which are retained (Country, and Region respectively) and (iii) Insert a new column 'to_crs' with the epsg code of the transform required to convert the polygonal geometry from a geographic projection to a distance projection (in metres) for computing distances (such as areas and perimeters); The correct transform code required for different parts of the globe can be obtained from epsg.io

- [EPSG.IO](https://epsg.io/)

In [2]:
#Equateur
equateur = gpd.read_file('./shapes/DRCongo/Equateur.shp')
equateur = equateur.drop(columns=['ID_0', 'ISO', 'ID_1', 'TYPE_1', 'ENGTYPE_1', 'NL_NAME_1', 'VARNAME_1'])
equateur.rename(columns={'NAME_0':'Country', 'NAME_1':'Region'}, inplace=True)
equateur['to_crs'] = 'epsg:4057'
equateur

Unnamed: 0,Country,Region,geometry,to_crs
0,Democratic Republic of the Congo,Équateur,"POLYGON ((19.60209 5.13152, 19.61267 5.12790, ...",epsg:4057


In [3]:
#Orientale
orientale = gpd.read_file('./shapes/DRCongo/Orientale.shp')
orientale = orientale.drop(columns=['ID_0', 'ISO', 'ID_1', 'TYPE_1', 'ENGTYPE_1', 'NL_NAME_1', 'VARNAME_1'])
orientale.rename(columns={'NAME_0':'Country', 'NAME_1':'Region'}, inplace=True)
orientale['to_crs'] = 'epsg:4057'
orientale

Unnamed: 0,Country,Region,geometry,to_crs
0,Democratic Republic of the Congo,Orientale,"POLYGON ((25.55505 5.37646, 25.56033 5.37533, ...",epsg:4057


## Regions in the Republic of Congo
There are two regions of interest in the Republic of Congo which we have selected, namely `Likouala` and `Dzangha-Sangha`.

**Data manipulation task:** The data manipulation required is as below - 
(i) Drop the columns which are not required (ii) Rename the columns which are retained (Country, and Region respectively) (iii) Rename the country to 'Congo' (iv) Reorder the columns through a dataframe subsetting operation and (v) Insert a new column 'to_crs' with the epsg code of the transform required to convert the polygonal geometry from a geographic projection to a distance projection (in metres) for computing distances (such as areas and perimeters); The correct transform code required for different parts of the globe can be obtained from epsg.io

- [EPSG.IO](https://epsg.io/)

In [4]:
#Likouala= 5.084731 = area
likouala = gpd.read_file('./shapes/RepofCongo/Likouala.shp')
likouala = likouala.drop(columns=['Shape_Leng', 'Shape_Area', 'ADM1_PCODE', 'ADM1_REF', 'ADM1ALT1FR', 'ADM1ALT2FR', 'ADM0_PCODE', 'date', 'validOn', 'validTo'])
likouala.rename(columns={'ADM1_FR':'Region', 'ADM0_FR':'Country'}, inplace=True)
likouala['Country'] = 'Congo'
likouala = likouala[['Country', 'Region', 'geometry']]
likouala['to_crs'] = 'epsg:4057'
likouala

Unnamed: 0,Country,Region,geometry,to_crs
0,Congo,Likouala,"POLYGON ((17.60632 3.64125, 17.65328 3.63904, ...",epsg:4057


In [5]:
#Dzangha-Sangha = 4.669242 = area
dzangha_sangha = gpd.read_file('./shapes/RepofCongo/Dzangha-Sangha.shp')
dzangha_sangha = dzangha_sangha.drop(columns=['Shape_Leng', 'Shape_Area', 'ADM1_PCODE', 'ADM1_REF', 'ADM1ALT1FR', 'ADM1ALT2FR', 'ADM0_PCODE', 'date', 'validOn', 'validTo'])
dzangha_sangha.rename(columns={'ADM1_FR':'Region', 'ADM0_FR':'Country'}, inplace=True)
dzangha_sangha['Country'] = 'Congo'
dzangha_sangha = dzangha_sangha[['Country', 'Region', 'geometry']]
dzangha_sangha['to_crs'] = 'epsg:4057'
dzangha_sangha

Unnamed: 0,Country,Region,geometry,to_crs
0,Congo,Sangha,"POLYGON ((16.46330 2.70887, 16.47159 2.70536, ...",epsg:4057


## Regions in the United States
There are two regions of interest in the United States which we have selected, namely `Sierra National Forest` and `Los Padres National Forest`.

**Data manipulation task:** The data manipulation required is as below - 
(i) Drop the columns which are not required (ii) Rename the columns which are retained (Country, and Region respectively) (iii) Rename the country to 'USA' (iv) Reorder the columns through a dataframe subsetting operation (v) Insert a new column 'to_crs' with the epsg code of the transform required to convert the polygonal geometry from a geographic projection to a distance projection (in metres) for computing distances (such as areas and perimeters) and (vi) Rename the region accordingly to match the format of the dataframes for other countries;
Note: The correct transform code required for different parts of the globe can be obtained from epsg.io

- [EPSG.IO](https://epsg.io/)

In [6]:
#Sierra = 0.583227 = area; GIS_ACRES = 1418789.802; Shape_LEN = 6.597522
sierra = gpd.read_file('./shapes/USA/Sierra.shp')
sierra = sierra.drop(columns=['ADMINFORES', 'REGION', 'FORESTNUMB', 'FORESTORGC', 'GIS_ACRES', 'SHAPE_AREA', 'SHAPE_LEN'])
sierra['Country'] = 'USA'
sierra.rename(columns={'FORESTNAME':'Region'}, inplace=True)
sierra = sierra[['Country', 'Region', 'geometry']]
sierra['to_crs'] = 'epsg:26943'
sierra['Region'] = 'Sierra'
sierra

Unnamed: 0,Country,Region,geometry,to_crs
0,USA,Sierra,"MULTIPOLYGON (((-119.77867 37.46773, -119.7786...",epsg:26943


In [7]:
#Los Padres = 0.787442 = area
los_padres = gpd.read_file('./shapes/USA/LosPadres.shp')
los_padres = los_padres.drop(columns=['ADMINFORES', 'REGION', 'FORESTNUMB', 'FORESTORGC', 'GIS_ACRES', 'SHAPE_AREA', 'SHAPE_LEN'])
los_padres['Country'] = 'USA'
los_padres.rename(columns={'FORESTNAME':'Region'}, inplace=True)
los_padres = los_padres[['Country', 'Region', 'geometry']]
los_padres['to_crs'] = 'epsg:6423'
los_padres['Region'] = 'Los Padres'
los_padres

Unnamed: 0,Country,Region,geometry,to_crs
0,USA,Los Padres,"MULTIPOLYGON (((-120.74838 35.44383, -120.7483...",epsg:6423


# 3. Concatenate the data frames

**Pandas concatenation task:** The data manipulation required is as below - 
(i) Drop the columns which are not required (ii) Rename the columns which are retained (Country, and Region respectively) (iii) Rename the country to 'USA' (iv) Reorder the columns through a dataframe subsetting operation (v) Insert a new column 'to_crs' with the epsg code of the transform required to convert the polygonal geometry from a geographic projection to a distance projection (in metres) for computing distances (such as areas and perimeters) and (vi) Rename the region accordingly to match the format of the dataframes for other countries;

**Indexing task:** Once the different forests / plots have been concatenated into a single table, we create an index row called PID; This will serve as a `primary key` to join the polygon information with other information for future computations or visualizations

In [8]:
gdf = gpd.GeoDataFrame(pd.concat([likouala, dzangha_sangha, equateur, orientale, sierra, los_padres]))
gdf["PID"] = ['10000', '10001', '10002', '10003', '10004', '10005']
gdf

Unnamed: 0,Country,Region,geometry,to_crs,PID
0,Congo,Likouala,"POLYGON ((17.60632 3.64125, 17.65328 3.63904, ...",epsg:4057,10000
0,Congo,Sangha,"POLYGON ((16.46330 2.70887, 16.47159 2.70536, ...",epsg:4057,10001
0,Democratic Republic of the Congo,Équateur,"POLYGON ((19.60209 5.13152, 19.61267 5.12790, ...",epsg:4057,10002
0,Democratic Republic of the Congo,Orientale,"POLYGON ((25.55505 5.37646, 25.56033 5.37533, ...",epsg:4057,10003
0,USA,Sierra,"MULTIPOLYGON (((-119.77867 37.46773, -119.7786...",epsg:26943,10004
0,USA,Los Padres,"MULTIPOLYGON (((-120.74838 35.44383, -120.7483...",epsg:6423,10005


# 4. Save the table in different formats

**ESRI Shapefile** The ESRI shapefile is one of the most popular formats for geographic information; The shape file adds a geometry (polygon / feature) to a pandas data frame 

**Geojson file** The Geojson file is another very popular  file format for geographic vector data; The Geojson file adds geographic information (i.e. a geometry or a polygon) to a structured JSON table

**INSERT into PostGIS database table** Per the data architecture of this project, this table forms the basis for all the other tables; This table can be inserted into the T1 - Forest Inventory table

In [9]:
gdf.to_file("./shapes/shapes.shp")
gdf.to_file("./shapes/geojson/shapes.geojson", driver='GeoJSON')

---
Copyright © 2022 Carbon Capital Advisors. This content is licensed solely for personal use. Redistribution or publication of this material is strictly prohibited.