# Edit and clean Vegetation Type data
Optional. Created after analysis was finished, mainly to create a map in QGIS.

In [1]:
import geopandas as gpd
import json

In [2]:
# Directory containing the AR18x18 Vegetation Type shapefiles.
# Keep the trailing slash: file names are appended to this string directly.
file_path = (
    "C:/Users/lassetk/OneDrive - Universitetet i Oslo/"
    "DataSets/VegetationData/AR18x18/Edited/"
)

In [3]:
# Read the VT presence points shapefile into a GeoDataFrame
vt_pres_shp = gpd.read_file(filename=f"{file_path}VT_presences.shp")

In [4]:
# Read the AR18x18 polygon shapefile into a GeoDataFrame
vt_poly_shp = gpd.read_file(filename=f"{file_path}AR18x18_fixedgeom.shp")

In [5]:
# Read the VT class dictionary (JSON, UTF-8) that is keyed by VT code
with open("../data/dict/vt_classes.json", encoding="utf-8") as vt_json:
    vt_dict = json.loads(vt_json.read())

---
## 1 Add ecosystem type column to AR18x18 polygons
Used to create the AR18x18 map in QGIS for the publication.

In [6]:
# Build the values for a new column holding each VT polygon's ecosystem group
vt_names = vt_dict.keys()

# Walk the primary VT code ("VEG1") of every polygon in row order. A polygon is
# labelled "Excluded" when its code is absent from the dictionary or its entry
# is flagged 'excluded'; otherwise it receives the entry's 'ecosys_group'.
# Iterating over the column (instead of iterrows) keeps the result positional,
# so it does not rely on the index labels being 0..n-1, and it avoids building
# a Series object for every row.
ecosys_list = [
    "Excluded"
    if (cur_vt not in vt_names) or vt_dict[cur_vt]['excluded']
    else vt_dict[cur_vt]['ecosys_group']
    for cur_vt in vt_poly_shp["VEG1"]
]

In [7]:
# Attach the ecosystem groups as a new column (the list holds one entry per polygon row)
vt_poly_shp['Ecosystem_type'] = ecosys_list
# Display the new column as a quick sanity check
vt_poly_shp['Ecosystem_type']

0                    Forest
1                    Forest
2                    Forest
3                  Excluded
4                     Other
                ...        
42518    Heath and mountain
42519                 Other
42520                 Other
42521    Heath and mountain
42522                 Other
Name: Ecosystem_type, Length: 42523, dtype: object

In [None]:
# Write the polygons, including the new Ecosystem_type column, to GeoJSON
vt_poly_shp.to_file(filename='../data/processed/ar18x18_ecosys_column.geojson', driver="GeoJSON")

---
## 2 Clean presence point data

In [9]:
# Preview the first five rows of the presence point table
vt_pres_shp.head(n=5)

Unnamed: 0,TARGET_FID,CID,POLY_,POLY_ID,AREA,PERIMETER,FTEMA,KARTSIGN,TILLEGG1,TILLEGG2,...,SONE,SIGN,FYLKEID,VEG1,VEG2,POINT_X,POINT_Y,layer,path,geometry
0,19,20,23,6834,23548.03271,866.42952,4351,10b+>,+>,,...,1,10b+>,1,10b,,269984.146644,6552055.0,10ab_p,D:/Data/Paper_1/ModelTraining/Data/AR18x18/Pre...,POINT (269984.147 6552054.941)
1,5877,5878,8210,34147,11870.23242,455.19407,4351,10b>,>,,...,1,10b>,6,10b,,198194.6264,6713976.0,10ab_p,D:/Data/Paper_1/ModelTraining/Data/AR18x18/Pre...,POINT (198194.626 6713975.815)
2,7454,7455,10068,41433,22793.92828,660.48053,4351,10b>,>,,...,1,10b>,8,10b,,161519.319078,6659814.0,10ab_p,D:/Data/Paper_1/ModelTraining/Data/AR18x18/Pre...,POINT (161519.319 6659813.583)
3,7494,7495,10114,41479,34560.21155,1658.60579,4351,10b,,,...,1,10b,8,10b,,180177.449793,6533863.0,10ab_p,D:/Data/Paper_1/ModelTraining/Data/AR18x18/Pre...,POINT (180177.450 6533862.856)
4,7881,7882,10606,49590,5216.48659,313.5788,4351,10b,,,...,1,10b,10,10b,,18290.733027,6533729.0,10ab_p,D:/Data/Paper_1/ModelTraining/Data/AR18x18/Pre...,POINT (18290.733 6533728.901)


In [15]:
# Debug inspection: dump the GeoDataFrame's instance attributes (columns, block dtypes, CRS).
# NOTE(review): this exposes private internals; consider removing it from the final notebook.
vt_pres_shp.__dict__

{'_is_copy': None,
 '_mgr': BlockManager
 Items: Index(['TARGET_FID', 'CID', 'POLY_', 'POLY_ID', 'AREA', 'PERIMETER', 'FTEMA',
        'KARTSIGN', 'TILLEGG1', 'TILLEGG2', 'S1', 'S2', 'FLATE_NR', 'SONE',
        'SIGN', 'FYLKEID', 'VEG1', 'VEG2', 'POINT_X', 'POINT_Y', 'layer',
        'path', 'geometry'],
       dtype='object')
 Axis 1: RangeIndex(start=0, stop=25679, step=1)
 FloatBlock: [ 4  5 18 19], 4 x 25679, dtype: float64
 IntBlock: [ 6 12 13], 3 x 25679, dtype: int64
 ObjectBlock: [ 0  1  2  3  7  8  9 10 11 14 15 16 17 20 21], 15 x 25679, dtype: object
 ExtensionBlock: slice(22, 23, 1), 1 x 25679, dtype: geometry,
 '_item_cache': {'layer': 0        10ab
  1        10ab
  2        10ab
  3        10ab
  4        10ab
           ... 
  25674      9e
  25675      9e
  25676      9e
  25677      9e
  25678      9e
  Name: layer, Length: 25679, dtype: object},
 '_attrs': {},
 '_flags': <Flags(allows_duplicate_labels=True)>,
 '_crs': <Projected CRS: EPSG:32633>
 Name: WGS 84 / UTM zo

In [10]:
# Strip the "_p" suffix from the VT class column by keeping only the text
# before the first underscore of each label
vt_pres_shp['layer'] = [
    layer_name.partition("_")[0] for layer_name in vt_pres_shp['layer']
]

In [11]:
# List the distinct VT class labels to spot duplicates
# ('2e' and '2f' are present as well as the merged '2ef')
set(vt_pres_shp['layer'].unique())

{'10ab',
 '10c',
 '11b',
 '12a',
 '12b',
 '12c',
 '1ab',
 '1c',
 '2a',
 '2b',
 '2c',
 '2d',
 '2e',
 '2ef',
 '2f',
 '2g',
 '3ab',
 '4a',
 '4b',
 '4c',
 '4e',
 '4g',
 '5ab',
 '6a',
 '6b',
 '7a',
 '7b',
 '7c',
 '8a',
 '8b',
 '8cd',
 '9ad',
 '9bc',
 '9e'}

In [12]:
# Labels that duplicate the merged class '2ef' and therefore have to go
duplicate_labels = ('2e', '2f')

# Vectorised membership test: True for every presence point carrying a duplicate label
duplicate_mask = vt_pres_shp['layer'].isin(duplicate_labels)

# Keep the remaining rows by boolean selection. Unlike dropping by index label,
# this removes exactly the flagged rows even if index labels were not unique.
# The explicit copy makes the cleaned frame independent of vt_pres_shp.
vt_pres_shp_clean = vt_pres_shp.loc[~duplicate_mask].copy()

In [13]:
# Confirm that the duplicate labels no longer occur in the cleaned data
set(vt_pres_shp_clean['layer'].unique())

{'10ab',
 '10c',
 '11b',
 '12a',
 '12b',
 '12c',
 '1ab',
 '1c',
 '2a',
 '2b',
 '2c',
 '2d',
 '2ef',
 '2g',
 '3ab',
 '4a',
 '4b',
 '4c',
 '4e',
 '4g',
 '5ab',
 '6a',
 '6b',
 '7a',
 '7b',
 '7c',
 '8a',
 '8b',
 '8cd',
 '9ad',
 '9bc',
 '9e'}

In [25]:
# Write the cleaned presence points to GeoJSON
vt_pres_shp_clean.to_file(
    filename='../data/processed/ar18x18_horvath_pres_points.geojson', driver='GeoJSON'
)

In [16]:
# Write the cleaned presence points as an ESRI Shapefile.
# NOTE(review): the target path has no ".shp" extension; presumably the output
# is meant to be a directory holding the shapefile parts. Confirm.
vt_pres_shp_clean.to_file(
    filename='../data/processed/ar18x18_horvath_pres_points_clean', driver='ESRI Shapefile'
)