In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before code is executed and edits to their source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd

import swisslandstats as sls

In [3]:
# Load the two input CSV files with swisslandstats.
# NOTE(review): judging by the file names these are presumably the land use
# (NOLU04, 46 classes) and land cover (NOLC04, 27 classes) tables -- confirm.
lu_ldf = sls.read_csv('data/AREA_NOLU04_46_171129.csv')
lc_ldf = sls.read_csv('data/AREA_NOLC04_27_171129.csv')

## Merge the two data frames

In [4]:
# Combine both frames into a single one that the rest of the notebook works on
# (see the `sls.merge` documentation for the exact join semantics).
lulc_ldf = sls.merge(lu_ldf, lc_ldf)

## Clip it by the extent of the Vaud Canton

In [7]:
# Replace the merged frame by its clipped version for the
# 'Vaud, Switzerland' query.
# NOTE(review): presumably the query is geocoded through the Nominatim
# service, which would require network access -- confirm.
lulc_ldf = lulc_ldf.clip_by_nominatim('Vaud, Switzerland')

## Reclassify it into 18 categories

In [12]:
# Delete the 'R' char from the 09 LU/LC columns, i.e. replace '09R' by '09'
# in every column name, so that all column names follow the same pattern

lulc_ldf.columns = lulc_ldf.columns.map(lambda column: column.replace('09R', '09'))

In [13]:
import json

# Read the whole JSON file and decode it into the reclassification rules
with open('data/lulc_reclassify_map.json', 'r') as fp:
    lulc_reclassify_map = json.loads(fp.read())

In [14]:
def lulc_reclassify(lulc_ldf, left_column, right_column, lulc_reclassify_map):
    """
    Reclassify the combination of two columns into a single series.

    Parameters
    ----------
    lulc_ldf : data frame
        Frame that contains both `left_column` and `right_column`.
    left_column : str
        Name of the column whose values are tested against the first item
        of each rule.
    right_column : str
        Name of the column whose values are tested against the second item
        of each rule.
    lulc_reclassify_map : iterable of (left_values, right_values, new_value)
        Reclassification rules. A row matches a rule when its `left_column`
        value is in `left_values` and its `right_column` value is in
        `right_values`; matching rows are assigned `new_value`.

    Returns
    -------
    pd.Series
        Series indexed like `lulc_ldf`. Rows that match no rule hold -1.
        Rules are applied in order, so a later rule overwrites an earlier
        one for rows that match both.
    """
    # int -1 vs. float np.nan: an integer sentinel keeps the integer dtype
    lulc_reclassify_ser = pd.Series(-1, index=lulc_ldf.index, dtype=int)

    # the column lookups do not depend on the rule, so do them only once
    left_ser = lulc_ldf[left_column]
    right_ser = lulc_ldf[right_column]

    for left_values, right_values, new_value in lulc_reclassify_map:
        matches = left_ser.isin(left_values) & right_ser.isin(right_values)
        # assign through a positional boolean mask rather than through index
        # labels: with duplicated labels, label-based assignment would also
        # overwrite non-matching rows that share a label with a matching one
        lulc_reclassify_ser.loc[matches.values] = new_value

    return lulc_reclassify_ser

In [15]:
# Column names of the form 'LU...46'
lu_columns = lulc_ldf.columns[[
    column.startswith('LU') and column.endswith('46')
    for column in lulc_ldf.columns]]
# Column names of the form 'LC...27'
lc_columns = lulc_ldf.columns[[
    column.startswith('LC') and column.endswith('27')
    for column in lulc_ldf.columns]]

In [16]:
# Add one reclassified 'LULC<yy>_18' column per (LU, LC) column pair, where
# <yy> is the two characters that follow the 'LU' prefix of the LU column name.
# NOTE(review): `lc_column` is never used -- the LC column name is rebuilt
# from `year_str`, so the zip only bounds the number of iterations.
# NOTE(review): the LC column is passed as `left_column` and the LU column as
# `right_column`; confirm that each entry of the JSON map lists the LC values
# first and the LU values second.
for lu_column, lc_column in zip(lu_columns, lc_columns):
    year_str = lu_column[2:4]
    lulc_ldf['LULC%s_18' % year_str] = lulc_reclassify(
        lulc_ldf, 'LC%s_27' % year_str, 'LU%s_46' % year_str, lulc_reclassify_map)

In [18]:
# Export only the coordinate columns, the 'FJ*' columns and the reclassified
# 'LULC*' columns (in that order) to a CSV file
lulc_ldf[
    ['X', 'Y']
    + [column for column in lulc_ldf.columns if column.startswith('FJ')]
    + [column for column in lulc_ldf.columns if column.startswith('LULC')]
].to_csv('data/vaud_lulc.csv')