# Crop types (Switzerland) dataset extraction

Author: Thiago Nascimento (thiago.nascimento@eawag.ch)

This notebook retrieves the crop-types dataset and concatenates it into a table for publication alongside the data used.

## Requirements
**Python:**

* Python>=3.9 (required by the pinned pandas version below)
* Jupyter
* geopandas=0.10.2
* numpy
* os
* pandas=2.1.3
* tqdm

Check the Github repository for an environment.yml (for conda environments) or requirements.txt (pip) file.

**Files:**

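* Crops_Timeseries_1980_2019.xlsx (note: the import cell below reads `agriculture/Crops_Timeseries_1980_2019.csv` from the data directory)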


**Directory:**

* Clone the GitHub directory locally
* Place any third-party data in its respective directory.
* ONLY update the "PATH" variable in the section "Configurations" with the relative path to the EStreams directory.


## References
* 
## Observations
* 

# Import modules

In [1]:
import pandas as pd
import numpy as np
import tqdm as tqdm
import os
import warnings
import geopandas as gpd

# Configurations

In [7]:
# Only editable variables:
# Relative path (from this notebook) to your local copy of the repository.
PATH = ".."

# Silence every warning raised by the libraries used below. This also hides
# deprecation notices, so comment it out while debugging.
warnings.filterwarnings("ignore")

# Folder holding the raw input data. The original location is kept as the default;
# set the CAMELS_CH_CHEM_DATA environment variable to point to another folder
# without editing the notebook.
path_data = os.environ.get(
    "CAMELS_CH_CHEM_DATA",
    r"C:\Users\nascimth\Documents\data\CAMELS_CH_Chem\data",
)

* #### Users should NOT change anything in the code below this point.

In [None]:
# Non-editable variables:
# Folder (relative to PATH) that receives the per-basin CSV files. It is built with
# os.path.join so the separators are valid on every operating system, not only Windows.
PATH_OUTPUT = os.path.join(
    "results", "Dataset", "catchment_aggregated_data", "agricultural_data"
)

# Set the directory: relative paths used further down are resolved against PATH.
os.chdir(PATH)

# Import data
* Full table

In [10]:
# Read the crop-area table from <path_data>/agriculture/. The path is assembled with
# os.path.join, which removes the invalid "\C" escape sequence of the hand-written
# Windows string and keeps the cell usable on other operating systems.
crops_csv_path = os.path.join(path_data, "agriculture", "Crops_Timeseries_1980_2019.csv")
Crops_Timeseries_1980_2019 = gpd.read_file(crops_csv_path)

# Keep a copy of the gauge identifier under the name "bafu_id".
Crops_Timeseries_1980_2019["bafu_id"] = Crops_Timeseries_1980_2019["gauge_id"]
Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2013,Orchard_2014,Orchard_2015,Orchard_2016,Orchard_2017,Orchard_2018,Orchard_2019,AREA,geometry,bafu_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,<Null>,<Null>,<Null>,...,0,0,0,0,0,0,0,0,,<Null>
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,<Null>,<Null>,<Null>,...,0,0,0,0,0,0,0,0,,<Null>
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112,Appenzell,<Null>,...,0,0,0,0,0,0,0,0,,2112
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161,Blatten_bei_Naters,<Null>,...,0,0,0,0,0,0,0,0,,2161
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256,Pontresina,<Null>,...,0,0,0,0,0,0,0,0,,2256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091,Rheinfelden-Messstation,<Null>,...,36793832.57,36228479.05,36101672.93,36140337.24,36214575.71,36443108.89,36419165.7,3416597665,,2091
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40746765.56,40158117.58,40124060.96,40058792.5,40026873.57,40192044.08,40122449.37,3615048351,,2615
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,<Null>,<Null>,<Null>,...,40843700.08,40257192.32,40219259.35,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,<Null>
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40843700.08,40257192.32,40219259.35,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,2613


In [11]:
# Network CAMELS_CH_Chem: gauge metadata table. The path is relative, so it is
# resolved against the working directory; os.path.join keeps it portable.
network_metadata_path = os.path.join(
    "results", "Dataset", "gauges_metadata", "camels_ch_chem_gauges_metadata.csv"
)
network_camels_ch_chem = pd.read_csv(network_metadata_path, encoding='utf-8')
network_camels_ch_chem

Unnamed: 0,gauge_id,sensor_id,nawaf_id,nawat_id,isot_id,gauge_name,water_body_name,gauge_easting,gauge_northing,gauge_lon,...,gauge_northing_nawaf,area_nawaf,foen_nawaf_dist,gauge_name_nawat,gauge_easting_nawat,gauge_northing_nawat,area_nawat,foen_nawat_dist,q_nawat_corrector,remarks
0,2009,2009.0,1837.0,1837.0,NIO04,Porte du Scex,Rhône,557660,133280,6.89,...,133280.0,5239.4,0.0,Porte du Scex,557660.0,133280.0,5239.402096,0.0,1.000000,
1,2011,2011.0,,4070.0,,Sion,Rhône,593770,118630,7.36,...,,,,Sion,593277.0,118449.0,3372.417040,0.0,1.000005,
2,2016,2016.0,1833.0,1833.0,NIO02,Brugg,Aare,657000,259360,8.19,...,259360.0,11681.3,0.0,Brugg,657000.0,259360.0,11681.282882,0.0,0.999999,
3,2018,2018.0,1835.0,1339.0,,Mellingen,Reuss,662830,252580,8.27,...,252580.0,3385.8,0.0,Gebenstorf,659450.0,258850.0,3420.503458,10.0,1.010250,
4,2019,2019.0,,1852.0,NIO01,Brienzwiler,Aare,649930,177380,8.09,...,,,,Brienzerseeeinlauf,646692.0,177000.0,555.808970,3.3,1.001097,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,2617,2617.0,,,,Müstair,Rom,830800,168700,10.45,...,,,,,,,,,,
111,2623,2623.0,,,,Oberwald,Rhone,669900,154075,8.35,...,,,,,,,,,,
112,2634,2634.0,6169.0,1181.0,,Emmen,Kleine Emme,663700,213630,8.28,...,213630.0,478.3,0.0,Emmen-Littau,663917.0,213356.0,478.277165,0.6,1.000188,station was moved from Littau to Emmen in 2013...
113,2635,2635.0,,,,"Einsiedeln, Gross",Grossbach,700710,218125,8.77,...,,,,,,,,,,Station moved in 2012?


In [12]:
# Mirror the source identifier columns under the names used by the gauge metadata
# table, so that both tables can later be joined on identically named keys.
for alias_name, source_name in {
    "nawaf_id": "naduf_id",
    "nawat_id": "nawa_id",
    "sensor_id": "bafu_id",
}.items():
    Crops_Timeseries_1980_2019[alias_name] = Crops_Timeseries_1980_2019[source_name]

In [13]:
# Inspect the crop table with the three identifier columns added in the previous cell.
Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2016,Orchard_2017,Orchard_2018,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,<Null>,<Null>,<Null>,...,0,0,0,0,0,,<Null>,4409,<Null>,<Null>
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,<Null>,<Null>,<Null>,...,0,0,0,0,0,,<Null>,2064,<Null>,<Null>
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112,Appenzell,<Null>,...,0,0,0,0,0,,2112,<Null>,<Null>,2112
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161,Blatten_bei_Naters,<Null>,...,0,0,0,0,0,,2161,<Null>,<Null>,2161
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256,Pontresina,<Null>,...,0,0,0,0,0,,2256,<Null>,<Null>,2256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091,Rheinfelden-Messstation,<Null>,...,36140337.24,36214575.71,36443108.89,36419165.7,3416597665,,2091,<Null>,<Null>,2091
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40058792.5,40026873.57,40192044.08,40122449.37,3615048351,,2615,<Null>,<Null>,2615
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,<Null>,<Null>,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,<Null>,2078,<Null>,<Null>
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,2613,<Null>,<Null>,2613


In [14]:
# The identifier columns use the text "<Null>" for missing entries; swap it for 0
# so that the columns can be converted to numbers afterwards.
null_marked_columns = ['bafu_id', 'nawat_id', 'nawaf_id', 'sensor_id', 'gauge_id']
for id_column in null_marked_columns:
    Crops_Timeseries_1980_2019[id_column] = Crops_Timeseries_1980_2019[id_column].replace("<Null>", 0)

Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2016,Orchard_2017,Orchard_2018,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,0,<Null>,<Null>,...,0,0,0,0,0,,0,4409,0,0
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,0,<Null>,<Null>,...,0,0,0,0,0,,0,2064,0,0
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112,Appenzell,<Null>,...,0,0,0,0,0,,2112,0,0,2112
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161,Blatten_bei_Naters,<Null>,...,0,0,0,0,0,,2161,0,0,2161
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256,Pontresina,<Null>,...,0,0,0,0,0,,2256,0,0,2256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091,Rheinfelden-Messstation,<Null>,...,36140337.24,36214575.71,36443108.89,36419165.7,3416597665,,2091,0,0,2091
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40058792.5,40026873.57,40192044.08,40122449.37,3615048351,,2615,0,0,2615
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,0,<Null>,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,0,2078,0,0
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,2613,0,0,2613


In [15]:
# Expose the gauge identifier of the metadata table under the name "basin_id".
network_camels_ch_chem["basin_id"] = network_camels_ch_chem["gauge_id"]

In [16]:
# Re-display the crop table; the previous cell only modified the metadata table.
Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2016,Orchard_2017,Orchard_2018,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,0,<Null>,<Null>,...,0,0,0,0,0,,0,4409,0,0
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,0,<Null>,<Null>,...,0,0,0,0,0,,0,2064,0,0
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112,Appenzell,<Null>,...,0,0,0,0,0,,2112,0,0,2112
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161,Blatten_bei_Naters,<Null>,...,0,0,0,0,0,,2161,0,0,2161
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256,Pontresina,<Null>,...,0,0,0,0,0,,2256,0,0,2256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091,Rheinfelden-Messstation,<Null>,...,36140337.24,36214575.71,36443108.89,36419165.7,3416597665,,2091,0,0,2091
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40058792.5,40026873.57,40192044.08,40122449.37,3615048351,,2615,0,0,2615
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,0,<Null>,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,0,2078,0,0
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,2613,0,0,2613


In [17]:
# Same crop table once more; nothing has modified it since the previous display.
Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2016,Orchard_2017,Orchard_2018,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,0,<Null>,<Null>,...,0,0,0,0,0,,0,4409,0,0
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,0,<Null>,<Null>,...,0,0,0,0,0,,0,2064,0,0
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112,Appenzell,<Null>,...,0,0,0,0,0,,2112,0,0,2112
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161,Blatten_bei_Naters,<Null>,...,0,0,0,0,0,,2161,0,0,2161
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256,Pontresina,<Null>,...,0,0,0,0,0,,2256,0,0,2256
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091,Rheinfelden-Messstation,<Null>,...,36140337.24,36214575.71,36443108.89,36419165.7,3416597665,,2091,0,0,2091
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40058792.5,40026873.57,40192044.08,40122449.37,3615048351,,2615,0,0,2615
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,0,<Null>,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,0,2078,0,0
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40157489.65,40126745.02,40292332.91,40209020.68,3619266142,,2613,0,0,2613


In [18]:
# Show the two identifier columns of the metadata table side by side.
network_camels_ch_chem[['sensor_id', 'basin_id']]

Unnamed: 0,sensor_id,basin_id
0,2009.0,2009
1,2011.0,2011
2,2016.0,2016
3,2018.0,2018
4,2019.0,2019
...,...,...
110,2617.0,2617
111,2623.0,2623
112,2634.0,2634
113,2635.0,2635


In [19]:
# Cast both identifier columns of the metadata table to float, then confirm the dtypes.
metadata_id_columns = ['sensor_id', 'basin_id']
for metadata_column in metadata_id_columns:
    network_camels_ch_chem[metadata_column] = network_camels_ch_chem[metadata_column].astype(float)

network_camels_ch_chem[metadata_id_columns].dtypes

sensor_id    float64
basin_id     float64
dtype: object

In [20]:
# Cast every identifier column of the crop table to float so the join keys share
# the dtype of the metadata table.
for id_column in ("sensor_id", "nawaf_id", "nawat_id", "bafu_id", "gauge_id"):
    Crops_Timeseries_1980_2019[id_column] = Crops_Timeseries_1980_2019[id_column].astype(float)

In [22]:
# Attach the metadata basin_id once per identifier type, using left joins on the crop table.
# pandas suffixes the clashing column names, so the result holds, in this order:
#   basin_id_x (matched on sensor_id), basin_id_y (matched on nawaf_id),
#   basin_id   (matched on nawat_id).
for join_key in ('sensor_id', 'nawaf_id', 'nawat_id'):
    Crops_Timeseries_1980_2019 = pd.merge(
        Crops_Timeseries_1980_2019,
        network_camels_ch_chem[[join_key, 'basin_id']],
        on=join_key,
        how='left',
    )

Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,0.0,<Null>,<Null>,...,0,0,,0.0,4409.0,0.0,0.0,,2112.0,
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,0.0,<Null>,<Null>,...,0,0,,0.0,2064.0,0.0,0.0,,2462.0,
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112.0,Appenzell,<Null>,...,0,0,,2112.0,0.0,0.0,2112.0,2112.0,,
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161.0,Blatten_bei_Naters,<Null>,...,0,0,,2161.0,0.0,0.0,2161.0,2161.0,,
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256.0,Pontresina,<Null>,...,0,0,,2256.0,0.0,0.0,2256.0,2256.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091.0,Rheinfelden-Messstation,<Null>,...,36419165.7,3416597665,,2091.0,0.0,0.0,2091.0,2091.0,,
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615.0,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40122449.37,3615048351,,2615.0,0.0,0.0,2615.0,2615.0,,
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,0.0,<Null>,<Null>,...,40209020.68,3619266142,,0.0,2078.0,0.0,0.0,,2613.0,
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613.0,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,40209020.68,3619266142,,2613.0,0.0,0.0,2613.0,2613.0,,


In [23]:
# Turn the 0 placeholders of the identifier columns back into missing values (NaN).
zero_marked_columns = ['gauge_id', "bafu_id", "nawaf_id", "nawat_id", "sensor_id"]
Crops_Timeseries_1980_2019.loc[:, zero_marked_columns] = (
    Crops_Timeseries_1980_2019.loc[:, zero_marked_columns].replace(0, np.nan)
)

In [24]:
# Pick the basin_id by priority: the sensor_id match when the row has a gauge_id,
# otherwise the nawaf_id match when the row has a nawaf_id, otherwise the nawat_id match.
has_gauge_id = Crops_Timeseries_1980_2019['gauge_id'].notna()
has_nawaf_id = Crops_Timeseries_1980_2019['nawaf_id'].notna()
nawaf_else_nawat_match = np.where(
    has_nawaf_id,
    Crops_Timeseries_1980_2019['basin_id_y'],
    Crops_Timeseries_1980_2019['basin_id'],
)
Crops_Timeseries_1980_2019['basin_id_new'] = np.where(
    has_gauge_id, Crops_Timeseries_1980_2019['basin_id_x'], nawaf_else_nawat_match
)

# Display the updated DataFrame
Crops_Timeseries_1980_2019

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id,basin_id_new
0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,,<Null>,<Null>,...,0,,,4409.0,,,,2112.0,,2112.0
1,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,,<Null>,<Null>,...,0,,,2064.0,,,,2462.0,,2462.0
2,45,Polygon ZM,<Null>,<Null>,Sitter,43882.47139,wq_2112,2112.0,Appenzell,<Null>,...,0,,2112.0,,,2112.0,2112.0,,,2112.0
3,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161.0,Blatten_bei_Naters,<Null>,...,0,,2161.0,,,2161.0,2161.0,,,2161.0
4,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256.0,Pontresina,<Null>,...,0,,2256.0,,,2256.0,2256.0,,,2256.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,41,Polygon ZM,<Null>,<Null>,Rhein,1517910.509,wq_2091,2091.0,Rheinfelden-Messstation,<Null>,...,3416597665,,2091.0,,,2091.0,2091.0,,,2091.0
183,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615.0,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,3615048351,,2615.0,,,2615.0,2615.0,,,2615.0
184,22,Polygon ZM,2078,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,Rhein,1508069.481,nd_2078,,<Null>,<Null>,...,3619266142,,,2078.0,,,,2613.0,,2613.0
185,105,Polygon ZM,<Null>,<Null>,Rhein,1508069.481,wq_2613,2613.0,b'5765696c2d50616c6d7261696e6272fc636b655f285f...,<Null>,...,3619266142,,2613.0,,,2613.0,2613.0,,,2613.0


In [25]:
# Rows that still have no basin_id after the priority selection need a closer look.
unresolved_rows = Crops_Timeseries_1980_2019["basin_id_new"].isna()
Crops_Timeseries_1980_2019[unresolved_rows]

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id,basin_id_new
12,85,Polygon ZM,<Null>,<Null>,Inn,155039.9227,wq_2403,2403.0,Cinuos-Chel,<Null>,...,0.0,,2403.0,,,2403.0,,,,
129,111,Polygon ZM,<Null>,<Null>,Limmat,373810.2313,wq_2622,2622.0,Gebenstorf,<Null>,...,89336743.29,,2622.0,,,2622.0,,,,


In [27]:
# Two gauges (2403 and 2622) found no match in the metadata table, so they receive
# their own gauge_id as basin_id. The rows are located by gauge_id instead of by the
# positional labels 12 and 129, which would silently point at other rows as soon as
# the order or number of input rows changes.
for unmatched_gauge_id in (2403.0, 2622.0):
    needs_fix = (
        (Crops_Timeseries_1980_2019["gauge_id"] == unmatched_gauge_id)
        & Crops_Timeseries_1980_2019["basin_id_new"].isna()
    )
    Crops_Timeseries_1980_2019.loc[needs_fix, "basin_id_new"] = unmatched_gauge_id

# The check should now return an empty table.
Crops_Timeseries_1980_2019[Crops_Timeseries_1980_2019.basin_id_new.isna()]

Unnamed: 0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id,basin_id_new


In [28]:
# Keep the first row found for each basin_id_new and use that identifier as the index.
Crops_Timeseries_1980_2019_unique = (
    Crops_Timeseries_1980_2019
    .drop_duplicates(subset="basin_id_new")
    .set_index("basin_id_new")
)
Crops_Timeseries_1980_2019_unique

Unnamed: 0_level_0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id
basin_id_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2112.0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,,<Null>,<Null>,...,0,0,,,4409.0,,,,2112.0,
2462.0,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,,<Null>,<Null>,...,0,0,,,2064.0,,,,2462.0,
2161.0,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161.0,Blatten_bei_Naters,<Null>,...,0,0,,2161.0,,,2161.0,2161.0,,
2256.0,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256.0,Pontresina,<Null>,...,0,0,,2256.0,,,2256.0,2256.0,,
2269.0,67,Polygon ZM,<Null>,<Null>,Lonza,41312.44544,wq_2269,2269.0,Blatten,<Null>,...,0,0,,2269.0,,,2269.0,2269.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2113.0,46,Polygon ZM,<Null>,<Null>,Aare,957863.3348,wq_2113,2113.0,Felsenau-K.W._Klingnau,<Null>,...,12076962.16,2517900566,,2113.0,,,2113.0,2113.0,,
2130.0,11,Polygon ZM,1826,Laufenburg,Rhein_(Oberwasser),1520639.617,nd_1826,,<Null>,<Null>,...,35545138.59,3351300894,,,1826.0,,,,2130.0,
2091.0,8,Polygon ZM,1827,Rheinfelden-Messstation,Rhein,1517910.509,nd_1827,,<Null>,<Null>,...,36419165.7,3416597665,,,1827.0,,,,2091.0,
2615.0,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615.0,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40122449.37,3615048351,,2615.0,,,2615.0,2615.0,,


In [29]:
# Remove the two manually resolved basins (2622 and 2403) from the de-duplicated table.
Crops_Timeseries_1980_2019_unique = Crops_Timeseries_1980_2019_unique.drop(
    index=[2622.0, 2403.0]
)

Crops_Timeseries_1980_2019_unique

Unnamed: 0_level_0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id
basin_id_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2112.0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,,<Null>,<Null>,...,0,0,,,4409.0,,,,2112.0,
2462.0,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,,<Null>,<Null>,...,0,0,,,2064.0,,,,2462.0,
2161.0,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161.0,Blatten_bei_Naters,<Null>,...,0,0,,2161.0,,,2161.0,2161.0,,
2256.0,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256.0,Pontresina,<Null>,...,0,0,,2256.0,,,2256.0,2256.0,,
2269.0,67,Polygon ZM,<Null>,<Null>,Lonza,41312.44544,wq_2269,2269.0,Blatten,<Null>,...,0,0,,2269.0,,,2269.0,2269.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2113.0,46,Polygon ZM,<Null>,<Null>,Aare,957863.3348,wq_2113,2113.0,Felsenau-K.W._Klingnau,<Null>,...,12076962.16,2517900566,,2113.0,,,2113.0,2113.0,,
2130.0,11,Polygon ZM,1826,Laufenburg,Rhein_(Oberwasser),1520639.617,nd_1826,,<Null>,<Null>,...,35545138.59,3351300894,,,1826.0,,,,2130.0,
2091.0,8,Polygon ZM,1827,Rheinfelden-Messstation,Rhein,1517910.509,nd_1827,,<Null>,<Null>,...,36419165.7,3416597665,,,1827.0,,,,2091.0,
2615.0,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615.0,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40122449.37,3615048351,,2615.0,,,2615.0,2615.0,,


In [30]:
# Look at the de-duplicated table again; nothing has modified it since the previous cell.
Crops_Timeseries_1980_2019_unique

Unnamed: 0_level_0,OBJECTID *,Shape *,naduf_id,naduf_name,water_body,Shape_Leng,ID *,gauge_id,gauge_name,nawa_id,...,Orchard_2019,AREA,geometry,bafu_id,nawaf_id,nawat_id,sensor_id,basin_id_x,basin_id_y,basin_id
basin_id_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2112.0,10,Polygon ZM,4409,Appenzell,Sitter,43882.47139,nd_4409,,<Null>,<Null>,...,0,0,,,4409.0,,,,2112.0,
2462.0,18,Polygon ZM,2064,S-chanf,Inn,130527.3201,nd_2064,,<Null>,<Null>,...,0,0,,,2064.0,,,,2462.0,
2161.0,55,Polygon ZM,<Null>,<Null>,Massa,68650.25544,wq_2161,2161.0,Blatten_bei_Naters,<Null>,...,0,0,,2161.0,,,2161.0,2161.0,,
2256.0,65,Polygon ZM,<Null>,<Null>,Rosegbach,40922.44567,wq_2256,2256.0,Pontresina,<Null>,...,0,0,,2256.0,,,2256.0,2256.0,,
2269.0,67,Polygon ZM,<Null>,<Null>,Lonza,41312.44544,wq_2269,2269.0,Blatten,<Null>,...,0,0,,2269.0,,,2269.0,2269.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2113.0,46,Polygon ZM,<Null>,<Null>,Aare,957863.3348,wq_2113,2113.0,Felsenau-K.W._Klingnau,<Null>,...,12076962.16,2517900566,,2113.0,,,2113.0,2113.0,,
2130.0,11,Polygon ZM,1826,Laufenburg,Rhein_(Oberwasser),1520639.617,nd_1826,,<Null>,<Null>,...,35545138.59,3351300894,,,1826.0,,,,2130.0,
2091.0,8,Polygon ZM,1827,Rheinfelden-Messstation,Rhein,1517910.509,nd_1827,,<Null>,<Null>,...,36419165.7,3416597665,,,1827.0,,,,2091.0,
2615.0,106,Polygon ZM,<Null>,<Null>,Rhein,1507467.48,wq_2615,2615.0,b'426173656c2d4b6c696e67656e7468616c66e4687265',<Null>,...,40122449.37,3615048351,,2615.0,,,2615.0,2615.0,,


In [31]:
# Build a table indexed by the metadata basin_id (as float) and copy into it the
# columns at positions 13..282 of the de-duplicated crop table; rows are aligned on the index.
# NOTE(review): the slice is positional and presumably covers exactly the
# "<crop>_<year>" columns - confirm whenever the input layout changes.
crop_value_columns = Crops_Timeseries_1980_2019_unique.columns[13:283]
basin_index = network_camels_ch_chem.basin_id.astype(float)

crops_df = pd.DataFrame(index=basin_index)
crops_df[crop_value_columns] = Crops_Timeseries_1980_2019_unique[crop_value_columns]

In [32]:
# Reset the index to have basin_id as a regular column
df = crops_df.reset_index()

# Melt to long format: one row per (basin_id, "<variable>_<year>") pair
df_long = df.melt(id_vars='basin_id', var_name='variable_year', value_name='value')

# Split "<variable>_<year>" at its last underscore into 'variable' and 'year'
df_long[['variable', 'year']] = df_long['variable_year'].str.rsplit("_", n=1, expand=True)
df_long['value'] = df_long['value'].astype(float)

# Years every output table must cover; identical for all basins, so built only once
full_years = pd.RangeIndex(1980, 2020)

# Create the destination folder if needed, so the first to_csv call cannot fail on it
os.makedirs(PATH_OUTPUT, exist_ok=True)

# Loop over each unique basin_id and save one CSV file per basin
for basin_id, basin_df in tqdm.tqdm(df_long.groupby('basin_id')):
    # Pivot to wide format (variables as columns, years as rows)
    df_pivot = basin_df.pivot(index='year', columns='variable', values='value')

    # NOTE(review): this rename is positional. It assumes the pivoted columns come out
    # in exactly the order of the list below - confirm against the source column names.
    df_pivot.columns = ['total_arable', 'cereal', 'grapevine', 'maize', 'orchard', 'potato', 'pulse',
       'rapeseed', 'sugarbeet', 'vegetable']

    # Column order of the published files
    df_pivot = df_pivot[['cereal', 'maize', 'sugarbeet', 'potato', 'rapeseed', 'pulse',
        'vegetable', 'total_arable', 'grapevine', 'orchard']]

    # The years were split out of text labels, so turn them into integers
    df_pivot.index = df_pivot.index.astype(int)

    # Insert the years that are missing between 1980 and 2019, fill them by linear
    # interpolation and round the result to 4 decimals
    df_pivot_interpolated = df_pivot.reindex(full_years)
    df_pivot_interpolated = df_pivot_interpolated.interpolate(method='linear')
    df_pivot_interpolated = df_pivot_interpolated.round(4)
    df_pivot_interpolated.index.name = "date"

    # Repeat the last row for 2020
    df_pivot_interpolated.loc[2020] = df_pivot_interpolated.loc[2019]

    output_file = os.path.join(
        PATH_OUTPUT, "camels_ch_chem_swisscrops_" + str(int(basin_id)) + ".csv"
    )
    df_pivot_interpolated.to_csv(output_file, encoding='latin')

100%|██████████| 115/115 [00:00<00:00, 120.17it/s]


# End