# Livestock dataset extraction

Author: Thiago Nascimento (thiago.nascimento@eawag.ch)

This notebook is used to retrieve and concatenate the livestock dataset into a table for publication alongside the used data.

## Requirements
**Python:**

* Python>=3.6
* Jupyter
* geopandas=0.10.2
* numpy
* os
* pandas=2.1.3
* tqdm

Check the Github repository for an environment.yml (for conda environments) or requirements.txt (pip) file.

**Files:**

* GVE_Catchments.shp


**Directory:**

* Clone the GitHub directory locally
* Place any third-party data in its respective directory.
* ONLY update the "PATH" and "path_data" variables in the section "Configurations": "PATH" is the relative path to the EStreams directory, and "path_data" is the directory that contains the input data.


## References
* 
## Observations
* Part of the data is interpolated. 

# Import modules

In [1]:
import pandas as pd
import numpy as np
import tqdm as tqdm
import os
import warnings
import geopandas as gpd

# Configurations

In [4]:
# Only editable variables:
# Relative path to your local directory
PATH = ".."
# Suppress all warnings
warnings.filterwarnings("ignore")

# Directory holding the input data (the "shapefile_gve" folder and the station spreadsheet).
# The default below is machine-specific: either edit it here, or set the
# CAMELS_CH_CHEM_DATA environment variable to override it without touching the code.
path_data = os.environ.get(
    "CAMELS_CH_CHEM_DATA",
    r"C:\Users\nascimth\Documents\data\CAMELS_CH_Chem\data",
)

* #### The users should NOT change anything in the code below here. 

In [3]:
# Non-editable variables:
# Output folder, interpreted relative to PATH (the working directory set below).
# Built with os.path.join so the literal contains no backslash escape sequences
# ("\D", "\c", "\l" are invalid escapes) and uses the separator of the running platform.
PATH_OUTPUT = os.path.join("results", "Dataset", "catchment_aggregated_data", "livestock_data")

# Set the directory:
os.chdir(PATH)

# Import data

In [5]:
# Read the catchment polygons that carry the livestock (gve_*) attributes.
# os.path.join avoids the invalid "\s" / "\G" escapes of a hand-written Windows path.
catchments_gve = gpd.read_file(os.path.join(path_data, "shapefile_gve", "GVE_Catchments.shp"))
# Duplicate the gauge identifier under the name "bafu_id" so it can serve as a
# merge key against the station table, which uses that column name.
catchments_gve["bafu_id"] = catchments_gve["gauge_id"]
catchments_gve

Unnamed: 0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2015_h,gve_2016_h,gve_2017_h,gve_2018_h,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id
0,1837,Porte_du_Scex,Rhône,581764.280706,nd_1837,0.0,,0.0,,581812.194190,...,0.071078,0.070785,0.072441,0.072648,0.071371,0.071871,0.071714,0.071609,"POLYGON Z ((2674253.038 1167429.881 0.000, 267...",0.0
1,1833,Brugg,Aare,827826.334191,nd_1833,0.0,,0.0,,827842.590414,...,0.494300,0.490025,0.487356,0.486703,0.480337,0.477349,0.479700,0.484112,"POLYGON Z ((2655969.680 1259695.589 0.000, 265...",0.0
2,1835,Mellingen,Reuss,423443.101185,nd_1835,0.0,,0.0,,423447.043212,...,0.429804,0.430639,0.427754,0.429399,0.425385,0.422040,0.424314,0.422356,"POLYGON Z ((2663723.380 1252919.068 0.000, 266...",0.0
3,1823,Andelfingen,Thur,278840.437214,nd_1823,0.0,,0.0,,278852.096268,...,0.878186,0.875332,0.873078,0.873069,0.858348,0.855597,0.869395,0.868562,"POLYGON Z ((2719445.088 1279260.056 0.000, 271...",0.0
4,1842,Martina,Inn,293426.137861,nd_1842,0.0,,0.0,,293431.436471,...,0.058669,0.057852,0.058343,0.058582,0.057470,0.055563,0.055084,0.055510,"POLYGON Z ((2818919.419 1201129.962 0.000, 281...",0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,0,,Simme,151671.931781,nw_1919,0.0,,1919.0,Latterbach,151676.136857,...,0.259554,0.261628,0.261090,0.259942,0.255896,0.251801,0.254637,0.256755,"POLYGON Z ((2605018.868 1172839.620 0.000, 260...",0.0
183,0,,Sorne,82837.276182,nw_1504,0.0,,1504.0,Delémont,82838.882339,...,0.441431,0.444679,0.441823,0.438026,0.431327,0.426675,0.424547,0.431703,"POLYGON Z ((2584725.396 1248420.902 0.000, 258...",0.0
184,0,,Aare,745868.466423,nw_1142,0.0,,1142.0,Nidau,745881.109854,...,0.385930,0.381925,0.379199,0.378388,0.374406,0.371942,0.373288,0.379099,"POLYGON Z ((2597091.802 1231250.148 0.000, 259...",0.0
185,0,,Drance,138710.844326,nw_3860,0.0,,3860.0,Martigny,138730.554523,...,0.073408,0.073306,0.073382,0.074332,0.072997,0.072378,0.072555,0.073715,"POLYGON Z ((2583409.306 1108018.362 0.000, 258...",0.0


In [6]:
# Network CAMELS_CH_Chem: station table relating each basin_id to its bafu_id, naduf_id and nawa_id.
# os.path.join avoids the invalid "\C" escape of a hand-written Windows path.
network_camels_ch_chem = pd.read_excel(
    os.path.join(path_data, "CAMELS_CH_chem_stations_short_v3.xlsx"),
    sheet_name="all_5",
)
network_camels_ch_chem

Unnamed: 0,basin_id,bafu_id,naduf_id,nawa_id,isot_id,hydro_station,hydrowater_body,lon,lat,area_camels,...,lon_naduf,lat_naduf,area_naduf,bafu_naduf_distance,nawa_station,lon_nawa,lat_nawa,area_nawa,bafu_nawa_distance,remarks.1
0,2009,2009.0,1837.0,1837.0,NIO04,Porte du Scex,Rhône,557660,133280,5239.4,...,557660.0,133280.0,5239.4,0.0,Porte du Scex,557660.0,133280.0,5239.402096,0.0,
1,2011,2011.0,,4070.0,,Sion,Rhône,593770,118630,3372.4,...,,,,,Sion,593277.0,118449.0,3372.417040,0.0,
2,2016,2016.0,1833.0,1833.0,NIO02,Brugg,Aare,657000,259360,11681.3,...,657000.0,259360.0,11681.3,0.0,Brugg,657000.0,259360.0,11681.282882,0.0,
3,2018,2018.0,1835.0,1339.0,,Mellingen,Reuss,662830,252580,3385.8,...,662830.0,252580.0,3385.8,0.0,Gebenstorf,659450.0,258850.0,3420.503458,10.0,
4,2019,2019.0,,1852.0,NIO01,Brienzwiler,Aare,649930,177380,555.2,...,,,,,Brienzerseeeinlauf,646692.0,177000.0,555.808970,3.3,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,2617,2617.0,,,,Müstair,Rom,830800,168700,128.6,...,,,,,,,,,,
111,2623,2623.0,,,,Oberwald,Rhone,669900,154075,93.3,...,,,,,,,,,,
112,2634,2634.0,6169.0,1181.0,,Emmen,Kleine Emme,663700,213630,478.3,...,663700.0,213630.0,478.3,0.0,Emmen-Littau,663917.0,213356.0,478.277165,0.6,
113,2635,2635.0,,,,"Einsiedeln, Gross",Grossbach,700710,218125,8.9,...,,,,,,,,,,


In [7]:
# Most stations are identified primarily by their BAFU id; here we list the ones
# that are NOT, i.e. the rows whose bafu_id is missing:
missing_bafu = network_camels_ch_chem["bafu_id"].isna()
network_camels_ch_chem.loc[missing_bafu]

Unnamed: 0,basin_id,bafu_id,naduf_id,nawa_id,isot_id,hydro_station,hydrowater_body,lon,lat,area_camels,...,lon_naduf,lat_naduf,area_naduf,bafu_naduf_distance,nawa_station,lon_nawa,lat_nawa,area_nawa,bafu_nawa_distance,remarks.1
10,2053,,,3860.0,,"Martigny, Pont de Rossettan",Drance,570930,105200,675.7,...,,,,,Martigny,570930.0,105200.0,675.654142,0.0,
12,2063,,,1108.0,,Murgenthal,Aare,629665,235150,10059.4,...,,,,,Murgenthal,629760.0,235242.0,10059.355199,0.3,
19,2099,,,1053.0,,"Zürich, Unterhard",Limmat,682055,249430,2173.9,...,,,,,Hönggersteg,679609.0,250356.0,2186.067125,2.7,
20,2102,,,5023.0,,Sarnen,Sarner Aa,661460,194220,269.3,...,,,,,"Sarnen, Kägiswil",662600.0,195770.0,284.246739,2.1,
26,2122,,,1494.0,,"Moutier, La Charrue",Birse,595740,237010,185.8,...,,,,,Choindez- Aval de la Roche St-Jean,596293.0,240496.0,198.704397,3.9,
27,2125,,,1265.0,,Frauenthal,Lorze,674715,229845,261.7,...,,,,,Frauenthal,674725.0,229850.0,261.723315,0.0,
35,2155,,,6315.0,,"Wiler, Limpachmündung",Emme,608220,223240,924.1,...,,,,,"Gerlafingen, Brücke nach Biberist",609527.0,225566.0,924.578661,3.3,Q from id_0155
37,2160,,,2025.0,,"Broc, Château d'en bas",Sarine,573520,161345,636.2,...,,,,,"Broc, Halte des Marches",573520.0,161345.0,636.248285,0.0,
45,2202,,,1307.0,,Liestal,Ergolz,622270,259750,261.2,...,,,,,"Augst, Autobahn",620950.0,264930.0,284.647126,5.9,
46,2203,,,2115.0,,Aigle,Grande Eau,563960,129860,131.6,...,,,,,"Aigle, Autoroute",561112.0,130518.0,144.375806,3.0,


In [8]:
# Look up the basin_id of every catchment through each of its three station ids.
# The lookups run in the order bafu_id -> nawa_id -> naduf_id. Every merge brings in a
# column called "basin_id", so pandas' default suffixes leave the three results in
# "basin_id_x" (bafu_id match), "basin_id_y" (nawa_id match) and "basin_id" (naduf_id match).
for id_column in ("bafu_id", "nawa_id", "naduf_id"):
    catchments_gve = pd.merge(
        catchments_gve,
        network_camels_ch_chem[[id_column, "basin_id"]],
        on=id_column,
        how="left",
    )

catchments_gve

Unnamed: 0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2018_h,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id
0,1837,Porte_du_Scex,Rhône,581764.280706,nd_1837,0.0,,0.0,,581812.194190,...,0.072648,0.071371,0.071871,0.071714,0.071609,"POLYGON Z ((2674253.038 1167429.881 0.000, 267...",0.0,,,2009.0
1,1833,Brugg,Aare,827826.334191,nd_1833,0.0,,0.0,,827842.590414,...,0.486703,0.480337,0.477349,0.479700,0.484112,"POLYGON Z ((2655969.680 1259695.589 0.000, 265...",0.0,,,2016.0
2,1835,Mellingen,Reuss,423443.101185,nd_1835,0.0,,0.0,,423447.043212,...,0.429399,0.425385,0.422040,0.424314,0.422356,"POLYGON Z ((2663723.380 1252919.068 0.000, 266...",0.0,,,2018.0
3,1823,Andelfingen,Thur,278840.437214,nd_1823,0.0,,0.0,,278852.096268,...,0.873069,0.858348,0.855597,0.869395,0.868562,"POLYGON Z ((2719445.088 1279260.056 0.000, 271...",0.0,,,2044.0
4,1842,Martina,Inn,293426.137861,nd_1842,0.0,,0.0,,293431.436471,...,0.058582,0.057470,0.055563,0.055084,0.055510,"POLYGON Z ((2818919.419 1201129.962 0.000, 281...",0.0,,,2067.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,0,,Simme,151671.931781,nw_1919,0.0,,1919.0,Latterbach,151676.136857,...,0.259942,0.255896,0.251801,0.254637,0.256755,"POLYGON Z ((2605018.868 1172839.620 0.000, 260...",0.0,,2488.0,
183,0,,Sorne,82837.276182,nw_1504,0.0,,1504.0,Delémont,82838.882339,...,0.438026,0.431327,0.426675,0.424547,0.431703,"POLYGON Z ((2584725.396 1248420.902 0.000, 258...",0.0,,2640.0,
184,0,,Aare,745868.466423,nw_1142,0.0,,1142.0,Nidau,745881.109854,...,0.378388,0.374406,0.371942,0.373288,0.379099,"POLYGON Z ((2597091.802 1231250.148 0.000, 259...",0.0,,2029.0,
185,0,,Drance,138710.844326,nw_3860,0.0,,3860.0,Martigny,138730.554523,...,0.074332,0.072997,0.072378,0.072555,0.073715,"POLYGON Z ((2583409.306 1108018.362 0.000, 258...",0.0,,2053.0,


In [9]:
# Turn every 0 in the three station-id columns into NaN
# (0 appears to be the placeholder for "no id" in the shapefile - confirm with the data provider).
station_id_columns = ['gauge_id', "naduf_id", "nawa_id"]
catchments_gve.loc[:, station_id_columns] = (
    catchments_gve.loc[:, station_id_columns].replace(0, np.nan)
)

In [10]:
# Build 'basin_id_new' by priority: the gauge_id (bafu) match first, then the
# nawa_id match, and finally the naduf_id match.
has_gauge_id = catchments_gve['gauge_id'].notna()
has_nawa_id = catchments_gve['nawa_id'].notna()

# Value used for rows without a gauge_id: nawa match if a nawa_id exists, else naduf match
basin_id_without_gauge = np.where(
    has_nawa_id, catchments_gve['basin_id_y'], catchments_gve['basin_id']
)
catchments_gve['basin_id_new'] = np.where(
    has_gauge_id, catchments_gve['basin_id_x'], basin_id_without_gauge
)

# Display the updated DataFrame
catchments_gve

Unnamed: 0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id,basin_id_new
0,1837.0,Porte_du_Scex,Rhône,581764.280706,nd_1837,,,,,581812.194190,...,0.071371,0.071871,0.071714,0.071609,"POLYGON Z ((2674253.038 1167429.881 0.000, 267...",0.0,,,2009.0,2009.0
1,1833.0,Brugg,Aare,827826.334191,nd_1833,,,,,827842.590414,...,0.480337,0.477349,0.479700,0.484112,"POLYGON Z ((2655969.680 1259695.589 0.000, 265...",0.0,,,2016.0,2016.0
2,1835.0,Mellingen,Reuss,423443.101185,nd_1835,,,,,423447.043212,...,0.425385,0.422040,0.424314,0.422356,"POLYGON Z ((2663723.380 1252919.068 0.000, 266...",0.0,,,2018.0,2018.0
3,1823.0,Andelfingen,Thur,278840.437214,nd_1823,,,,,278852.096268,...,0.858348,0.855597,0.869395,0.868562,"POLYGON Z ((2719445.088 1279260.056 0.000, 271...",0.0,,,2044.0,2044.0
4,1842.0,Martina,Inn,293426.137861,nd_1842,,,,,293431.436471,...,0.057470,0.055563,0.055084,0.055510,"POLYGON Z ((2818919.419 1201129.962 0.000, 281...",0.0,,,2067.0,2067.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,,,Simme,151671.931781,nw_1919,,,1919.0,Latterbach,151676.136857,...,0.255896,0.251801,0.254637,0.256755,"POLYGON Z ((2605018.868 1172839.620 0.000, 260...",0.0,,2488.0,,2488.0
183,,,Sorne,82837.276182,nw_1504,,,1504.0,Delémont,82838.882339,...,0.431327,0.426675,0.424547,0.431703,"POLYGON Z ((2584725.396 1248420.902 0.000, 258...",0.0,,2640.0,,2640.0
184,,,Aare,745868.466423,nw_1142,,,1142.0,Nidau,745881.109854,...,0.374406,0.371942,0.373288,0.379099,"POLYGON Z ((2597091.802 1231250.148 0.000, 259...",0.0,,2029.0,,2029.0
185,,,Drance,138710.844326,nw_3860,,,3860.0,Martigny,138730.554523,...,0.072997,0.072378,0.072555,0.073715,"POLYGON Z ((2583409.306 1108018.362 0.000, 258...",0.0,,2053.0,,2053.0


In [11]:
# Check the result: rows for which no basin_id could be assigned
unassigned = catchments_gve["basin_id_new"].isna()
catchments_gve.loc[unassigned]

Unnamed: 0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id,basin_id_new
84,,,Inn,155039.922745,wq_2403,2403.0,Cinuos-Chel,,,155044.174307,...,0.053227,0.05203,0.051674,0.051282,"POLYGON Z ((2792289.295 1174479.955 0.000, 279...",2403.0,,,,
110,,,Limmat,373810.231279,wq_2622,2622.0,Gebenstorf,,,373815.227518,...,0.30318,0.300006,0.30036,0.301248,"POLYGON Z ((2663067.290 1262049.881 0.000, 266...",2622.0,,,,


In [12]:
# Here we solve it manually: the two catchments below have a gauge_id that found no
# match in the station table, so their basin_id is set by hand.
# Rows are selected by gauge_id (and only where basin_id_new is still missing) instead
# of by positional row label, so the fix keeps working if the row order changes.
manual_basin_ids = {2403.0: 2403.0, 2622.0: 2622.0}  # gauge_id -> basin_id
for gauge_id, basin_id in manual_basin_ids.items():
    needs_fix = catchments_gve["gauge_id"].eq(gauge_id) & catchments_gve["basin_id_new"].isna()
    catchments_gve.loc[needs_fix, "basin_id_new"] = basin_id

# Should now be empty
catchments_gve[catchments_gve.basin_id_new.isna()]

Unnamed: 0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id,basin_id_new


In [13]:
# Keep a single row per basin (the first occurrence of each 'basin_id_new')
# and use that id as the index
catchments_gve_unique = (
    catchments_gve
    .drop_duplicates(subset="basin_id_new")
    .set_index("basin_id_new")
)
catchments_gve_unique

Unnamed: 0_level_0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2018_h,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id
basin_id_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009.0,1837.0,Porte_du_Scex,Rhône,581764.280706,nd_1837,,,,,581812.194190,...,0.072648,0.071371,0.071871,0.071714,0.071609,"POLYGON Z ((2674253.038 1167429.881 0.000, 267...",0.0,,,2009.0
2016.0,1833.0,Brugg,Aare,827826.334191,nd_1833,,,,,827842.590414,...,0.486703,0.480337,0.477349,0.479700,0.484112,"POLYGON Z ((2655969.680 1259695.589 0.000, 265...",0.0,,,2016.0
2018.0,1835.0,Mellingen,Reuss,423443.101185,nd_1835,,,,,423447.043212,...,0.429399,0.425385,0.422040,0.424314,0.422356,"POLYGON Z ((2663723.380 1252919.068 0.000, 266...",0.0,,,2018.0
2044.0,1823.0,Andelfingen,Thur,278840.437214,nd_1823,,,,,278852.096268,...,0.873069,0.858348,0.855597,0.869395,0.868562,"POLYGON Z ((2719445.088 1279260.056 0.000, 271...",0.0,,,2044.0
2067.0,1842.0,Martina,Inn,293426.137861,nd_1842,,,,,293431.436471,...,0.058582,0.057470,0.055563,0.055084,0.055510,"POLYGON Z ((2818919.419 1201129.962 0.000, 281...",0.0,,,2067.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2478.0,,,Birse,151334.111214,nw_1497,,,1497.0,Les Riedes-Dessus,151336.979890,...,0.408785,0.401163,0.395319,0.395060,0.402401,"POLYGON Z ((2593469.122 1251650.273 0.000, 259...",0.0,,2478.0,
2486.0,,,Veveyse,50534.896959,nw_1528,,,1528.0,Vevey,50536.425993,...,0.247284,0.255509,0.245346,0.246707,0.250338,"POLYGON Z ((2558881.297 1155690.085 0.000, 255...",0.0,,2486.0,
2488.0,,,Simme,151671.931781,nw_1919,,,1919.0,Latterbach,151676.136857,...,0.259942,0.255896,0.251801,0.254637,0.256755,"POLYGON Z ((2605018.868 1172839.620 0.000, 260...",0.0,,2488.0,
2640.0,,,Sorne,82837.276182,nw_1504,,,1504.0,Delémont,82838.882339,...,0.438026,0.431327,0.426675,0.424547,0.431703,"POLYGON Z ((2584725.396 1248420.902 0.000, 258...",0.0,,2640.0,


In [14]:
# Delete the 2403 (BAFU) since it is empty for our time range, and 2622 (merged with 2243).
# Both labels are dropped in one non-in-place call with errors="ignore", which keeps the
# cell re-runnable: a second execution no longer raises KeyError for rows already removed.
catchments_gve_unique = catchments_gve_unique.drop(index=[2403.0, 2622.0], errors="ignore")

In [15]:
# Inspect the table after the two basins above have been removed
catchments_gve_unique

Unnamed: 0_level_0,naduf_id,naduf_name,water_body,Shape_Leng,ID,gauge_id,gauge_name,nawa_id,nawa_name,Shape_Le_1,...,gve_2018_h,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h,geometry,bafu_id,basin_id_x,basin_id_y,basin_id
basin_id_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009.0,1837.0,Porte_du_Scex,Rhône,581764.280706,nd_1837,,,,,581812.194190,...,0.072648,0.071371,0.071871,0.071714,0.071609,"POLYGON Z ((2674253.038 1167429.881 0.000, 267...",0.0,,,2009.0
2016.0,1833.0,Brugg,Aare,827826.334191,nd_1833,,,,,827842.590414,...,0.486703,0.480337,0.477349,0.479700,0.484112,"POLYGON Z ((2655969.680 1259695.589 0.000, 265...",0.0,,,2016.0
2018.0,1835.0,Mellingen,Reuss,423443.101185,nd_1835,,,,,423447.043212,...,0.429399,0.425385,0.422040,0.424314,0.422356,"POLYGON Z ((2663723.380 1252919.068 0.000, 266...",0.0,,,2018.0
2044.0,1823.0,Andelfingen,Thur,278840.437214,nd_1823,,,,,278852.096268,...,0.873069,0.858348,0.855597,0.869395,0.868562,"POLYGON Z ((2719445.088 1279260.056 0.000, 271...",0.0,,,2044.0
2067.0,1842.0,Martina,Inn,293426.137861,nd_1842,,,,,293431.436471,...,0.058582,0.057470,0.055563,0.055084,0.055510,"POLYGON Z ((2818919.419 1201129.962 0.000, 281...",0.0,,,2067.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2478.0,,,Birse,151334.111214,nw_1497,,,1497.0,Les Riedes-Dessus,151336.979890,...,0.408785,0.401163,0.395319,0.395060,0.402401,"POLYGON Z ((2593469.122 1251650.273 0.000, 259...",0.0,,2478.0,
2486.0,,,Veveyse,50534.896959,nw_1528,,,1528.0,Vevey,50536.425993,...,0.247284,0.255509,0.245346,0.246707,0.250338,"POLYGON Z ((2558881.297 1155690.085 0.000, 255...",0.0,,2486.0,
2488.0,,,Simme,151671.931781,nw_1919,,,1919.0,Latterbach,151676.136857,...,0.259942,0.255896,0.251801,0.254637,0.256755,"POLYGON Z ((2605018.868 1172839.620 0.000, 260...",0.0,,2488.0,
2640.0,,,Sorne,82837.276182,nw_1504,,,1504.0,Delémont,82838.882339,...,0.438026,0.431327,0.426675,0.424547,0.431703,"POLYGON Z ((2584725.396 1248420.902 0.000, 258...",0.0,,2640.0,


In [16]:
# Years available in the livestock columns: 1980, 1985, 1990 and every year from 1996 to 2022
gve_years = [1980, 1985, 1990] + list(range(1996, 2023))
# All "gve_<year>_S" columns first, followed by all "gve_<year>_h" columns
gve_columns = (
    ["gve_{}_S".format(year) for year in gve_years]
    + ["gve_{}_h".format(year) for year in gve_years]
)

# One row per basin of the station table; the assignment aligns the catchment
# values on the index (basin_id here, basin_id_new in catchments_gve_unique)
livestock_df = pd.DataFrame(index=network_camels_ch_chem.basin_id.astype(float))
livestock_df[gve_columns] = catchments_gve_unique[gve_columns]

In [17]:
# Now we can export the data file by file (one file per basin)

In [18]:
# Years available in the livestock columns: 1980, 1985, 1990 and every year from 1996 to 2022
gve_years = [1980, 1985, 1990] + list(range(1996, 2023))
# Only the "gve_<year>_S" columns go into this table
gve_sum_columns = ["gve_{}_S".format(year) for year in gve_years]

# One row per basin of the station table, filled by index alignment
livestock_df_sum = pd.DataFrame(index=network_camels_ch_chem.basin_id.astype(float))
livestock_df_sum[gve_sum_columns] = catchments_gve_unique[gve_sum_columns]


livestock_df_sum

Unnamed: 0_level_0,gve_1980_S,gve_1985_S,gve_1990_S,gve_1996_S,gve_1997_S,gve_1998_S,gve_1999_S,gve_2000_S,gve_2001_S,gve_2002_S,...,gve_2013_S,gve_2014_S,gve_2015_S,gve_2016_S,gve_2017_S,gve_2018_S,gve_2019_S,gve_2020_S,gve_2021_S,gve_2022_S
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009.0,44419.803851,41498.105017,43451.703140,41600.828020,41436.130552,40982.295106,40443.809272,39908.566867,40199.651647,40573.634430,...,38406.270927,37728.171587,36922.288896,36770.221726,37630.241766,37737.947368,37074.327224,37334.332673,37252.732194,37198.045523
2011.0,24072.608870,22693.132684,24780.969716,24412.001877,24197.004803,23857.101053,23409.732720,23038.504203,23204.254769,23328.966871,...,22135.045740,21568.257099,21174.419734,21114.652015,20947.247122,20485.183897,20377.910121,20141.199484,20041.529338,20118.227113
2016.0,663540.687055,631968.362347,620404.526290,576650.707450,568162.661905,565985.244776,578360.471134,577830.517626,587676.338669,585873.775562,...,577903.115984,576598.709930,571699.608458,566756.025645,563668.203030,562913.724005,555550.452501,552094.453227,554813.969584,559916.905218
2018.0,170709.609154,166791.320867,165946.890476,149502.951379,145784.847064,146331.912050,150073.614546,149821.604692,151100.684787,151350.550289,...,147143.338383,146181.150865,145528.695756,145811.207108,144834.426915,145391.390397,144032.478276,142899.790433,143669.861440,143006.766115
2019.0,3766.310809,3685.863382,3952.956017,3730.083121,3649.827564,3680.967627,3768.220347,3739.265282,3750.969479,3771.941551,...,3568.097771,3369.758180,3297.830804,3278.220768,3293.571143,3296.777204,3257.864435,3176.686632,3149.118456,3164.913236
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2617.0,1509.806858,1436.049428,1718.433051,1605.456825,1523.111242,1529.712425,1586.784973,1611.428826,1638.872020,1633.314135,...,1576.663637,1524.803888,1493.855990,1489.389161,1495.458560,1468.922897,1473.383629,1428.709108,1422.936798,1397.229382
2623.0,287.092808,269.661620,274.561650,263.169499,258.457877,256.281457,256.753604,259.218369,261.950193,261.453973,...,253.407363,251.761861,250.026893,250.053512,249.786938,249.429092,245.991310,244.222932,245.221412,248.124981
2634.0,33964.081123,34272.654174,35385.334664,32379.820996,31911.498200,32094.983706,33368.023172,33474.554229,34055.106241,34188.802418,...,34396.668378,33799.003546,33886.131423,34042.632665,34043.657346,34711.699532,34225.528894,34103.997682,34262.607814,34344.503866
2635.0,123.275307,114.145046,112.148024,106.455627,100.274969,100.949120,104.920667,103.984611,104.272761,103.145638,...,103.015428,100.590541,100.614186,100.493735,101.265382,102.859678,102.923140,100.499011,101.192461,101.728319


In [19]:
# Years available in the livestock columns: 1980, 1985, 1990 and every year from 1996 to 2022
gve_years = [1980, 1985, 1990] + list(range(1996, 2023))
# Only the "gve_<year>_h" columns go into this table
gve_ha_columns = ["gve_{}_h".format(year) for year in gve_years]

# One row per basin of the station table, filled by index alignment
livestock_df_ha = pd.DataFrame(index=network_camels_ch_chem.basin_id.astype(float))
livestock_df_ha[gve_ha_columns] = catchments_gve_unique[gve_ha_columns]

livestock_df_ha

Unnamed: 0_level_0,gve_1980_h,gve_1985_h,gve_1990_h,gve_1996_h,gve_1997_h,gve_1998_h,gve_1999_h,gve_2000_h,gve_2001_h,gve_2002_h,...,gve_2013_h,gve_2014_h,gve_2015_h,gve_2016_h,gve_2017_h,gve_2018_h,gve_2019_h,gve_2020_h,gve_2021_h,gve_2022_h
basin_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009.0,0.085512,0.079887,0.083648,0.080085,0.079768,0.078894,0.077857,0.076827,0.077387,0.078107,...,0.073935,0.072630,0.071078,0.070785,0.072441,0.072648,0.071371,0.071871,0.071714,0.071609
2011.0,0.071379,0.067289,0.073480,0.072386,0.071748,0.070740,0.069414,0.068313,0.068805,0.069174,...,0.065634,0.063954,0.062786,0.062608,0.062112,0.060742,0.060424,0.059722,0.059427,0.059654
2016.0,0.573705,0.546408,0.536411,0.498580,0.491242,0.489359,0.500059,0.499601,0.508113,0.506555,...,0.499662,0.498534,0.494300,0.490025,0.487356,0.486703,0.480337,0.477349,0.479700,0.484112
2018.0,0.504173,0.492601,0.490107,0.441542,0.430561,0.432176,0.443227,0.442483,0.446261,0.446998,...,0.434573,0.431731,0.429804,0.430639,0.427754,0.429399,0.425385,0.422040,0.424314,0.422356
2019.0,0.067843,0.066394,0.071205,0.067191,0.065745,0.066306,0.067878,0.067356,0.067567,0.067945,...,0.064273,0.060700,0.059404,0.059051,0.059328,0.059385,0.058684,0.057222,0.056726,0.057010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2617.0,0.117321,0.111590,0.133533,0.124754,0.118355,0.118868,0.123303,0.125218,0.127350,0.126918,...,0.122516,0.118487,0.116082,0.115735,0.116206,0.114144,0.114491,0.111019,0.110571,0.108573
2623.0,0.030745,0.028878,0.029403,0.028183,0.027678,0.027445,0.027496,0.027760,0.028052,0.027999,...,0.027137,0.026961,0.026775,0.026778,0.026750,0.026711,0.026343,0.026154,0.026261,0.026572
2634.0,0.709966,0.716417,0.739675,0.676850,0.667060,0.670896,0.697507,0.699734,0.711869,0.714664,...,0.719009,0.706516,0.708337,0.711608,0.711630,0.725594,0.715432,0.712891,0.716207,0.717919
2635.0,0.137278,0.127110,0.124886,0.118547,0.111665,0.112416,0.116838,0.115796,0.116117,0.114862,...,0.114717,0.112016,0.112043,0.111908,0.112768,0.114543,0.114614,0.111914,0.112686,0.113283


In [22]:
# Export one CSV per basin with a yearly series of the two livestock variables
# (columns "gve_sum" and "gve_ha", index "date" holding the year).

# Make sure the output folder exists before the first file is written
os.makedirs(PATH_OUTPUT, exist_ok=True)

# Continuous yearly index of the exported series: 1980 to 2020 inclusive.
# Source columns for later years are dropped by the reindex below.
export_years = pd.RangeIndex(1980, 2021)

for basin in tqdm.tqdm(livestock_df.index):
    livestock_df_sum_basin = pd.DataFrame(data=livestock_df_sum.loc[basin, :])

    # Use regex to extract the year from the column names (e.g. "gve_1980_S" -> "1980");
    # raw string so that "\d" is not interpreted as a string escape
    livestock_df_sum_basin.index = livestock_df_sum_basin.index.to_series().str.extract(r'(\d+)', expand=False)

    # Convert the extracted years from strings to numbers
    livestock_df_sum_basin.index = pd.to_numeric(livestock_df_sum_basin.index)

    livestock_df_sum_basin.index.name = "date"

    livestock_df_sum_basin.columns = ["gve_sum"]

    # Positional assignment: relies on livestock_df_ha listing its years in the
    # same order as livestock_df_sum
    livestock_df_sum_basin["gve_ha"] = livestock_df_ha.loc[basin, :].values

    livestock_df_sum_basin.index = livestock_df_sum_basin.index.astype(int)

    # Reindex the dataframe to include all years; years without a value become NaN
    livestock_df_sum_basin_interpolated = livestock_df_sum_basin.reindex(export_years)

    # Interpolate missing values linearly between the available years
    livestock_df_sum_basin_interpolated = livestock_df_sum_basin_interpolated.interpolate(method='linear')

    livestock_df_sum_basin_interpolated = livestock_df_sum_basin_interpolated.round(4)
    livestock_df_sum_basin_interpolated.index.name = "date"

    # os.path.join inserts the platform's separator instead of a hard-coded backslash
    output_file = os.path.join(PATH_OUTPUT, "camels_ch_chem_livestock_" + str(int(basin)) + ".csv")
    livestock_df_sum_basin_interpolated.to_csv(output_file, encoding='latin')

100%|██████████| 115/115 [00:00<00:00, 175.72it/s]


# End