In [1]:
import geopandas as gpd
import pandas as pd
import fiona
import matplotlib.pyplot as plt
from shapely.geometry import Point
from tqdm import tqdm
import rasterio
import numpy as np
import yaml
import os

# Load configuration
with open('../../config.yaml', 'r') as file:
    config = yaml.safe_load(file)

def resolve_path(relative_path):
    return os.path.join(config['base_path'], relative_path)

In [25]:
# load the groundwater productivity dataset that is restricted to arid cropland
gdf = gpd.read_file(resolve_path(config['Groundwater_Prod_Arid_Cropland_gpkg_path']))

gdf.plot(column='Liters_Second')
plt.show()

In [26]:
# add back in the original categories as a new column and their ranges
value_map = {'VH': 20.0, 'H': 12.5, 'M': 3.0, 'LM': 0.75, 'L': 0.3, 'VL': 0.1}

# Reverse the value_map
reverse_value_map = {v: k for k, v in value_map.items()}

# Create the 'GW_prod_cat' column based on the 'Liters_Second' column
gdf['GW_prod_cat'] = gdf['Liters_Second'].map(reverse_value_map)

# now get the actual categories
value_map_cat = {
    'VH': '>20',
    'H': '5-20',
    'M': '1-5',
    'LM': '0.5-1',
    'L': '0.1-0.5',
    'VL': '<0.1'
}

# Create the 'GW_prod_cat' column based on the 'Liters_Second' column
gdf['GW_prod_cat_L_s'] = gdf['GW_prod_cat'].map(value_map_cat)

gdf.head()

Unnamed: 0,X,Y,Liters_Second,in_cropland,geometry,GW_prod_cat,GW_prod_cat_L_s
0,9.41405,37.251192,3.0,True,POINT (9.41405 37.25119),M,1-5
1,9.46405,37.251192,3.0,True,POINT (9.46405 37.25119),M,1-5
2,9.56405,37.251192,3.0,True,POINT (9.56405 37.25119),M,1-5
3,9.61405,37.251192,12.5,True,POINT (9.61405 37.25119),H,5-20
4,9.66405,37.251192,3.0,True,POINT (9.66405 37.25119),M,1-5


In [27]:
# Add the country name column

# Load the country boundaries shapefile
Africa_boundaries = gpd.read_file(resolve_path(config['Africa_boundaries_shp_path']))

# Ensure both GeoDataFrames use the same coordinate reference system (CRS)
gdf = gdf.to_crs(Africa_boundaries.crs)

# Perform the spatial join
gdf = gpd.sjoin(gdf, Africa_boundaries, how="left", op='intersects')

gdf = gdf.rename(columns={'NAME_0': 'Country'})
gdf.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,X,Y,Liters_Second,in_cropland,geometry,GW_prod_cat,GW_prod_cat_L_s,index_right,OBJECTID,ISO,Country,Continent,REgion
0,9.41405,37.251192,3.0,True,POINT (9.41405 37.25119),M,1-5,51.0,52.0,TUN,Tunisia,Africa,
1,9.46405,37.251192,3.0,True,POINT (9.46405 37.25119),M,1-5,51.0,52.0,TUN,Tunisia,Africa,
2,9.56405,37.251192,3.0,True,POINT (9.56405 37.25119),M,1-5,51.0,52.0,TUN,Tunisia,Africa,
3,9.61405,37.251192,12.5,True,POINT (9.61405 37.25119),H,5-20,51.0,52.0,TUN,Tunisia,Africa,
4,9.66405,37.251192,3.0,True,POINT (9.66405 37.25119),M,1-5,51.0,52.0,TUN,Tunisia,Africa,


In [None]:
# Plot the GeoDataFrame, color-coded by country
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax, column='Country', legend=True, cmap='tab20', markersize=50)
ax.set_title('Plot by Country Name')
plt.show()

In [31]:
# load the cropland raster
raw_cropland_clipped_path = resolve_path(config['Arid_Cropland_tif_path'])
cropland = rasterio.open(raw_cropland_clipped_path)

with rasterio.open(raw_cropland_clipped_path) as cropped_data:
    cropped_image = cropped_data.read(1)
    plt.figure(figsize=(10, 10))
    plt.imshow(cropped_image, cmap='viridis', vmin=0, vmax=1)
    plt.colorbar(label='Cropland Percentage')
    plt.title('Cropped Cropland Data within Arid Regions')
    plt.show()

# Assure the same coordinate system
gdf = gdf.to_crs(cropland.crs)

# Function to get the cropland value at each point's location
def get_cropland_value(row, raster, transform, nodata):
    x, y = row.geometry.x, row.geometry.y
    # Convert coordinates to raster space
    row_idx, col_idx = ~transform * (x, y)
    row_idx, col_idx = int(row_idx), int(col_idx)
    # Check if the point falls within the raster coverage
    if 0 <= row_idx < raster.shape[0] and 0 <= col_idx < raster.shape[1]:
        value = raster[row_idx, col_idx]
        # Return np.nan if the value is nodata
        return np.nan if value == nodata else value
    else:
        return np.nan

# Apply the function to get the cropland value for each point
gdf['Cropland_Value'] = gdf.apply(get_cropland_value, axis=1, raster=cropland.read(1), transform=cropland.transform)

gdf.head()


In [45]:
gdf = gdf.dropna(subset=['Cropland_Value'])

# Group by country and groundwater productivity category to calculate the total cropland per group
grouped = gdf.groupby(['Country', 'GW_prod_cat'])['Cropland_Value'].sum().reset_index()

# Calculate the total cropland per country
total_cropland_per_country = gdf.groupby('Country')['Cropland_Value'].sum().reset_index()
total_cropland_per_country = total_cropland_per_country.rename(columns={'Cropland_Value': 'Total_Cropland'})

# Merge the grouped data with the total cropland per country
merged = grouped.merge(total_cropland_per_country, on='Country')

# Calculate the percent cropland per groundwater productivity category within each country
merged['Percent_Cropland'] = (merged['Cropland_Value'] / merged['Total_Cropland']) * 100

# Save the results to a file if needed
merged.to_csv("percent_cropland_per_country.csv", index=False)

# Display the first few rows to verify the results
print(merged.head())

  Country GW_prod_cat  Cropland_Value  Total_Cropland  Percent_Cropland
0  Angola           H         543.561         592.297         91.771696
1  Angola           L          10.713         592.297          1.808721
2  Angola          LM          20.154         592.297          3.402685
3  Angola           M           0.924         592.297          0.156003
4  Angola          VH          16.945         592.297          2.860896


In [49]:
print(merged.tail())

     Country GW_prod_cat  Cropland_Value  Total_Cropland  Percent_Cropland
85  Zimbabwe           L      560.735001      792.517001         70.753687
86  Zimbabwe          LM       58.048000      792.517001          7.324512
87  Zimbabwe           M      154.129000      792.517001         19.448037
88  Zimbabwe          VH        1.705000      792.517001          0.215137
89  Zimbabwe          VL        0.176000      792.517001          0.022208


In [46]:
CPIS = gpd.read_file(resolve_path(config['CPIS_Groundwater_Prod_gpkg_path']))


# Define the ranges and corresponding categories
def categorize_liters_per_second(value):
    if value >= 20:
        return 'VH'
    elif 5 <= value < 20:
        return 'H'
    elif 1 <= value < 5:
        return 'M'
    elif 0.5 <= value < 1:
        return 'LM'
    elif 0.1 < value < 0.5:
        return 'L'
    elif value <= 0.1:
        return 'VL'
    else:
        return 'Unknown'  # In case the value falls outside the specified ranges


# Apply the function to create the 'GW_prod_cat' column
CPIS['GW_prod_cat'] = CPIS['Liters_Second'].apply(categorize_liters_per_second)

CPIS.head()

Unnamed: 0,X,Y,Liters_Second,in_cropland,index_right,ID,year_2000,year_2021,Country,Country Co,geometry,GW_prod_cat
0,-7.48595,33.401192,12.5,True,8028,8029,1,0,Morocco,MAR,POINT (-7.48595 33.40119),H
1,-7.58595,33.301192,12.5,True,8026,8027,1,0,Morocco,MAR,POINT (-7.58595 33.30119),H
2,-7.58595,33.201192,12.5,True,8016,8017,1,0,Morocco,MAR,POINT (-7.58595 33.20119),H
3,13.16405,32.751192,12.5,True,7993,7994,1,0,Libya,LBY,POINT (13.16405 32.75119),H
4,-6.83595,32.501192,12.5,True,7960,7961,1,0,Morocco,MAR,POINT (-6.83595 32.50119),H


In [47]:
CPIS = CPIS.merge(merged[['Country', 'GW_prod_cat', 'Percent_Cropland']], on=['Country', 'GW_prod_cat'], how='left')

CPIS.head()

Unnamed: 0,X,Y,Liters_Second,in_cropland,index_right,ID,year_2000,year_2021,Country,Country Co,geometry,GW_prod_cat,Percent_Cropland
0,-7.48595,33.401192,12.5,True,8028,8029,1,0,Morocco,MAR,POINT (-7.48595 33.40119),H,
1,-7.58595,33.301192,12.5,True,8026,8027,1,0,Morocco,MAR,POINT (-7.58595 33.30119),H,
2,-7.58595,33.201192,12.5,True,8016,8017,1,0,Morocco,MAR,POINT (-7.58595 33.20119),H,
3,13.16405,32.751192,12.5,True,7993,7994,1,0,Libya,LBY,POINT (13.16405 32.75119),H,
4,-6.83595,32.501192,12.5,True,7960,7961,1,0,Morocco,MAR,POINT (-6.83595 32.50119),H,


In [48]:
# remove the northern african countries
northern_africa_countries = ['Algeria', 'Egypt', 'Libya', 'Morocco', 'Sudan', 'Tunisia', 'Western Sahara']
CPIS = CPIS[~CPIS['Country'].isin(northern_africa_countries)]

CPIS.head()

Unnamed: 0,X,Y,Liters_Second,in_cropland,index_right,ID,year_2000,year_2021,Country,Country Co,geometry,GW_prod_cat,Percent_Cropland
48,-15.78595,16.151192,20.0,True,30817,30818,0,1,Senegal,SEN,POINT (-15.78595 16.15119),VH,
49,-5.93595,13.901192,3.0,True,30780,30781,0,1,Mali,MLI,POINT (-5.93595 13.90119),M,72.680101
50,-4.68595,10.751192,3.0,True,30748,30749,0,1,Burkina Faso,BFA,POINT (-4.68595 10.75119),M,6.837512
51,18.51405,8.951192,3.0,True,30737,30738,0,1,Chad,TCD,POINT (18.51405 8.95119),M,
52,36.41405,-0.698808,12.5,True,30696,30697,0,1,Kenya,KEN,POINT (36.41405 -0.69881),H,


What's left to do is calculate the targetting ratios for each productivity category 