# Statistical Analysis of Irrigation Values Inside and Outside Dam Command Areas (1980–2015)

This notebook performs descriptive statistics, visualizations, and hypothesis testing on
irrigation pixel values extracted from inside and outside dam command areas for the years
1980–2015.

**Outline:**
1. Setup and Configuration
2. Data Ingestion and Preprocessing
3. Pixel Classification (Inside vs. Outside)
4. Descriptive Statistics
5. Distribution Visualization
6. Hypothesis Testing and Effect Sizes
7. Bootstrap Confidence Intervals
8. Summary Figures Over Time
9. Interpretation Aids

In [7]:
# Core Imports
import os, sys, warnings
from itertools import product
from scipy.stats import poisson
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
import seaborn as sns
from IPython.display import display, Markdown
from tqdm import tqdm
import rasterio
from rasterio.mask import mask
from rasterio.io import MemoryFile
from rasterio.features import rasterize
import statsmodels.formula.api as smf

# Custom Utility Imports
# The project root is taken to be two directories above the current working
# directory. It is placed at the front of sys.path (once) so that the `Code`
# package imported just below can be resolved; the order of these statements
# therefore matters.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from Code.utils.utility import load_config, resolve_path
from Code.utils.spatial_utility import load_raster_and_reproject

# NOTE(review): this suppresses every warning category for the rest of the
# kernel session, including any raised by the geospatial libraries used
# below -- consider narrowing the filter to specific categories/modules.
warnings.filterwarnings("ignore")
# Project configuration object; the cells below index it by key to obtain
# dataset paths, which are then passed through resolve_path().
config = load_config()

In [9]:
# Read the combined CPIS polygon layer and preview its first rows.
cpis_shp_path = resolve_path(config["Combined_CPIS_All_shp_path"])
cp_gdf = gpd.read_file(cpis_shp_path)
print(cp_gdf.head())

# Open the reference irrigation raster only long enough to capture the grid
# definition (CRS, affine transform, array shape) and a copy of its metadata;
# the dataset is closed again when the `with` block exits.
ref_raster_path = resolve_path(config["Irrigation_Arid_SSA_2000_tif_path"])
with rasterio.open(ref_raster_path) as ref:
    ref_crs = ref.crs
    ref_transform = ref.transform
    ref_shape = ref.shape
    ref_meta = ref.meta.copy()

def rasterize_cpis_from_columns(gdf, year_col, out_shape, transform, crs):
    """Burn the features flagged for one year into a binary ``uint8`` mask.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Features carrying a geometry column and the flag column ``year_col``.
    year_col : str
        Name of the flag column; only rows where it equals 1 are rasterized.
    out_shape : tuple of int
        Shape of the output array, passed to ``rasterize`` as ``out_shape``.
    transform : affine.Affine
        Transform of the target grid, passed to ``rasterize`` as ``transform``.
    crs : any CRS accepted by ``GeoDataFrame.to_crs``
        The selected features are reprojected to this CRS before burning.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape ``out_shape`` holding 1 in the cells that
        ``rasterize`` burns for the selected geometries and 0 elsewhere.
        When no row is flagged, or every flagged geometry is null/empty,
        an all-zero array is returned.
    """
    gdf_year = gdf[gdf[year_col] == 1].to_crs(crs)

    # Skip null/empty geometries: they cannot be burned and would otherwise
    # be rejected by rasterio.
    shapes = [
        (geom, 1)
        for geom in gdf_year.geometry
        if geom is not None and not geom.is_empty
    ]

    if not shapes:
        # rasterio's rasterize() raises ValueError when it is given no valid
        # shapes; a year without any usable feature is simply an empty mask.
        return np.zeros(out_shape, dtype='uint8')

    return rasterize(
        shapes,
        out_shape=out_shape,
        transform=transform,
        fill=0,
        dtype='uint8'
    )

# Binary CPIS masks on the reference grid for the two years flagged in the
# shapefile (columns year_2000 and year_2021).
cp_raster_2000 = rasterize_cpis_from_columns(cp_gdf, "year_2000", ref_shape, ref_transform, ref_crs)
cp_raster_2021 = rasterize_cpis_from_columns(cp_gdf, "year_2021", ref_shape, ref_transform, ref_crs)

# Per-pixel linear interpolation between the 2000 and 2021 masks, evaluated
# at 2015. Both masks are converted to float BEFORE differencing: they are
# uint8, so (2021 - 2000) would wrap to 255 instead of -1 wherever a pivot
# present in 2000 is gone by 2021, and the clip below would then turn that
# pixel into 1.0 rather than the intended 1 - 15/21.
cp_float_2000 = cp_raster_2000.astype(float)
cp_float_2021 = cp_raster_2021.astype(float)
cp_interp_2015 = cp_float_2000 + (cp_float_2021 - cp_float_2000) * (2015 - 2000) / (2021 - 2000)
# With float inputs the result already lies in [0, 1]; the clip is kept as a
# guard against floating-point round-off.
cp_interp_2015 = np.clip(cp_interp_2015, 0, 1)


   ID  year_2000  year_2021       Country Country Co  \
0   1          1          1  South Africa        ZAF   
1   2          1          1  South Africa        ZAF   
2   3          1          1  South Africa        ZAF   
3   4          1          1  South Africa        ZAF   
4   5          1          1  South Africa        ZAF   

                                            geometry  
0  POLYGON ((2158598.021 -4057555.506, 2158658.42...  
1  POLYGON ((2158740.433 -4056950.848, 2158788.75...  
2  POLYGON ((2286656.768 -4053677.724, 2286693.00...  
3  POLYGON ((2285644.300 -4053175.531, 2285704.69...  
4  POLYGON ((2184297.066 -4052314.792, 2184333.29...  
