In [31]:
import os
import numpy as np
import xarray as xr
import pandas as pd
from functools import partial
from joblib import Parallel, delayed
from pathos.multiprocessing import ProcessingPool as Pool
# Load the ncdf4 package
import netCDF4 as nc
import rasterio
from shapely.geometry import Point
import geopandas as gpd
from rasterio.transform import from_origin
from rasterio.features import rasterize
from rasterio.transform import from_bounds

## File path and settings

In [32]:
# Set the file paths
ncname_cdi = "/Users/sabinmaharjan/projects/python/do/static/do/cdi_1.nc"
ncname_rain = "/Users/sabinmaharjan/projects/python/do/static/do/2024.forecast.nc"


pd.set_option('display.precision',2)
pd.set_option('display.float_format','{:.2f}'.format)


## Reading CDI data and creating dataframe

In [33]:
with nc.Dataset(ncname_cdi, 'r') as ncin:
    lon_cdi = ncin.variables['longitude'][:]
    lat_cdi = ncin.variables['latitude'][:]
    cdi_array = ncin.variables['cdi'][:, :, 305]

    # Create meshgrids of longitude and latitude
    lon_grid, lat_grid = np.meshgrid(lon_cdi, lat_cdi)

    # Create a DataFrame with CDI data and coordinates
    cdi_df = pd.DataFrame({
        'lon': lon_grid.ravel(),
        'lat': lat_grid.ravel(),
        'cdi': cdi_array.ravel()
    })


## Reading forecast data and creating dataframe

In [34]:
# Open the rainfall file
with nc.Dataset(ncname_rain, 'r') as ncin:
    lon_rain = ncin.variables['lon'][:]
    lat_rain = ncin.variables['lat'][:]
    n=0
    m=1
    time_r = ncin.variables['time'][n]  # Extract the time at n=0
    time_r = nc.num2date(time_r, ncin.variables['time'].units)  # Convert the time number to a datetime object
    month_name = time_r.strftime('%B')  # Get the month name
    print(f"The month at n=0 is: {month_name}")

    print(ncin.variables['percentage_of_ensembles'])
    rain_array = ncin.variables['percentage_of_ensembles'][m, n, :, :]
  # Round the latitude and longitude values
    lat_rounded = np.round(lat_rain, 2)
    lon_rounded = np.round(lon_rain, 2)

    # Create a DataFrame with rainfall data and rounded coordinates
    rain_df = pd.DataFrame({
        'lon': np.repeat(lon_rounded, len(lat_rounded)),
        'lat': np.tile(lat_rounded, len(lon_rounded)),
        'rain': rain_array.ravel()
    })
   

The month at n=0 is: April
<class 'netCDF4._netCDF4.Variable'>
float32 percentage_of_ensembles(nbins, time, lat, lon)
    _FillValue: 1e+20
    long_name: percentage of ensembles in each percentile bin
    units: None
    missing_value: 1e+20
    comment: grid based daily rainfall
    name: ens_percents
unlimited dimensions: time
current shape = (2, 5, 691, 886)
filling on


In [35]:
# Create a DataFrame with the grid of latitude and longitude coordinates
coords_df = pd.DataFrame({'lon': lon_grid.ravel(), 'lat': lat_grid.ravel()})
# Merge the coordinate DataFrame with the rainfall DataFrame
merged_df = pd.merge(coords_df, rain_df, on=['lon', 'lat'], how='left')

In [36]:
# Join the CDI and merged DataFrames
join_df = pd.merge(cdi_df, merged_df, on=['lon', 'lat'], how='left')
print("join_df.head(10)")
print(join_df.head(10))

join_df.head(10)
     lon    lat  cdi  rain
0 112.00 -44.00  NaN   NaN
1 112.05 -44.00  NaN   NaN
2 112.10 -44.00  NaN   NaN
3 112.15 -44.00  NaN   NaN
4 112.20 -44.00  NaN   NaN
5 112.25 -44.00  NaN   NaN
6 112.30 -44.00  NaN   NaN
7 112.35 -44.00  NaN   NaN
8 112.40 -44.00  NaN   NaN
9 112.45 -44.00  NaN   NaN


In [37]:

# Count the number of rows with NaN values
num_rows_with_nan = join_df.isna().any(axis=1).sum()

# Count the number of rows without NaN values
num_rows_without_nan = (~join_df.isna().any(axis=1)).sum()

print(f"Number of rows with NaN values: {num_rows_with_nan}")
print(f"Number of rows without NaN values: {num_rows_without_nan}")

rmna_df=join_df.dropna()
print(rmna_df.shape)


Number of rows with NaN values: 566432
Number of rows without NaN values: 6289
(6289, 4)


In [38]:
def classify_drought(row):
    cdi, rain = row['cdi'], row['rain']
    if cdi < 0.2:
        if rain < 50:
            if cdi < 0.02:
                return 5  # Persists
            else:
                return 6  # Worsens
        elif rain < 70:
            return 5  # Persists
        else:
            if 0.1 <= cdi < 0.2:
                return 2  # Removed
            else:
                return 3  # Improved
    else:
        if rain < 30:
            return 4  # Develops
        else:
            return 1  # No drought

In [39]:
ncell = len(rmna_df)
ncores = 4 # Adjust based on your system's capabilities

# Use pathos for multiprocessing
with Pool(ncores) as p:
    try:
        classified = p.map(classify_drought, [rmna_df.iloc[i] for i in range(ncell)])
    except Exception as e:
        print(f"An error occurred during multiprocessing: {e}")
        p.close() # Close the pool
        p.join() # Wait for the worker processes to exit
        raise # Re-raise the exception

print(len(classified))

6289


In [40]:
# Create the dataframe
df_out = pd.DataFrame({ 'lat': cdi_df['lat'],'lon': cdi_df['lon'], 'category': np.nan})

# Get the rows where NAs were removed
order = rmna_df.index.astype(int)

# Replace the category value
classified = np.array(classified)
df_out.loc[order, 'category'] = classified.astype(int)
print(df_out['category'].value_counts())


4.00    3224
1.00    2742
6.00     300
5.00      23
Name: category, dtype: int64


In [41]:
# Create a DataArray from the DataFrame
da = xr.DataArray(df_out['category'].values.reshape(cdi_array.shape),
                  coords=[('latitude', lat), ('longitude', lon)],
                  name='outlook')

# Add attributes
da.attrs['varunit'] = ''
da.attrs['longname'] = 'drought outlook'

# Create a Dataset from the DataArray
ds = da.to_dataset()
# Add the time variable to the Dataset
ds['time'] = (('time'), [time_r])


In [42]:
# Save the Dataset as a NetCDF file
out_ncname = "/Users/sabinmaharjan/projects/python/do/static/nc/1_months/"+month_name+"_Final_2024.nc"

try:
    ds.to_netcdf(out_ncname)
    print(f"file saved with name: {out_ncname}")
except Exception as e:
    print(f"An error occurred while saving the Dataset: {e}")

file saved with name: /Users/sabinmaharjan/projects/python/do/static/nc/1_months/April_Final_2024.nc
