In [48]:
import os
import numpy as np
import xarray as xr
import pandas as pd
from functools import partial
from joblib import Parallel, delayed
from pathos.multiprocessing import ProcessingPool as Pool
# Load the ncdf4 package
import netCDF4 as nc
import rasterio
from shapely.geometry import Point
import geopandas as gpd
from rasterio.transform import from_origin
from rasterio.features import rasterize
from rasterio.transform import from_bounds

## File path and settings

In [49]:
# Set the file paths
ncname_cdi = "/Users/sabinmaharjan/projects/python/do/static/do/cdi_1.nc"
ncname_rain = "/Users/sabinmaharjan/projects/python/do/static/do/for_rain.nc"


pd.set_option('display.precision',2)
pd.set_option('display.float_format','{:.2f}'.format)


## Reading CDI data and creating dataframe

In [50]:
# Open the CDI file
with nc.Dataset(ncname_cdi, 'r') as nc_cdi:
    lon = nc_cdi.variables['longitude'][:]
    lat = nc_cdi.variables['latitude'][:]
    time=305
    cdi_array = nc_cdi.variables['cdi'][:, :, time]
    cdi_vec = cdi_array.ravel()
    cdi_df = pd.DataFrame({'lon': np.repeat(lon, len(lat)), 'lat': np.tile(lat, len(lon)), 'cdi': cdi_vec})
    print(cdi_df.shape)
    # cdi_df=cdi_df.dropna()
    # print(cdi_df.shape)


(572721, 3)


## Reading forecast data and creating dataframe

In [51]:
# Open the rainfall file
with nc.Dataset(ncname_rain, 'r') as ncrain:
   
    # Example: Extract latitude and longitude data from the NetCDF file
    lon_r = ncrain.variables['lon'][:]
    lat_r = ncrain.variables['lat'][:]

     # Define indices for selecting a specific subset of data
    time = 2
    nbins = 2
    time_r = ncrain.variables['time'][time]  # Extract the time at n=0
    time_r = nc.num2date(time_r, ncrain.variables['time'].units)  # Convert the time number to a datetime object
    month_name = time_r.strftime('%B')  # Get the month name
    print(f"The month at n=0 is: {month_name}")
    # Extract rain data for the specified indices
    rain_array = ncrain.variables['percentage_of_ensembles'][nbins-1, time-1, :, :]
    rain_vec = rain_array.ravel()

    # Tile latitude and repeat longitude to match the shape of rain_array
    lat_tile = np.tile(lat_r, len(lon_r))
    lon_repeat = np.repeat(lon_r, len(lat_r))

    # Create DataFrame
    rain_df = pd.DataFrame({
        'lat': np.round(lat_tile, 2), 
        'lon': np.round(lon_repeat, 2), 
        'rain': rain_vec
    })

    # Print DataFrame
    print(rain_df.head(10))
    print("rain_df shape")
    print(rain_df.shape)

    # rain_df=rain_df.dropna()
    # print(rain_df.shape)
    # print(rain_df.head(10))

The month at n=0 is: June
     lat    lon  rain
0 -44.50 112.00   NaN
1 -44.45 112.00   NaN
2 -44.40 112.00   NaN
3 -44.35 112.00   NaN
4 -44.30 112.00   NaN
5 -44.25 112.00   NaN
6 -44.20 112.00   NaN
7 -44.15 112.00   NaN
8 -44.10 112.00   NaN
9 -44.05 112.00   NaN
rain_df shape
(612226, 3)


In [52]:
# Join the CDI and rainfall dataframes
join_df = pd.merge(rain_df,cdi_df, on=['lat', 'lon'], how='right')
print(join_df.shape)
print("join_df.head(10)")
print(join_df.head(10))
# Count the number of rows with NaN values
num_rows_with_nan = join_df.isna().any(axis=1).sum()

# Count the number of rows without NaN values
num_rows_without_nan = (~join_df.isna().any(axis=1)).sum()

print(f"Number of rows with NaN values: {num_rows_with_nan}")
print(f"Number of rows without NaN values: {num_rows_without_nan}")

rmna_df=join_df.dropna()
print(rmna_df.shape)


(572721, 4)
join_df.head(10)
     lat    lon  rain  cdi
0 -44.00 112.00   NaN  NaN
1 -43.95 112.00   NaN  NaN
2 -43.90 112.00   NaN  NaN
3 -43.85 112.00   NaN  NaN
4 -43.80 112.00   NaN  NaN
5 -43.75 112.00   NaN  NaN
6 -43.70 112.00   NaN  NaN
7 -43.65 112.00   NaN  NaN
8 -43.60 112.00   NaN  NaN
9 -43.55 112.00   NaN  NaN
Number of rows with NaN values: 378949
Number of rows without NaN values: 193772
(193772, 4)


In [53]:
def classify_drought(row):
    cdi, rain = row['cdi'], row['rain']
    if cdi < 0.2:
        if rain < 50:
            if cdi < 0.02:
                return 5  # Persists
            else:
                return 6  # Worsens
        elif rain < 70:
            return 5  # Persists
        else:
            if 0.1 <= cdi < 0.2:
                return 2  # Removed
            else:
                return 3  # Improved
    else:
        if rain < 30:
            return 4  # Develops
        else:
            return 1  # No drought

In [54]:
ncell = len(rmna_df)
ncores = 4 # Adjust based on your system's capabilities

# Use pathos for multiprocessing
with Pool(ncores) as p:
    try:
        classified = p.map(classify_drought, [rmna_df.iloc[i] for i in range(ncell)])
    except Exception as e:
        print(f"An error occurred during multiprocessing: {e}")
        p.close() # Close the pool
        p.join() # Wait for the worker processes to exit
        raise # Re-raise the exception

print(len(classified))

193772


In [55]:
# Create the dataframe
df_out = pd.DataFrame({ 'lat': cdi_df['lat'],'lon': cdi_df['lon'], 'category': np.nan})

# Get the rows where NAs were removed
order = rmna_df.index.astype(int)

# Replace the category value
classified = np.array(classified)
df_out.loc[order, 'category'] = classified.astype(int)
print(df_out['category'].value_counts())


1.00    184599
6.00      6720
4.00      1875
5.00       578
Name: category, dtype: int64


In [56]:
# Create a DataArray from the DataFrame
da = xr.DataArray(df_out['category'].values.reshape(cdi_array.shape),
                  coords=[('latitude', lat), ('longitude', lon)],
                  name='outlook')

# Add attributes
da.attrs['varunit'] = ''
da.attrs['longname'] = 'drought outlook'

# Create a Dataset from the DataArray
ds = da.to_dataset()
# Add the time variable to the Dataset
ds['time'] = (('time'), [time_r])


In [57]:
# Save the Dataset as a NetCDF file
out_ncname = "/Users/sabinmaharjan/projects/python/do/static/nc/1_months/"+month_name+"_Final_2024.nc"

try:
    ds.to_netcdf(out_ncname)
    print(f"file saved with name: {out_ncname}")
except Exception as e:
    print(f"An error occurred while saving the Dataset: {e}")

file saved with name: /Users/sabinmaharjan/projects/python/do/static/nc/1_months/June_Final_2024.nc
