In [1]:
import xarray as xr
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime,timedelta
import warnings


In [2]:
path='/mnt/1A42C1DD42C1BE2F/MyProjects/ML_HEATWAVE/Volumetric Soil Moisture/*.nc'
soil_moisture=xr.open_mfdataset(path)

In [3]:
soil_moisture

Unnamed: 0,Array,Chunk
Bytes,1.03 GiB,25.20 MiB
Shape,"(15340, 94, 192)","(366, 94, 192)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.03 GiB 25.20 MiB Shape (15340, 94, 192) (366, 94, 192) Dask graph 42 chunks in 85 graph layers Data type float32 numpy.ndarray",192  94  15340,

Unnamed: 0,Array,Chunk
Bytes,1.03 GiB,25.20 MiB
Shape,"(15340, 94, 192)","(366, 94, 192)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,239.69 kiB,5.72 kiB
Shape,"(15340, 2)","(366, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 239.69 kiB 5.72 kiB Shape (15340, 2) (366, 2) Dask graph 42 chunks in 85 graph layers Data type float64 numpy.ndarray",2  15340,

Unnamed: 0,Array,Chunk
Bytes,239.69 kiB,5.72 kiB
Shape,"(15340, 2)","(366, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In the context of this dataset, the unit "fraction" is used to express volumetric soil moisture. Volumetric soil moisture represents the amount of water present in the soil as a fraction or percentage of the total volume of the soil.

Here's a more detailed explanation:

- **Volumetric Soil Moisture**: Soil moisture is the amount of water contained in the soil. Volumetric soil moisture specifically expresses this as a fraction of the total volume of the soil.

- **Unit "Fraction"**: In this dataset, soil moisture is expressed as a fraction, which means it is a ratio of the volume of water to the total volume of the soil. It is typically expressed as a value between 0 and 1, where:
  - 0 represents completely dry soil (no water content).
  - 1 represents completely saturated soil (all available pore spaces filled with water).
  - Values between 0 and 1 represent varying degrees of soil moisture content. For example, 0.5 would mean that half of the soil's pore spaces are filled with water.

In summary, when you see "fraction" as the unit for soil moisture, it means that the dataset is providing information about the proportion of the soil's volume that is occupied by water. This is a common unit for expressing soil moisture in scientific and environmental datasets.

#### we need to fix the grid size.

In [4]:
new_lat = np.arange(90, -91, -02.5).astype(np.float32)
new_lon = np.arange(0, 360, 02.5).astype(np.float32)

# Resample to the new grid size
resized_soil_moisture = soil_moisture.interp(lat=new_lat, lon=new_lon)

In [5]:
lat_range = slice(29,18.0)  # take lat values in backwards directions.
lon_range = slice(85, 97)  # 88.0 degrees east to 92.7 degrees east

# Use the sel method to select data within the specified Bangladesh region
bangladesh_resized_soil_moisture = resized_soil_moisture.sel(lat=lat_range,lon=lon_range)
bangladesh_resized_soil_moisture

Unnamed: 0,Array,Chunk
Bytes,1.17 MiB,28.59 kiB
Shape,"(15340, 4, 5)","(366, 4, 5)"
Dask graph,42 chunks in 99 graph layers,42 chunks in 99 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.17 MiB 28.59 kiB Shape (15340, 4, 5) (366, 4, 5) Dask graph 42 chunks in 99 graph layers Data type float32 numpy.ndarray",5  4  15340,

Unnamed: 0,Array,Chunk
Bytes,1.17 MiB,28.59 kiB
Shape,"(15340, 4, 5)","(366, 4, 5)"
Dask graph,42 chunks in 99 graph layers,42 chunks in 99 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,239.69 kiB,5.72 kiB
Shape,"(15340, 2)","(366, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 239.69 kiB 5.72 kiB Shape (15340, 2) (366, 2) Dask graph 42 chunks in 85 graph layers Data type float64 numpy.ndarray",2  15340,

Unnamed: 0,Array,Chunk
Bytes,239.69 kiB,5.72 kiB
Shape,"(15340, 2)","(366, 2)"
Dask graph,42 chunks in 85 graph layers,42 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
import pandas as pd
import xarray as xr
from tqdm.auto import tqdm# Import tqdm
warnings.filterwarnings('ignore')

# # Open your netCDF dataset
# ds = xr.open_dataset('your_dataset.nc')

# Define the chunk size
chunk_size = 500  # Adjust this based on your available memory

# Initialize an empty DataFrame to store the results
soilw_df = pd.DataFrame(columns=['time', 'lat', 'lon', 'soilw'])

# Calculate the number of chunks
num_chunks = len(bangladesh_resized_soil_moisture.time)

# Create a tqdm progress bar
for start in tqdm(range(0, num_chunks, chunk_size),desc='Main Loop'):
    end = start + chunk_size
    chunk = bangladesh_resized_soil_moisture.isel(time=slice(start, end))

    # Extract the data
    time = chunk.time.values
    lat = chunk.lat.values
    lon = chunk.lon.values
    soilw = chunk.soilw.values

    # Create a DataFrame for this chunk and append it to the main DataFrame
    chunk_df = pd.DataFrame({
        'time': time.repeat(len(lat) * len(lon)),
        'lat': np.tile(lat, len(time) * len(lon)),
        'lon': np.tile(lon, len(time) * len(lat)),
        'soilw': soilw.ravel()
    })

    soilw_df = soilw_df.append(chunk_df, ignore_index=True)

# t2m_df now contains the concatenated data
soilw_df

Main Loop:   0%|          | 0/31 [00:00<?, ?it/s]

Unnamed: 0,time,lat,lon,soilw
0,1981-01-01,27.5,85.0,0.256957
1,1981-01-01,25.0,87.5,0.272241
2,1981-01-01,22.5,90.0,0.272919
3,1981-01-01,20.0,92.5,0.263421
4,1981-01-01,27.5,95.0,0.317205
...,...,...,...,...
306795,2022-12-31,20.0,85.0,
306796,2022-12-31,27.5,87.5,
306797,2022-12-31,25.0,90.0,
306798,2022-12-31,22.5,92.5,
