<a href="https://colab.research.google.com/github/roy-sib2002/github-pages/blob/main/imd_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install xarray netCDF4 numpy pandas


Collecting netCDF4
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m68.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import glob
import re

# Update nlat and nlon to 31
nlat, nlon = 31, 31

# Adjust latitude and longitude ranges
lats = np.linspace(7.5, 37.5, nlat)
lons = np.linspace(67.5, 97.5, nlon)

data_dir = '/content/drive/MyDrive/Colab Notebooks/tmax_data'
file_paths = sorted(glob.glob(data_dir + "/Maxtemp_MaxT_*.GRD"))

all_data = []
all_dates = []

for file_path in file_paths:
  filename = os.path.basename(file_path)

  match = re.search(r'(\d{4})', filename)
  if not match:
    print(f"Year not found in the file name: {filename}")
    continue

  year = int(match.group(1))
  ndays = 366 if pd.Timestamp(f"{year}-12-31").is_leap_year else 365

  print(f"reading: {filename} ({ndays} days)")

  raw = np.fromfile(file_path, dtype=np.float32)
  print(f"Raw data shape before reshape: {raw.shape}")  # Print raw data shape
  expected_shape = (ndays, nlat, nlon)
  print(f"Expected data shape: {expected_shape}")  # Print expected shape

  # Calculate the expected number of elements
  expected_elements = ndays * nlat * nlon

  # Check if the raw data size matches the expected size
  if raw.size != expected_elements:
      print(f"Reshape failed for {filename}. Raw data size ({raw.size}) does not match expected size ({expected_elements}). Skipping.")
      continue  # Skip to the next file

  try:
    data = raw.reshape(expected_shape)
  except ValueError:
    print(f"Reshape failed for {filename}. Skipping.")
    print(f"Raw data shape: {raw.shape}")
    continue

  dates = pd.date_range(start=f"{year}-01-01", periods=ndays)
  all_data.append(data)
  all_dates.extend(dates)

if len(all_data) == 0:
  raise ValueError("No valid GRD files were successfully read.")

all_data_array = np.concatenate(all_data, axis=0)

ds = xr.Dataset(
    {
        'tmax': (['time', 'lat', 'lon'], all_data_array)
    },
    coords={
        'time': all_dates,
        'lat': lats,
        'lon': lons
    },
    attrs={
        'title': 'Daily Tmax from IMD files(1951-2024)',
        'description': 'Converted from .GRD to NetCDF',
        'source': 'IMD',
        'resolution': '1.0 X 1.0'
    }
)

output_path = '/content/drive/MyDrive/Colab Notebooks/tmax_IMD_1951_2024.nc'
ds.to_netcdf(output_path)
print(f"NetCDF files have been saved successfully at: \n{output_path}")

reading: Maxtemp_MaxT_1951.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1952.GRD (366 days)
Raw data shape before reshape: (351726,)
Expected data shape: (366, 31, 31)
reading: Maxtemp_MaxT_1953.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1954.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1955.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1956.GRD (366 days)
Raw data shape before reshape: (351726,)
Expected data shape: (366, 31, 31)
reading: Maxtemp_MaxT_1957.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1958.GRD (365 days)
Raw data shape before reshape: (350765,)
Expected data shape: (365, 31, 31)
reading: Maxtemp_MaxT_1959.GRD (365 days)
Raw data shape

In [None]:
import os
data_dir = '/content/drive/MyDrive/Colab Notebooks/tmax_data'
files = os.listdir(data_dir)
print(files)
test_file = os.path.join(data_dir, 'Maxtemp_MaxT_1986.GRD')

if os.path.exists(test_file):
    print("✅ File exists: Maxtemp_MaxT_1986.GRD")
else:
    print("❌ File not found: Maxtemp_MaxT_1986.GRD")

['Maxtemp_MaxT_2019.GRD', 'Maxtemp_MaxT_2018.GRD', 'Maxtemp_MaxT_2017.GRD', 'Maxtemp_MaxT_2016.GRD', 'Maxtemp_MaxT_2015.GRD', 'Maxtemp_MaxT_2014.GRD', 'Maxtemp_MaxT_2013.GRD', 'Maxtemp_MaxT_2012.GRD', 'Maxtemp_MaxT_2011.GRD', 'Maxtemp_MaxT_2010.GRD', 'Maxtemp_MaxT_2024.GRD', 'Maxtemp_MaxT_2023.GRD', 'Maxtemp_MaxT_2022.GRD', 'Maxtemp_MaxT_2021.GRD', 'Maxtemp_MaxT_2020.GRD', 'Maxtemp_MaxT_1953.GRD', 'Maxtemp_MaxT_1952.GRD', 'Maxtemp_MaxT_1954.GRD', 'Maxtemp_MaxT_1951.GRD', 'Maxtemp_MaxT_1968.GRD', 'Maxtemp_MaxT_1959.GRD', 'Maxtemp_MaxT_1963.GRD', 'Maxtemp_MaxT_1972.GRD', 'Maxtemp_MaxT_1982.GRD', 'Maxtemp_MaxT_1973.GRD', 'Maxtemp_MaxT_1975.GRD', 'Maxtemp_MaxT_1958.GRD', 'Maxtemp_MaxT_1977.GRD', 'Maxtemp_MaxT_1967.GRD', 'Maxtemp_MaxT_1971.GRD', 'Maxtemp_MaxT_1979.GRD', 'Maxtemp_MaxT_1981.GRD', 'Maxtemp_MaxT_1966.GRD', 'Maxtemp_MaxT_1957.GRD', 'Maxtemp_MaxT_1955.GRD', 'Maxtemp_MaxT_1978.GRD', 'Maxtemp_MaxT_1970.GRD', 'Maxtemp_MaxT_1961.GRD', 'Maxtemp_MaxT_1969.GRD', 'Maxtemp_MaxT_1956.GRD',

In [None]:
import glob

file_paths = sorted(glob.glob(data_dir + "/Maxtemp_MaxT_*.GRD"))

print("📂 Found files:")
for f in file_paths[:5]:  # show only first 5
    print(f)
print(f"Total found: {len(file_paths)}")

📂 Found files:
/content/drive/MyDrive/Colab Notebooks/tmax_data/Maxtemp_MaxT_1951.GRD
/content/drive/MyDrive/Colab Notebooks/tmax_data/Maxtemp_MaxT_1952.GRD
/content/drive/MyDrive/Colab Notebooks/tmax_data/Maxtemp_MaxT_1953.GRD
/content/drive/MyDrive/Colab Notebooks/tmax_data/Maxtemp_MaxT_1954.GRD
/content/drive/MyDrive/Colab Notebooks/tmax_data/Maxtemp_MaxT_1955.GRD
Total found: 74


In [20]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import glob
import re

# Grid dimensions and ranges
nlat, nlon = 31, 31
lats = np.linspace(7.5, 37.5, nlat)
lons = np.linspace(67.5, 97.5, nlon)

# Data and output directories
data_dir = '/content/drive/MyDrive/Colab Notebooks/tmax_data'
output_dir = os.path.join(data_dir, 'new_saved_folder')  # Create output directory path

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

file_paths = sorted(glob.glob(data_dir + "/Maxtemp_MaxT_*.GRD"))

for file_path in file_paths:
    filename = os.path.basename(file_path)
    match = re.search(r'(\d{4})', filename)
    if not match:
        print(f"Year not found in the file name: {filename}")
        continue

    year = int(match.group(1))
    ndays = 366 if pd.Timestamp(f"{year}-12-31").is_leap_year else 365

    print(f"reading: {filename} ({ndays} days)")

    raw = np.fromfile(file_path, dtype=np.float32)
    expected_shape = (ndays, nlat, nlon)
    expected_elements = ndays * nlat * nlon

    if raw.size != expected_elements:
        print(f"Reshape failed for {filename}. Raw data size ({raw.size}) does not match expected size ({expected_elements}). Skipping.")
        continue

    try:
        data = raw.reshape(expected_shape)
    except ValueError:
        print(f"Reshape failed for {filename}. Skipping.")
        continue

    dates = pd.date_range(start=f"{year}-01-01", periods=ndays)

    ds = xr.Dataset(
        {
            'tmax': (['time', 'lat', 'lon'], data)
        },
        coords={
            'time': dates,
            'lat': lats,
            'lon': lons
        },
        attrs={
            'title': f'Daily Tmax from IMD file ({year})',
            'description': 'Converted from .GRD to NetCDF',
            'source': 'IMD',
            'resolution': '1.0 X 1.0'
        }
    )

    # Create output file path with year in the filename
    output_path = os.path.join(output_dir, f"tmax_IMD_{year}.nc")
    ds.to_netcdf(output_path)
    print(f"NetCDF file saved: {output_path}")

print("All NetCDF files have been saved successfully in 'new_saved_folder'.")

reading: Maxtemp_MaxT_1951.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1951.nc
reading: Maxtemp_MaxT_1952.GRD (366 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1952.nc
reading: Maxtemp_MaxT_1953.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1953.nc
reading: Maxtemp_MaxT_1954.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1954.nc
reading: Maxtemp_MaxT_1955.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1955.nc
reading: Maxtemp_MaxT_1956.GRD (366 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_1956.nc
reading: Maxtemp_MaxT_1957.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/tmax_data/new_saved_folder/tmax_IMD_

In [21]:
import numpy as np
import pandas as pd
import xarray as xr
import os
import glob
import re

# Grid dimensions and ranges
nlat, nlon = 31, 31
lats = np.linspace(7.5, 37.5, nlat)
lons = np.linspace(67.5, 97.5, nlon)

# Data and output directories
data_dir = '/content/drive/MyDrive/Colab Notebooks/tmax_data'
output_dir = '/content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp'  # Updated output directory

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

file_paths = sorted(glob.glob(data_dir + "/Maxtemp_MaxT_*.GRD"))

for file_path in file_paths:
    filename = os.path.basename(file_path)
    match = re.search(r'(\d{4})', filename)
    if not match:
        print(f"Year not found in the file name: {filename}")
        continue

    year = int(match.group(1))
    ndays = 366 if pd.Timestamp(f"{year}-12-31").is_leap_year else 365

    print(f"reading: {filename} ({ndays} days)")

    raw = np.fromfile(file_path, dtype=np.float32)
    expected_shape = (ndays, nlat, nlon)
    expected_elements = ndays * nlat * nlon

    if raw.size != expected_elements:
        print(f"Reshape failed for {filename}. Raw data size ({raw.size}) does not match expected size ({expected_elements}). Skipping.")
        continue

    try:
        data = raw.reshape(expected_shape)
    except ValueError:
        print(f"Reshape failed for {filename}. Skipping.")
        continue

    dates = pd.date_range(start=f"{year}-01-01", periods=ndays)

    ds = xr.Dataset(
        {
            'tmax': (['time', 'lat', 'lon'], data)
        },
        coords={
            'time': dates,
            'lat': lats,
            'lon': lons
        },
        attrs={
            'title': f'Daily Tmax from IMD file ({year})',
            'description': 'Converted from .GRD to NetCDF',
            'source': 'IMD',
            'resolution': '1.0 X 1.0'
        }
    )

    # Create output file path with year in the filename
    output_path = os.path.join(output_dir, f"tmax_IMD_{year}.nc")
    ds.to_netcdf(output_path)
    print(f"NetCDF file saved: {output_path}")

print("All NetCDF files have been saved successfully in 'new_updated_imd_temp'.")

reading: Maxtemp_MaxT_1951.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1951.nc
reading: Maxtemp_MaxT_1952.GRD (366 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1952.nc
reading: Maxtemp_MaxT_1953.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1953.nc
reading: Maxtemp_MaxT_1954.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1954.nc
reading: Maxtemp_MaxT_1955.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1955.nc
reading: Maxtemp_MaxT_1956.GRD (366 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1956.nc
reading: Maxtemp_MaxT_1957.GRD (365 days)
NetCDF file saved: /content/drive/MyDrive/Colab Notebooks/new_updated_imd_temp/tmax_IMD_1957.nc
reading: Maxtemp_MaxT_1958.GRD (36