In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import os

# Regrid every NetCDF file in `folder_path` onto a common 0.25-spaced grid,
# flatten the result to one row per (Lat, Lon, Date), and write a single CSV.

# Folder containing the NetCDF files
folder_path = "dataset-satellite-precipitation-ec9f814a-6a57-440d-9652-1b5fcab3b128"

# Target latitude and longitude grids with 0.25 spacing.
# np.arange excludes the stop value, so the grids end at 38.25 and 99.75.
target_lat = np.arange(6.5, 38.5, 0.25)
target_lon = np.arange(66.5, 100, 0.25)

# Columns that together identify one output row
coordinate_key = ['Lat', 'Lon', 'Date']

# Per-file frames are collected here and concatenated once after the loop;
# concatenating inside the loop would re-copy the accumulated data each time.
per_file_frames = []

# os.listdir returns entries in arbitrary order. Sorting makes the run
# deterministic, which matters because drop_duplicates keeps the first
# occurrence of each (Lat, Lon, Date) key.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".nc"):
        continue
    file_path = os.path.join(folder_path, filename)

    # The context manager closes the underlying file handle even if a later
    # step raises; the DataFrame is materialised before the file is closed.
    with xr.open_dataset(file_path) as original_data:
        # Find the coordinate names by substring so that variants of
        # 'latitude' / 'longitude' are accepted.
        lat_matches = [name for name in original_data.coords if 'latitude' in name]
        lon_matches = [name for name in original_data.coords if 'longitude' in name]
        if not lat_matches or not lon_matches:
            raise KeyError(
                f"{file_path}: no latitude/longitude coordinate found among "
                f"{list(original_data.coords)}"
            )
        lat_name, lon_name = lat_matches[0], lon_matches[0]

        # Nearest-neighbour lookup: each target point takes the value of the
        # closest source grid cell, so no values are blended together.
        regridded_data = original_data.interp(
            {lat_name: target_lat, lon_name: target_lon}, method='nearest'
        )

        # Flatten to a table; reset_index turns the coordinates into columns.
        regridded_dataframe = regridded_data.to_dataframe().reset_index()

    # Rename columns to the desired output names
    regridded_dataframe = regridded_dataframe.rename(
        columns={lat_name: "Lat", lon_name: "Lon", "time": "Date", "precip": "rain"}
    )

    # Convert the date to the desired format YYYYMMDD
    regridded_dataframe['Date'] = pd.to_datetime(regridded_dataframe['Date']).dt.strftime('%Y%m%d')

    # Keep one row per (Lat, Lon, Date).
    # NOTE(review): the repeated rows presumably come from the extra 'nv'
    # bounds dimension that a later cell drops -- confirm against the files.
    unique_coordinates = regridded_dataframe.drop_duplicates(subset=coordinate_key)
    per_file_frames.append(unique_coordinates)

if not per_file_frames:
    # Fail with a clear message instead of an opaque error further down.
    raise FileNotFoundError(f"No .nc files found in {folder_path!r}")

# Single concatenation, then drop keys that appear in more than one file
combined_dataframe = pd.concat(per_file_frames).drop_duplicates(subset=coordinate_key)

# Save the combined DataFrame with unique coordinates to CSV
csv_file_path = "combined_unique_coordinates.csv"
combined_dataframe.to_csv(csv_file_path, index=False)

print("Combined unique coordinates saved to:", csv_file_path)

Combined unique coordinates saved to: combined_unique_coordinates.csv


In [None]:
# Reload the combined CSV and discard the helper columns ('nv' and the
# *_bounds columns) that are not wanted in the final table.
columns_to_drop = ['nv', 'lat_bounds', 'lon_bounds', 'time_bounds']
data = pd.read_csv(csv_file_path).drop(columns=columns_to_drop)

In [5]:
# Parse 'Date' (stored as YYYYMMDD) into real datetimes so the ordering is
# chronological, sort by location and then date, and finally write the
# dates back out in the same YYYYMMDD text form.
parsed_dates = pd.to_datetime(data['Date'], format='%Y%m%d')
data = (
    data
    .assign(Date=parsed_dates)
    .sort_values(by=['Lat', 'Lon', 'Date'])
)
data['Date'] = data['Date'].dt.strftime('%Y%m%d')

In [6]:
# Place 'Date' immediately before the last column (e.g. Lat, Lon, Date, rain).
# The column is selected by name rather than by position, so the cell is
# idempotent: re-running it leaves the layout unchanged instead of rotating
# whichever column currently happens to be first.
cols = [col for col in data.columns if col != 'Date']
if 'Date' in data.columns:
    # list.insert(-1, x) puts x just before the final element
    cols.insert(-1, 'Date')
data = data[cols]

In [7]:
# Export the sorted, reordered table; the row index is not written.
data.to_csv(path_or_buf='rain.csv', index=False)


In [8]:
import pandas as pd

# Read back the CSV exported by the previous cell. The path is relative to
# the working directory (the same location the export writes to), so the
# notebook does not depend on one user's home directory.
file_path = 'rain.csv'

# Read the CSV file
data = pd.read_csv(file_path)
# Print the first 5 rows of the DataFrame
print(data.head())

   Lat   Lon      Date  rain
0  6.5  66.5  20030101   NaN
1  6.5  66.5  20030102   NaN
2  6.5  66.5  20030103   NaN
3  6.5  66.5  20030104   NaN
4  6.5  66.5  20030105   NaN
