The variable names are as follows:
1. Wind_Speed_10m_Mean 
2. Relative_Humidity_2m_09h
3. Cloud_Cover_Mean

In [None]:
import os
import xarray as xr
import numpy as np

# Regrid every daily NetCDF file found under the per-year folders onto a fixed
# 0.25-degree lat/lon grid and write one CSV per day holding a single value
# per (lat, lon) grid point.

# Define the path to the main folder containing all the year folders
main_folder_path = "data_in_nc4/humidity"  # Change according to the variable name

# Define the path to the output folder where all yearly output folders will be saved
output_main_folder_path = "raw_files/humidity"

# Name of the data variable to extract from each file
variable_name = 'Relative_Humidity_2m_09h'  # Adjust according to the variable name

# Target grid; it is identical for every file, so it is built once up front
target_lat = np.arange(6.5, 38.5, 0.25)
target_lon = np.arange(66.5, 100, 0.25)

# Create the output folder if it does not exist
os.makedirs(output_main_folder_path, exist_ok=True)

# Iterate over year folders in the main folder (sorted so runs are reproducible)
for year_folder_name in sorted(os.listdir(main_folder_path)):
    year_folder_path = os.path.join(main_folder_path, year_folder_name)

    # Only directories are treated as year folders
    if not os.path.isdir(year_folder_path):
        continue

    # Folder where the daily CSV files of this year are saved
    output_year_folder_path = os.path.join(output_main_folder_path, f"{year_folder_name}_unique_coordinates")
    os.makedirs(output_year_folder_path, exist_ok=True)

    for file_name in sorted(os.listdir(year_folder_path)):
        # Only NetCDF files are processed
        if not file_name.endswith(".nc"):
            continue

        file_path = os.path.join(year_folder_path, file_name)

        # The date is the first underscore-separated token made of exactly 8 digits
        file_date = next(
            (part for part in file_name.split('_') if len(part) == 8 and part.isdigit()),
            None,
        )
        if file_date is None:
            print(f"Unable to extract date from file name: {file_name}")
            continue

        # The context manager closes the file handle even if processing fails
        with xr.open_dataset(file_path) as original_data:
            # Coordinate names differ between products; take the first match
            lat_name = [dim for dim in original_data.coords if 'lat' in dim][0]
            lon_name = [dim for dim in original_data.coords if 'lon' in dim][0]

            regridded_data = original_data.interp(
                {lat_name: target_lat, lon_name: target_lon}, method='nearest'
            )
            regridded_data = regridded_data[[variable_name]]
            # Materialise the table while the file is still open
            regridded_dataframe = regridded_data.to_dataframe().reset_index()

        # Normalise the coordinate columns to 'lat'/'lon' so that de-duplication
        # works whatever the coordinates are called in the source file
        regridded_dataframe = regridded_dataframe.rename(columns={lat_name: 'lat', lon_name: 'lon'})
        unique_coordinates = regridded_dataframe.drop_duplicates(subset=['lat', 'lon'])

        csv_file_path = os.path.join(output_year_folder_path, f"{file_date}.csv")
        unique_coordinates.to_csv(csv_file_path, index=False)

        print(f"Processed {file_path} and saved output to {csv_file_path}")


In [None]:
import os
import pandas as pd

# Combine the daily CSV files of each year folder into one wide table per year:
# one row per (lat, lon) grid point and one column per date.

# Define the path to the yearly output folder
yearly_output_folder = "raw_files/humidity"

# Name of the value column inside every daily CSV file
variable_name = 'Relative_Humidity_2m_09h'  # Adjust according to the variable name

# Iterate over year folders in the yearly output folder
for year_folder_name in sorted(os.listdir(yearly_output_folder)):
    year_folder_path = os.path.join(yearly_output_folder, year_folder_name)

    # Only directories are treated as year folders
    if not os.path.isdir(year_folder_path):
        continue

    # Sorted so that the date columns come out in chronological order
    # (os.listdir returns names in arbitrary order)
    csv_files = sorted(file for file in os.listdir(year_folder_path) if file.endswith('.csv'))
    if not csv_files:
        print(f"No CSV files found in {year_folder_path}; skipping")
        continue

    # One Series per day, indexed by (lat, lon) and named after the date
    daily_columns = []
    for csv_file_name in csv_files:
        csv_file_path = os.path.join(year_folder_path, csv_file_name)
        df = pd.read_csv(csv_file_path)

        # The file name without its extension is the date
        date = csv_file_name.split('.')[0]

        daily_columns.append(df.set_index(['lat', 'lon'])[variable_name].rename(date))

    # A single outer join on (lat, lon) replaces the repeated pairwise merges,
    # which re-copied the growing table once per file. Rows are sorted by
    # (lat, lon), and lat/lon end up as the first two columns.
    combined_data = pd.concat(daily_columns, axis=1).sort_index().reset_index()

    # Save the combined DataFrame to a CSV file
    output_csv_path = os.path.join(yearly_output_folder, f"{year_folder_name}.csv")
    combined_data.to_csv(output_csv_path, index=False)

    print(f"Combined data for {year_folder_name} and saved to {output_csv_path}")

Save the yearly files separately to a folder (yearly_files/humidity)

In [None]:
import pandas as pd
import os

def transpose_csv(input_folder, output_folder):
    """Write a transposed copy of every CSV file found under ``input_folder``.

    The directory tree below ``input_folder`` is walked recursively and
    mirrored below ``output_folder``. Each output file keeps the name of its
    source file and is written with the transposed index as first column and
    without a header row. Files whose name starts with ``._`` are ignored.
    A failure on one file is reported on stdout and does not stop the run.
    """
    for current_dir, _subdirs, names in os.walk(input_folder):
        for name in names:
            # Ignore non-CSV files and hidden macOS metadata files
            if name.startswith('._') or not name.endswith('.csv'):
                continue

            source_path = os.path.join(current_dir, name)
            try:
                flipped = pd.read_csv(source_path).transpose()

                # Recreate the source sub-folder underneath the output root
                target_dir = os.path.join(
                    output_folder, os.path.relpath(current_dir, input_folder)
                )
                os.makedirs(target_dir, exist_ok=True)

                target_path = os.path.join(target_dir, name)
                flipped.to_csv(target_path, index=True, header=False)
                print(f"Transposed and saved: {source_path} -> {target_path}")
            except Exception as e:
                print(f"Error processing file {source_path}: {e}")

if __name__ == "__main__":
    # Source tree of yearly CSV files and destination root for the transposed copies
    input_folder, output_folder = "yearly_files/humidity", "transposed/humidity"

    # Make sure the destination root exists before the source tree is walked
    os.makedirs(output_folder, exist_ok=True)
    transpose_csv(input_folder, output_folder)

    print("Transposition complete.")

In [None]:
import csv
import os
import pandas as pd

def transform_files(input_folder, output_file):
    """Convert every CSV file in ``input_folder`` to long format and combine them.

    Each ``.csv`` file directly inside ``input_folder`` (names starting with
    ``._`` are skipped) is passed to ``transform_file``; the resulting frames
    are concatenated and written to ``output_file`` without an index column.
    A file that fails to convert is reported on stdout and left out.

    Args:
        input_folder: Folder containing the CSV files to convert.
        output_file: Path of the combined CSV file to write.
    """
    frames = []

    # Sorted so that the row order of the output does not depend on the
    # arbitrary order in which os.listdir returns the names
    for filename in sorted(os.listdir(input_folder)):
        # Skip non-CSV files and hidden metadata files
        if not filename.endswith('.csv') or filename.startswith('._'):
            continue

        input_file = os.path.join(input_folder, filename)
        try:
            frames.append(transform_file(input_file))
        except Exception as e:
            print(f"Error processing file {input_file}: {e}")

    # Concatenate once at the end: concatenating inside the loop re-copies all
    # rows collected so far for every additional file
    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Write the combined DataFrame to a single output file
    combined_df.to_csv(output_file, index=False)
    print("Transformation complete. Combined data saved to:", output_file)

def _strip_row_label(row):
    """Return ``row`` without its first cell when that cell is not a number."""
    if not row:
        return row
    try:
        float(row[0])
    except ValueError:
        # First cell is a text label such as 'lat' / 'lon', not a coordinate
        return row[1:]
    return row


def transform_file(input_file):
    """Convert one transposed CSV file to long format.

    The first row holds the latitudes and the second row the longitudes of the
    grid points, the third row is skipped, and every following row holds a
    date in its first cell followed by one value per grid point.

    A leading text label on the coordinate rows (as written by a transposition
    that keeps the index column) is dropped, so that each value is paired with
    the coordinates of its own grid point rather than the previous one.

    Args:
        input_file: Path of the CSV file to read.

    Returns:
        DataFrame with the columns ``Lat``, ``Lon``, ``Date`` and ``humidity``;
        all cells are the strings read from the file.

    Raises:
        ValueError: If the file has fewer than three rows.
    """
    transformed_rows = []

    # newline='' is what the csv module expects for files handed to csv.reader
    with open(input_file, 'r', newline='') as f_in:
        reader = csv.reader(f_in)
        try:
            latitudes = _strip_row_label(next(reader))
            longitudes = _strip_row_label(next(reader))
            # Skip the header row
            # NOTE(review): if the file comes straight from a transposition of
            # a lat/lon/date table, this third row is the first date's data
            # and is discarded here - confirm the expected file layout.
            next(reader)
        except StopIteration:
            raise ValueError(f"{input_file} has fewer than three rows") from None

        for row in reader:
            # csv.reader yields an empty list for a blank line
            if not row:
                continue
            date = row[0]
            values = row[1:]
            for lat, lon, value in zip(latitudes, longitudes, values):
                transformed_rows.append([lat, lon, date, value])

    # Convert transformed rows to a DataFrame
    transformed_df = pd.DataFrame(transformed_rows, columns=['Lat', 'Lon', 'Date', 'humidity'])  # Adjust column names as needed
    return transformed_df

if __name__ == "__main__":
    # Input folder containing the transposed CSV files; the previous spelling
    # 'transposed/humidty' pointed at a folder the transposition step never writes
    input_folder = 'transposed/humidity'
    # Output file named after the variable being processed (was 'cloudcover.csv'
    # although the data and the value column are humidity)
    output_file = 'variables/humidity.csv'

    # to_csv does not create missing parent directories
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    transform_files(input_folder, output_file)
