In [None]:
import os
import re
import xarray as xr
import pandas as pd
import numpy as np
from datetime import datetime
import logging

# Set up logging: INFO level, each message prefixed with a timestamp and its level
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define the target latitude and longitude grids with 0.25 spacing.
# np.arange excludes the stop value, so the last grid points are 38.25 and 99.75.
target_lat = np.arange(6.5, 38.5, 0.25)  # 6.5 .. 38.25 inclusive -> 128 points
target_lon = np.arange(66.5, 100, 0.25)  # 66.5 .. 99.75 inclusive -> 134 points

# Define the base path for the input and output directories.
# The sub-directory layout found under the input path is mirrored under the output path.
base_input_path = "data_in_nc4/NDVI"
base_output_path = "raw_files/ndvi"

# Ensure the base output directory exists (no error if it is already there)
os.makedirs(base_output_path, exist_ok=True)

# Function to process a single NetCDF file
def process_file(file_path, target_lat, target_lon, output_file_path):
    """Regrid the NDVI variable of one NetCDF file and write it out as a CSV.

    The file is opened with ``decode_times=False``, the ``NDVI`` variable is
    sampled onto the target grid with nearest-neighbour interpolation, and the
    result is flattened to a table with ``lat`` / ``lon`` columns. Only the
    first row for every (lat, lon) pair is kept.

    Args:
        file_path: Path of the NetCDF file to read.
        target_lat: 1-D array of latitudes to sample at.
        target_lon: 1-D array of longitudes to sample at.
        output_file_path: Destination CSV path; missing parent directories
            are created.

    Returns:
        None. Any failure is logged as an error and swallowed so that a batch
        run can continue with the next file.
    """
    try:
        logging.info(f"Processing file: {file_path}")

        # The context manager releases the file handle even when a later step
        # fails; without it every processed file stays open for the whole run.
        with xr.open_dataset(file_path, decode_times=False) as original_data:
            # Coordinate names differ between products, so match on substring.
            lat_names = [name for name in original_data.coords if 'lat' in name]
            lon_names = [name for name in original_data.coords if 'lon' in name]
            if not lat_names or not lon_names:
                raise ValueError("no latitude/longitude coordinate found in dataset")
            lat_name, lon_name = lat_names[0], lon_names[0]

            # Select NDVI first so that only the variable we keep is regridded.
            ndvi_only = original_data[['NDVI']]
            regridded_data = ndvi_only.interp(
                {lat_name: target_lat, lon_name: target_lon}, method='nearest'
            )

            # Materialise the table while the file is still open.
            regridded_dataframe = regridded_data.to_dataframe().reset_index()

        regridded_dataframe = regridded_dataframe.rename(columns={lat_name: 'lat', lon_name: 'lon'})

        # NOTE(review): if the file holds several time steps, this keeps only the
        # first row per grid cell -- confirm that is intended.
        unique_coordinates = regridded_dataframe.drop_duplicates(subset=['lat', 'lon'])

        # dirname() is '' for a bare file name, and os.makedirs('') raises.
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        unique_coordinates.to_csv(output_file_path, index=False)
        logging.info(f"Processed and saved: {output_file_path}")
    except Exception as e:
        logging.error(f"Failed to process file {file_path} due to {e}")

# Traverse the input tree and convert every NetCDF file that carries a date in its name
for current_dir, _subdirs, filenames in os.walk(base_input_path):
    for nc_name in filenames:
        # Guard clause: anything that is not a .nc file is ignored
        if not nc_name.endswith(".nc"):
            continue

        # The first run of eight digits in the name is taken as YYYYMMDD
        date_match = re.search(r'\d{8}', nc_name)
        if not date_match:
            logging.warning(f"Date not found in filename: {nc_name}")
            continue
        iso_date = datetime.strptime(date_match.group(), '%Y%m%d').strftime('%Y-%m-%d')

        # Mirror the input sub-directory under the output root and name the CSV by date
        sub_dir = os.path.relpath(current_dir, base_input_path)
        csv_target = os.path.join(base_output_path, sub_dir, f"NDVI_{iso_date}.csv")

        # Regrid the file and write the CSV
        process_file(os.path.join(current_dir, nc_name), target_lat, target_lon, csv_target)

logging.info("Processing completed.")


2024-06-06 11:24:05,322 - INFO - Processing file: data_in_nc4/NDVI\2016\VIIRS-Land_v001_NPP13C1_S-NPP_20160101_c20240124214015.nc
2024-06-06 11:24:06,983 - INFO - Processed and saved: data_in_csv/new_csv/ndvi_csv\2016\NDVI_2016-01-01.csv
2024-06-06 11:24:06,993 - INFO - Processing file: data_in_nc4/NDVI\2016\VIIRS-Land_v001_NPP13C1_S-NPP_20160102_c20240124214208.nc
2024-06-06 11:24:07,662 - INFO - Processed and saved: data_in_csv/new_csv/ndvi_csv\2016\NDVI_2016-01-02.csv
2024-06-06 11:24:07,677 - INFO - Processing file: data_in_nc4/NDVI\2016\VIIRS-Land_v001_NPP13C1_S-NPP_20160103_c20240124214416.nc
2024-06-06 11:24:08,442 - INFO - Processed and saved: data_in_csv/new_csv/ndvi_csv\2016\NDVI_2016-01-03.csv
2024-06-06 11:24:08,460 - INFO - Processing file: data_in_nc4/NDVI\2016\VIIRS-Land_v001_NPP13C1_S-NPP_20160104_c20240124214622.nc
2024-06-06 11:24:09,142 - INFO - Processed and saved: data_in_csv/new_csv/ndvi_csv\2016\NDVI_2016-01-04.csv
2024-06-06 11:24:09,161 - INFO - Processing file

In [None]:
import os
import pandas as pd

# Define the path to the yearly output folder
yearly_output_folder = "raw_files/ndvi"

# Iterate over year folders in a fixed (sorted) order so runs are reproducible
for year_folder_name in sorted(os.listdir(yearly_output_folder)):
    # Construct the full path to the year folder
    year_folder_path = os.path.join(yearly_output_folder, year_folder_name)

    # Yearly CSVs from a previous run sit next to the year folders; skip non-directories
    if not os.path.isdir(year_folder_path):
        continue

    # Collect the daily CSV files, ignoring hidden macOS '._*' metadata files
    csv_files = sorted(
        file for file in os.listdir(year_folder_path)
        if file.endswith('.csv') and not file.startswith('._')
    )

    # One NDVI Series per successfully read day, keyed by its YYYYMMDD date.
    # A date is only recorded once its data is actually available, so a file
    # that fails to load can no longer break the column selection below.
    daily_ndvi = {}

    for csv_file_name in csv_files:
        csv_file_path = os.path.join(year_folder_path, csv_file_name)
        try:
            df = pd.read_csv(csv_file_path)

            # Extract the date from the CSV file name and format it as YYYYMMDD
            date = csv_file_name.split('_')[1].split('.')[0].replace('-', '')

            # Index by grid cell so all days can be aligned in a single concat;
            # duplicate cells are dropped because alignment needs unique keys.
            daily_ndvi[date] = (
                df[['lat', 'lon', 'NDVI']]
                .drop_duplicates(subset=['lat', 'lon'])
                .set_index(['lat', 'lon'])['NDVI']
            )
        except Exception as e:
            print(f"Error processing file {csv_file_path}: {e}")

    # Sort dates chronologically (YYYYMMDD strings sort in date order)
    dates = sorted(daily_ndvi)

    if dates:
        # Align every day on (lat, lon) at once instead of one outer merge per
        # file; the result has lat and lon first, followed by the date columns.
        combined_data = (
            pd.concat([daily_ndvi[date] for date in dates], axis=1, keys=dates)
            .sort_index()
            .reset_index()
        )
    else:
        # No readable daily file: keep the original empty lat/lon layout
        combined_data = pd.DataFrame(columns=['lat', 'lon'])

    # Save the combined DataFrame to a CSV file
    output_csv_path = os.path.join(yearly_output_folder, f"{year_folder_name}.csv")
    try:
        combined_data.to_csv(output_csv_path, index=False)
        print(f"Combined data for {year_folder_name} and saved to {output_csv_path}")
    except Exception as e:
        print(f"Error saving file {output_csv_path}: {e}")


Combined data for 2010 and saved to data_in_csv/new_csv/ndvi_csv\2010.csv
Combined data for 2011 and saved to data_in_csv/new_csv/ndvi_csv\2011.csv
Combined data for 2012 and saved to data_in_csv/new_csv/ndvi_csv\2012.csv
Combined data for 2013 and saved to data_in_csv/new_csv/ndvi_csv\2013.csv
Combined data for 2018 and saved to data_in_csv/new_csv/ndvi_csv\2018.csv
Combined data for 2003 and saved to data_in_csv/new_csv/ndvi_csv\2003.csv
Combined data for 2008 and saved to data_in_csv/new_csv/ndvi_csv\2008.csv
Combined data for 2009 and saved to data_in_csv/new_csv/ndvi_csv\2009.csv
Combined data for 2014 and saved to data_in_csv/new_csv/ndvi_csv\2014.csv
Combined data for 2015 and saved to data_in_csv/new_csv/ndvi_csv\2015.csv
Combined data for 2016 and saved to data_in_csv/new_csv/ndvi_csv\2016.csv
Combined data for 2017 and saved to data_in_csv/new_csv/ndvi_csv\2017.csv
Combined data for 2006 and saved to data_in_csv/new_csv/ndvi_csv\2006.csv
Combined data for 2005 and saved to da

Save the yearly files separately to a folder (`yearly_files/ndvi`); the next cell reads its input from there.

In [None]:
import pandas as pd
import os

def transpose_csv(input_folder, output_folder):
    """Write a transposed copy of every CSV found under ``input_folder``.

    The directory layout below ``input_folder`` is reproduced below
    ``output_folder``. Each output file keeps the original column names as
    its first column and has no header row. Files whose name starts with
    ``._`` are skipped. A failure on one file is printed and does not stop
    the remaining files from being processed.
    """
    for current_dir, _subdirs, names in os.walk(input_folder):
        # Keep real CSVs only; '._*' entries are hidden macOS metadata files
        csv_names = [
            name for name in names
            if name.endswith('.csv') and not name.startswith('._')
        ]
        for csv_name in csv_names:
            source_path = os.path.join(current_dir, csv_name)
            try:
                # Load the table and swap its rows and columns
                flipped = pd.read_csv(source_path).transpose()

                # Same relative location, but rooted at the output folder
                target_dir = os.path.join(
                    output_folder, os.path.relpath(current_dir, input_folder)
                )
                os.makedirs(target_dir, exist_ok=True)
                target_path = os.path.join(target_dir, csv_name)

                # index=True keeps the former column names as row labels
                flipped.to_csv(target_path, index=True, header=False)
                print(f"Transposed and saved: {source_path} -> {target_path}")
            except Exception as e:
                print(f"Error processing file {source_path}: {e}")

if __name__ == "__main__":
    # Source: yearly CSVs; destination: their transposed counterparts
    input_folder, output_folder = "yearly_files/ndvi", "transposed/ndvi"

    # Make sure the destination root exists before walking the inputs
    os.makedirs(output_folder, exist_ok=True)
    transpose_csv(input_folder, output_folder)

    print("Transposition complete.")

In [None]:
import csv
import os
import pandas as pd

def transform_files(input_folder, output_file):
    """Convert every transposed CSV in ``input_folder`` and write one combined CSV.

    Each ``*.csv`` file directly inside ``input_folder`` (hidden ``._*``
    metadata files excluded) is converted with ``transform_file``; the results
    are stacked in file-name order and written to ``output_file``.

    Args:
        input_folder: Folder containing the transposed CSV files.
        output_file: Path of the combined CSV; missing parent directories are
            created.

    Returns:
        None. A file that fails to convert is reported on stdout and skipped.
    """
    # Collect the per-file frames and concatenate once at the end
    frames = []

    # Sorted so the row order of the output does not depend on the file system
    for filename in sorted(os.listdir(input_folder)):
        # Skip hidden metadata files
        if filename.endswith('.csv') and not filename.startswith('._'):
            input_file = os.path.join(input_folder, filename)
            try:
                frames.append(transform_file(input_file))
            except Exception as e:
                print(f"Error processing file {input_file}: {e}")

    # pd.concat rejects an empty list, so fall back to an empty frame
    combined_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Create the destination folder so the write cannot fail after all the work is done
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Write the combined DataFrame to a single output file
    combined_df.to_csv(output_file, index=False)
    print("Transformation complete. Combined data saved to:", output_file)

def transform_file(input_file):
    """Turn one transposed yearly CSV into long-format rows.

    Expected layout (as written by ``to_csv(index=True, header=False)`` on a
    transposed frame): the first row holds a label followed by the latitudes,
    the second row a label followed by the longitudes, and every following
    row holds a date followed by one value per grid cell. There is no header
    row.

    Args:
        input_file: Path of the transposed CSV file.

    Returns:
        DataFrame with columns ``Lat``, ``Lon``, ``Date`` and ``NDVI``; all
        values are kept as the strings read from the file.

    Raises:
        ValueError: If the file has fewer than two rows.
    """
    transformed_rows = []

    # newline='' is what the csv module expects for files it parses itself
    with open(input_file, 'r', newline='') as f_in:
        reader = csv.reader(f_in)
        try:
            # Drop the leading row label so coordinates line up with row[1:]
            # of the data rows; keeping it shifts every value by one cell.
            latitudes = next(reader)[1:]
            longitudes = next(reader)[1:]
        except StopIteration:
            raise ValueError(f"{input_file} does not contain latitude and longitude rows") from None

        # Every remaining row is data: skipping one here would silently drop
        # the first date of the file.
        for row in reader:
            if not row:
                continue  # tolerate blank lines
            date, values = row[0], row[1:]
            transformed_rows.extend(
                [lat, lon, date, value]
                for lat, lon, value in zip(latitudes, longitudes, values)
            )

    # Convert transformed rows to a DataFrame
    return pd.DataFrame(transformed_rows, columns=['Lat', 'Lon', 'Date', 'NDVI'])

if __name__ == "__main__":
    # Folder of transposed yearly CSVs in, single combined CSV out
    input_folder, output_file = 'transposed/ndvi', 'variables/ndvi.csv'

    transform_files(input_folder, output_file)
