In [None]:
import os
import xarray as xr
import numpy as np

# Define the path to the main folder containing all the year folders
main_folder_path = "data_in_nc4/forest_cover"

# Define the path to the output folder where all yearly output folders will be saved
output_main_folder_path = "raw_files/forest_cover_csv"

# Create the output folder if it does not exist
os.makedirs(output_main_folder_path, exist_ok=True)

# Target grid shared by every file (0.25 steps; np.arange excludes the stop value).
# Built once here because it does not depend on the file being processed.
target_lat = np.arange(6.5, 38.5, 0.25)
target_lon = np.arange(66.5, 100, 0.25)

# Iterate over year folders in the main folder
for year_folder_name in os.listdir(main_folder_path):
    # Construct the full path to the year folder
    year_folder_path = os.path.join(main_folder_path, year_folder_name)

    # Only directories are treated as year folders; stray files are ignored
    if not os.path.isdir(year_folder_path):
        continue

    # Folder where the CSV files for this year folder will be saved
    output_year_folder_path = os.path.join(output_main_folder_path, f"{year_folder_name}_unique_coordinates")
    os.makedirs(output_year_folder_path, exist_ok=True)

    # Iterate over files in the year folder
    for file_name in os.listdir(year_folder_path):
        # Only NetCDF files are processed
        if not file_name.endswith(".nc"):
            continue

        file_path = os.path.join(year_folder_path, file_name)

        # The date is the first '_'-separated token made of exactly 8 digits
        file_date = next(
            (part for part in file_name.split('_') if len(part) == 8 and part.isdigit()),
            None,
        )
        if file_date is None:
            print(f"Unable to extract date from file name: {file_name}")
            continue

        # Open the dataset in a context manager so the underlying file is closed
        # as soon as the values have been pulled into a DataFrame.
        with xr.open_dataset(file_path) as original_data:
            # Coordinate names vary between products, so look them up by substring
            lat_name = [dim for dim in original_data.coords if 'lat' in dim][0]
            lon_name = [dim for dim in original_data.coords if 'lon' in dim][0]

            # Nearest-neighbour lookup of the source values on the target grid
            regridded_data = original_data.interp(
                {lat_name: target_lat, lon_name: target_lon}, method='nearest'
            )
            regridded_dataframe = regridded_data[['forestcoverfraction']].to_dataframe().reset_index()

        # Keep one row per grid point, using the coordinate names detected above
        # rather than assuming they are literally 'lat' and 'lon'.
        unique_coordinates = regridded_dataframe.drop_duplicates(subset=[lat_name, lon_name])

        # One CSV per input file, named after the date found in the file name
        csv_file_path = os.path.join(output_year_folder_path, f"{file_date}.csv")
        unique_coordinates.to_csv(csv_file_path, index=False)

        print(f"Processed {file_path} and saved output to {csv_file_path}")

Processed data_in_nc4/forest_c\Forest_cover\MODIS-TERRA_C6.1__MOD44B__ForestCoverFraction__LPDAAC__GLOBAL__0.5degree__UHAM-ICDC__20030306__fv0.01.nc and saved output to data_in_csv/new_csv/forest_cover_csv\Forest_cover_unique_coordinates\20030306.csv
Processed data_in_nc4/forest_c\Forest_cover\MODIS-TERRA_C6.1__MOD44B__ForestCoverFraction__LPDAAC__GLOBAL__0.5degree__UHAM-ICDC__20180306__fv0.01.nc and saved output to data_in_csv/new_csv/forest_cover_csv\Forest_cover_unique_coordinates\20180306.csv
Processed data_in_nc4/forest_c\Forest_cover\MODIS-TERRA_C6.1__MOD44B__ForestCoverFraction__LPDAAC__GLOBAL__0.5degree__UHAM-ICDC__20150306__fv0.01.nc and saved output to data_in_csv/new_csv/forest_cover_csv\Forest_cover_unique_coordinates\20150306.csv
Processed data_in_nc4/forest_c\Forest_cover\MODIS-TERRA_C6.1__MOD44B__ForestCoverFraction__LPDAAC__GLOBAL__0.5degree__UHAM-ICDC__20160305__fv0.01.nc and saved output to data_in_csv/new_csv/forest_cover_csv\Forest_cover_unique_coordinates\20160305.

In [None]:
import os
import pandas as pd

def drop_first_column_and_save(directory):
    """
    Remove the leftmost column from every CSV file in ``directory``, in place.

    Each file whose name ends with ``.csv`` is read with pandas, its first
    column (by position, whatever its name) is discarded, and the result is
    written back to the same path without an index column. Other files are
    left untouched. A line is printed for every file rewritten.

    Note: the column is chosen by position and the file is overwritten, so
    calling this again on the same directory removes one more column.

    Args:
    - directory (str): Path to the folder whose CSV files are rewritten.
    """
    for entry in os.listdir(directory):
        # Skip anything that is not a CSV file
        if not entry.endswith('.csv'):
            continue

        csv_path = os.path.join(directory, entry)

        # Load the table, keep everything from the second column onward
        table = pd.read_csv(csv_path)
        trimmed = table.iloc[:, 1:]

        # Overwrite the original file with the trimmed table
        trimmed.to_csv(csv_path, index=False)

        print(f"Processed and saved: {entry}")

# Specify the directory containing the CSV files
directory = 'raw_files/forest_cover_csv/Forest_cover_unique_coordinates'

# Call the function.
# NOTE: the files are rewritten in place and the column is dropped by position,
# so running this cell a second time removes one more column from every file.
drop_first_column_and_save(directory)


Processed and saved: 2003.csv
Processed and saved: 2018.csv
Processed and saved: 2015.csv
Processed and saved: 2016.csv
Processed and saved: 2017.csv
Processed and saved: 2014.csv
Processed and saved: 2013.csv
Processed and saved: 2012.csv
Processed and saved: 2011.csv
Processed and saved: 2010.csv
Processed and saved: 2009.csv
Processed and saved: 2008.csv
Processed and saved: 2007.csv
Processed and saved: 2006.csv
Processed and saved: 2005.csv
Processed and saved: 2004.csv


In [None]:
import os
import pandas as pd
from datetime import datetime

import logging

# Show INFO-level records so the per-file progress messages logged below are visible.
logging.basicConfig(level=logging.INFO)

def process_files(input_folder, output_folder):
    """
    Expand each yearly forest cover CSV into one row per coordinate per day.

    For every ``*.csv`` file in ``input_folder`` the year is taken from the
    first four characters of the file name (so ``2003.csv`` and
    ``20030306.csv`` both mean 2003). Every row of the file is then paired
    with every calendar date of that year, and the result is written to
    ``<output_folder>/<year>_daily.csv`` with the columns ``lat``, ``lon``,
    ``Date`` (formatted ``YYYYMMDD``) and ``forestcoverfraction``. Rows are
    ordered date by date: all input rows for 1 January, then all input rows
    for 2 January, and so on.

    Errors are not propagated: any exception (including the ``ValueError``
    raised for a file without ``lat``/``lon`` columns) stops processing and
    is reported through ``logging.error``.

    Args:
    - input_folder (str): Path to the folder containing input CSV files.
    - output_folder (str): Path to the folder where processed CSV files will be saved.
    """
    try:
        # Ensure the output folder exists
        os.makedirs(output_folder, exist_ok=True)

        # List all CSV files in the input folder
        files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]

        for file in files:
            # The year is the first four characters of the file stem; this accepts
            # both 'YYYY.csv' and 'YYYYMMDD.csv' names.
            year = int(file.split('.')[0][:4])

            # Read the data from the file
            file_path = os.path.join(input_folder, file)
            df = pd.read_csv(file_path)

            # Ensure that the DataFrame contains 'lat' and 'lon' columns
            if 'lat' not in df.columns or 'lon' not in df.columns:
                raise ValueError("'lat' and 'lon' columns are missing in the DataFrame.")

            # Every calendar day of the year (366 entries in leap years)
            start_date = datetime(year, 1, 1)
            end_date = datetime(year, 12, 31)
            date_range = pd.date_range(start=start_date, end=end_date)

            # Stack one full copy of the table per date ...
            repeated_df = pd.concat([df] * len(date_range), ignore_index=True)

            # ... and give each copy a single date by repeating every date len(df)
            # times in a row. Tiling the date list instead would pair row i with
            # date i % n_days, which skips and duplicates (coordinate, date) pairs
            # whenever len(df) and n_days share a common factor.
            repeated_df['Date'] = date_range.strftime('%Y%m%d').repeat(len(df)).to_numpy()

            # Reorder the columns
            repeated_df = repeated_df[['lat', 'lon', 'Date', 'forestcoverfraction']]

            # Save the new DataFrame to a file in the output folder
            output_file_path = os.path.join(output_folder, f'{year}_daily.csv')
            repeated_df.to_csv(output_file_path, index=False)

            logging.info(f'Processed {file} and saved to {output_file_path}')

    except Exception as e:
        logging.error(f'An error occurred: {str(e)}')

# Example usage: read the per-year CSVs from input_folder and write the
# '<year>_daily.csv' files into a separate output_folder.
input_folder = 'raw_files/forest_cover_csv/Forest_cover_unique_coordinates'
output_folder = 'raw_files/forest_cover_csv/daily_forest_cover'
process_files(input_folder, output_folder)


INFO:root:Processed 2003.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2003_daily.csv
INFO:root:Processed 2018.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2018_daily.csv
INFO:root:Processed 2015.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2015_daily.csv
INFO:root:Processed 2016.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2016_daily.csv
INFO:root:Processed 2017.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2017_daily.csv
INFO:root:Processed 2014.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2014_daily.csv
INFO:root:Processed 2013.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2013_daily.csv
INFO:root:Processed 2012.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2012_daily.csv
INFO:root:Processed 2011.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2011_daily.csv
INFO:root:Processed 2010.csv and saved to data_in_csv/new_csv/forest_cover_csv/hi\2010_daily.csv
INFO:root:Processed 2009.csv a

In [None]:
import os
import pandas as pd

# Path to the folder containing CSV files
folder_path = 'raw_files/forest_cover_csv/daily_forest_cover'

# Destination of the combined table
output_csv_path = 'variables/forestcover.csv'

# Read every CSV once and collect the frames. Concatenating a single time at the
# end avoids re-copying the accumulated data on every iteration.
frames = []
for filename in sorted(os.listdir(folder_path)):  # sorted -> reproducible row order
    if filename.endswith('.csv'):
        # Read the CSV file into a DataFrame
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path)

        # Rename the coordinate columns; rename() returns a new frame
        frames.append(df.rename(columns={'lat': 'Lat', 'lon': 'Lon'}))

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
combined_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Make sure the destination folder exists before writing
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)

# Save the combined DataFrame to a new CSV file
combined_data.to_csv(output_csv_path, index=False)

print("Combined CSV file saved successfully!")


Combined CSV file saved successfully!
