In [24]:
import pandas as pd
from typing import Optional

def resample_ci(
    ci10min_df: pd.DataFrame,
    dt_col: str = 'Datetime',
    on_unexpected: str = 'drop',
) -> pd.DataFrame:
    """
    Moves 10-minute-grid rows onto a 15-minute grid.

    The resampling follows these rules:
        1. Rows at minute 'xx:00' or 'xx:30' keep their timestamp.
        2. Rows at minute 'xx:10' are moved to 'xx:15'.
        3. Rows at minute 'xx:40' are moved to 'xx:45'.
        4. Rows at any other minute (e.g. 'xx:20', 'xx:50') or with a missing
           timestamp have no 15-minute slot; see `on_unexpected`.

    Seconds and microseconds of the retained timestamps are set to zero. If
    several rows end up on the same 15-minute timestamp, each column keeps its
    first non-null value in input order.

    Parameters:
    -----------
    ci10min_df : pd.DataFrame
        Input DataFrame containing at least the datetime column specified by `dt_col`.
        It is not modified.
    dt_col : str, optional
        Name of the datetime column in `ci10min_df`. Defaults to 'Datetime'.
    on_unexpected : str, optional
        What to do with rows covered by rule 4. 'drop' (default) leaves them out
        of the result; 'raise' raises a ValueError instead.

    Returns:
    --------
    pd.DataFrame
        A new DataFrame with `dt_col` as its first column, followed by the
        remaining input columns in their original order, one row per distinct
        15-minute timestamp, sorted by that timestamp.

    Raises:
    -------
    ValueError
        If `on_unexpected` is neither 'drop' nor 'raise', or if it is 'raise'
        and at least one row falls under rule 4.
    """
    if on_unexpected not in ('drop', 'raise'):
        raise ValueError(
            f"on_unexpected must be 'drop' or 'raise', got {on_unexpected!r}"
        )

    # Minutes to add to each supported source minute to reach the 15-minute grid.
    minute_shift = {0: 0, 30: 0, 10: 5, 40: 5}

    # pd.to_datetime returns a new Series, so the caller's frame stays untouched.
    timestamps = pd.to_datetime(ci10min_df[dt_col])
    source_minute = timestamps.dt.minute

    # Positional boolean mask (NumPy array): filtering below never depends on
    # index alignment. Missing timestamps have a NaN minute and are not kept.
    keep = source_minute.isin(list(minute_shift)).to_numpy()

    if on_unexpected == 'raise' and not keep.all():
        bad_minutes = sorted(set(source_minute[~keep].dropna().astype(int).tolist()))
        raise ValueError(
            f"Column {dt_col!r} has {int((~keep).sum())} row(s) that cannot be "
            f"placed on the 15-minute grid (unexpected minute values: "
            f"{bad_minutes}; missing timestamps also count)."
        )

    kept = timestamps[keep]

    # Plain timedelta arithmetic: strip the sub-minute part, then add 0 or 5 minutes.
    sub_minute = (
        pd.to_timedelta(kept.dt.second, unit='s')
        + pd.to_timedelta(kept.dt.microsecond, unit='us')
    )
    shift = pd.to_timedelta(kept.dt.minute.map(minute_shift), unit='m')
    resampled_time = (kept - sub_minute + shift).rename(dt_col)

    # Group by an external key Series rather than a scratch column, so an input
    # column that happens to be called 'resampled_time' is preserved as data.
    values = ci10min_df.loc[keep].drop(columns=[dt_col])
    ci15min_df = values.groupby(resampled_time).first().reset_index()

    return ci15min_df


In [25]:
# Read the raw cloud-index CSV; the 'Datetime' column is parsed to timestamps while loading.
raw_ci_path = './extracted_cloud_csv/eebuilding1_raw_overview.csv'
ci10min_df = pd.read_csv(raw_ci_path, parse_dates=['Datetime'])
ci10min_df


Unnamed: 0,Datetime,Cloud Index_95 (-)
0,2024-06-30 23:30:00,0.000000
1,2024-06-30 23:50:00,0.000000
2,2024-08-01 00:40:00,0.000000
3,2024-08-01 01:00:00,0.000000
4,2024-08-01 01:30:00,0.000000
...,...,...
1706,2024-08-19 23:40:00,0.776471
1707,2024-08-19 23:50:00,0.643137
1708,2024-08-20 00:00:00,0.713725
1709,2024-08-20 00:10:00,0.686275


In [26]:
# Move the loaded series onto the 15-minute grid, naming the timestamp column explicitly.
ci15min_df = resample_ci(ci10min_df, dt_col='Datetime')
ci15min_df

Unnamed: 0,Datetime,Cloud Index_95 (-)
0,2024-06-30 23:30:00,0.000000
1,2024-08-01 00:45:00,0.000000
2,2024-08-01 01:00:00,0.000000
3,2024-08-01 01:30:00,0.000000
4,2024-08-01 01:45:00,0.000000
...,...,...
1110,2024-08-19 23:15:00,0.541176
1111,2024-08-19 23:30:00,0.756863
1112,2024-08-19 23:45:00,0.776471
1113,2024-08-20 00:00:00,0.713725
