In [None]:
def add_sin_cos_transform(df, column, period):
    """
    Encode a cyclical column as a sine/cosine pair.

    Every value is mapped to the angle ``2 * pi * value / period``; the sine and
    cosine of that angle are stored in ``<column>_sin`` and ``<column>_cos``.
    The new columns are written into ``df`` itself, no copy is made.

    Args:
        df (pd.DataFrame): DataFrame holding the column to encode (modified in place).
        column (str): Name of the column to be transformed.
        period (int): Length of one full cycle (e.g., 24 for hours, 12 for months).

    Returns:
        pd.DataFrame: The same ``df`` object, now carrying the two new columns.
    """
    # Position on the unit circle, computed once and shared by both projections
    angle = 2 * np.pi * df[column] / period

    for suffix, projection in (('sin', np.sin), ('cos', np.cos)):
        df[f'{column}_{suffix}'] = projection(angle)

    return df


In [None]:
def build_lags(df, column, group_by='id', time_column='timestamp', *, samples_per_day=24 * 4):
    """
    Adds lag, rolling-window and autocorrelation features for one column.

    Rows are ordered by ``time_column`` inside each ``group_by`` group and every
    feature is computed per group, so values never cross group boundaries.
    The input DataFrame is not modified and the row order of the result matches
    the input. The feature columns are always created, even for empty input.

    New columns (all prefixed with ``<column>_``):
        lag_1, lag_2, lag_3: value 1, 2 and 3 rows earlier.
        lag_1day, lag_1week, lag_1year: value ``samples_per_day``,
            ``samples_per_day * 7`` and ``samples_per_day * 365`` rows earlier
            (a year is taken as 365 days, leap days are ignored).
        rolling_mean, rolling_median: statistics over the trailing
            ``samples_per_day`` rows, current row included.
        autocorr: lag-1 autocorrelation of the whole group, repeated on every
            row of that group; NaN for groups with a single row.

    Note:
        Lags and windows count rows, not elapsed time, so gaps in the series
        shift the features accordingly. Results are aligned back to ``df`` by
        index label, which requires ``df.index`` to be unique.

    Args:
        df (pd.DataFrame): Input DataFrame containing time series data.
        column (str): Name of the column for which features are to be calculated.
        group_by (str): Name of the grouping column (e.g., 'id').
        time_column (str): Name of the time column.
        samples_per_day (int): Number of rows that make up one day. Defaults to
            96, i.e. quarter-hourly data.

    Returns:
        pd.DataFrame: Copy of ``df`` with the feature columns appended.
    """
    # Work on a copy so the caller's DataFrame stays untouched
    df = df.copy()

    # Sort once for all groups. The sort is stable, so rows sharing a timestamp
    # keep their input order, and sort=False keeps groupby from reordering keys;
    # inside each group the rows are therefore in chronological order.
    ordered = df.sort_values(time_column, kind='mergesort')
    per_group = ordered.groupby(group_by, sort=False)[column]

    # Lag features: column suffix -> number of rows to look back
    lag_steps = {
        'lag_1': 1,
        'lag_2': 2,
        'lag_3': 3,
        'lag_1day': samples_per_day,
        'lag_1week': samples_per_day * 7,
        'lag_1year': samples_per_day * 365,
    }
    for suffix, steps in lag_steps.items():
        # The result carries the labels of ``ordered``; assignment aligns it
        # back to the original row order of ``df``.
        df[f'{column}_{suffix}'] = per_group.shift(steps)

    # Rolling mean and median over one day of samples
    df[f'{column}_rolling_mean'] = per_group.transform(
        lambda values: values.rolling(window=samples_per_day).mean()
    )
    df[f'{column}_rolling_median'] = per_group.transform(
        lambda values: values.rolling(window=samples_per_day).median()
    )

    # Autocorrelation (Lag-1): one scalar per group, broadcast to all of its rows
    df[f'{column}_autocorr'] = per_group.transform(
        lambda values: values.autocorr(lag=1) if len(values) > 1 else np.nan
    )

    return df


In [None]:
def clean_data(df):
    """
    Drops unused columns, moves the key columns to the front and renames columns.

    Processing order:
        1. Remove the identifier/metadata columns listed in ``obsolete``.
        2. Put the target and calendar columns first, followed by all other
           columns in their existing order.
        3. Rename the columns found in ``renames``; names not present in the
           DataFrame are skipped.

    Every column in ``obsolete`` and ``leading`` must exist in ``df``,
    otherwise pandas raises a ``KeyError``. The input DataFrame is not modified.

    Args:
        df (pd.DataFrame): Input DataFrame containing the data.

    Returns:
        pd.DataFrame: New DataFrame with the columns dropped, reordered and renamed.
    """
    # Columns that are removed from the result
    obsolete = [
        "id",
        "Eigenverbrauch",
        "PLZ",
        "Ort",
        "Installierte, nominale Speicherkapazität (kWh)",
        "Kategorie",
        "dateOfConstruction",
        "time",
        "coord_id",
        "date",
    ]

    # Columns that open the result, in exactly this order
    leading = ['Überschuss', 'year', 'month', 'hour', 'time_in_minutes', 'day', 'day_of_week', 'is_weekend']

    # Original name -> final name (several carry a unit suffix after the colon)
    renames = {
        "Überschuss": "feed_in:kWh",
        "Produktion": "production:kWh",
        "PanelPeakLeistung": "panel_peak_power:kwp",
        "Anstellwinkel": "tilt:deg",
        "Ausrichtung_Grad": "orientation:deg",
        "surfaceAreaOfBuilding": "surface_area:m2",
        "numberOfFloors": "number_of_floors",
        "dwellingCount": "dwelling_count",
        "totalSurfaceAreaOfDwellings": "total_surface_area_of_dwelling:m2",
        "direct_rad_tilt_orientation": "direct_rad_tilt_orientation:W",
        "global_rad_tilt_orientation": "global_rad_tilt_orientation:W",
        "is_raining": "is_raining:idx",
        "is_holiday": "is_holiday:idx",
    }

    trimmed = df.drop(columns=obsolete)

    # Everything that is not a leading column keeps its relative position
    trailing = [name for name in trimmed.columns if name not in leading]

    return trimmed[leading + trailing].rename(columns=renames)
