In [8]:
import pandas as pd
from tsfresh.feature_extraction import feature_calculators

def calculate_and_save_features(input_file, predict_feature, time_column,
                                output_file="features.xlsx"):
    """
    Reads the CSV file, calculates specific features, and saves them to an Excel file.

    The features are tsfresh ``feature_calculators`` statistics of the
    ``predict_feature`` column, plus the pandas standard deviation and
    inter-quartile range of that column. All results are written as a single
    row (one column per feature) to ``output_file``.

    Parameters:
        input_file (str): Path to the input CSV file.
        predict_feature (str): Name of the column containing the feature to predict.
        time_column (str): Name of the column containing the time information.
            It is only checked for presence; no feature is derived from it.
        output_file (str): Path of the Excel file to write. Defaults to
            "features.xlsx", the path that was previously hard-coded.

    Returns:
        pd.DataFrame: Single-row DataFrame holding the calculated features
        (the same data that is written to ``output_file``).

    Raises:
        KeyError: If ``predict_feature`` or ``time_column`` is not a column
            of the CSV file.
    """

    # Read the CSV file once and reuse it for every calculation.
    training_set = pd.read_csv(input_file)

    # Both columns must exist; a missing column is reported as a KeyError,
    # the same exception type plain column indexing would raise.
    missing_columns = [
        column for column in (predict_feature, time_column)
        if column not in training_set.columns
    ]
    if missing_columns:
        raise KeyError(f"Columns not found in {input_file}: {missing_columns}")

    predict_feature_data = training_set[predict_feature]

    # Spread statistics of the complete series. They are computed once here
    # and reported under every part name.
    total_std_dev = predict_feature_data.std()
    total_iqr = predict_feature_data.quantile(0.75) - predict_feature_data.quantile(0.25)

    # Only the whole data set is analysed; further named parts can be added here.
    parts = {'WholeDataSet': predict_feature_data}

    # Function to calculate features for a given part
    def calculate_features(part_name, part):
        """
        Calculates specific features for a given part of the time series.

        Parameters:
            part_name (str): Name of the part, used as prefix of every feature key.
            part (pd.Series): Part of the time series data.

        Returns:
            dict: Dictionary mapping '<part_name>_<Feature>' to the calculated value.
        """
        values = part.values  # array view shared by all calculators below
        features_dict = {}
        features_dict[f'{part_name}_STD_DEV'] = total_std_dev
        features_dict[f'{part_name}_IQR'] = total_iqr
        features_dict[f'{part_name}_AbsEnergy'] = feature_calculators.abs_energy(values)
        features_dict[f'{part_name}_AbsoluteMaximum'] = feature_calculators.absolute_maximum(values)
        # agg_autocorrelation returns a list of (name, value) pairs; keep the value.
        param_autocorrelation = [{"f_agg": "var", "maxlag": 10}]
        agg_autocorrelation = feature_calculators.agg_autocorrelation(values, param=param_autocorrelation)
        features_dict[f'{part_name}_AggAutocorrelation'] = agg_autocorrelation[0][1]
        features_dict[f'{part_name}_ApproximateEntropy'] = feature_calculators.approximate_entropy(values, m=2, r=0.1)
        # augmented_dickey_fuller also returns (name, value) pairs; keep the test statistic.
        augmented_dickey_fuller = feature_calculators.augmented_dickey_fuller(
            values, param=[{"attr": "teststat", "autolag": "AIC"}]
        )
        features_dict[f'{part_name}_AugmentedDickeyFuller'] = augmented_dickey_fuller[0][1]
        features_dict[f'{part_name}_AutocorrelationLag5'] = feature_calculators.autocorrelation(values, lag=5)
        features_dict[f'{part_name}_BenfordCorrelation'] = feature_calculators.benford_correlation(values)
        features_dict[f'{part_name}_BinnedEntropy'] = feature_calculators.binned_entropy(values, max_bins=10)
        features_dict[f'{part_name}_C3'] = feature_calculators.c3(values, lag=1)
        features_dict[f'{part_name}_ChangeQuantiles'] = feature_calculators.change_quantiles(
            values, ql=0.2, qh=0.8, isabs=False, f_agg="mean"
        )
        features_dict[f'{part_name}_CID_CE'] = feature_calculators.cid_ce(values, normalize=True)
        features_dict[f'{part_name}_CountAboveMean'] = feature_calculators.count_above_mean(values)
        features_dict[f'{part_name}_CountBelowMean'] = feature_calculators.count_below_mean(values)
        features_dict[f'{part_name}_Fourier_Entropy'] = feature_calculators.fourier_entropy(values, bins=10)
        features_dict[f'{part_name}_Kurtosis'] = feature_calculators.kurtosis(values)
        features_dict[f'{part_name}_Last_Location_Of_Maximum'] = feature_calculators.last_location_of_maximum(values)
        features_dict[f'{part_name}_Last_Location_Of_Minimum'] = feature_calculators.last_location_of_minimum(values)
        features_dict[f'{part_name}_Lempel_Ziv_Complexity'] = feature_calculators.lempel_ziv_complexity(values, bins=10)
        features_dict[f'{part_name}_Longest_Strike_Above_Mean'] = feature_calculators.longest_strike_above_mean(values)
        features_dict[f'{part_name}_Longest_Strike_Below_Mean'] = feature_calculators.longest_strike_below_mean(values)
        # Matrix Profile (disabled)
        # param_matrix_profile = [{"sample_pct": 0.1, "threshold": 0.2, "feature": "min"}]
        # matrix_profile = feature_calculators.matrix_profile(part.values, param=param_matrix_profile)
        features_dict[f'{part_name}_Maximum'] = feature_calculators.maximum(values)
        features_dict[f'{part_name}_Mean'] = feature_calculators.mean(values)
        features_dict[f'{part_name}_Mean_Absolute_Change'] = feature_calculators.mean_abs_change(values)
        features_dict[f'{part_name}_Mean_Change'] = feature_calculators.mean_change(values)
        features_dict[f'{part_name}_Mean_of_N_Absolute_Maximum'] = feature_calculators.mean_n_absolute_max(values, number_of_maxima=5)
        features_dict[f'{part_name}_Mean_Second_Derivative_Central'] = feature_calculators.mean_second_derivative_central(values)
        features_dict[f'{part_name}_Median'] = feature_calculators.median(values)
        features_dict[f'{part_name}_Minimum'] = feature_calculators.minimum(values)
        features_dict[f'{part_name}_Number_of_Crossings'] = feature_calculators.number_crossing_m(values, m=0)
        features_dict[f'{part_name}_Number_of_CWT_Peaks'] = feature_calculators.number_cwt_peaks(values, n=5)
        features_dict[f'{part_name}_Number_of_Peaks'] = feature_calculators.number_peaks(values, n=5)
        features_dict[f'{part_name}_Percentage_of_Reoccurring_Datapoints_to_All_Datapoints'] = feature_calculators.percentage_of_reoccurring_datapoints_to_all_datapoints(values)
        features_dict[f'{part_name}_Percentage_of_Reoccurring_Values_to_All_Values'] = feature_calculators.percentage_of_reoccurring_values_to_all_values(values)
        features_dict[f'{part_name}_Permutation_Entropy'] = feature_calculators.permutation_entropy(values, tau=1, dimension=3)
        features_dict[f'{part_name}_Quantile'] = feature_calculators.quantile(values, q=0.75)
        features_dict[f'{part_name}_Range_Count'] = feature_calculators.range_count(values, min=5, max=10)
        features_dict[f'{part_name}_Ratio_Beyond_R_Sigma'] = feature_calculators.ratio_beyond_r_sigma(values, r=2.0)
        features_dict[f'{part_name}_Ratio_Value_Number_to_Time_Series_Length'] = feature_calculators.ratio_value_number_to_time_series_length(values)
        features_dict[f'{part_name}_Root_Mean_Square'] = feature_calculators.root_mean_square(values)
        features_dict[f'{part_name}_Sample_Entropy'] = feature_calculators.sample_entropy(values)
        features_dict[f'{part_name}_Skewness'] = feature_calculators.skewness(values)
        features_dict[f'{part_name}_Sum_of_Reoccurring_Data_Points'] = feature_calculators.sum_of_reoccurring_data_points(values)
        features_dict[f'{part_name}_Sum_of_Reoccurring_Values'] = feature_calculators.sum_of_reoccurring_values(values)
        features_dict[f'{part_name}_Sum_Values'] = feature_calculators.sum_values(values)
        features_dict[f'{part_name}_Time_Reversal_Asymmetry_Statistic'] = feature_calculators.time_reversal_asymmetry_statistic(values, lag=2)
        #features_dict[f'{part_name}_Value_Count'] = feature_calculators.value_count(part.values, value=5)
        features_dict[f'{part_name}_Variance'] = feature_calculators.variance(values)
        features_dict[f'{part_name}_Variation_Coefficient'] = feature_calculators.variation_coefficient(values)

        return features_dict

    # Calculate features for each part and merge them into a single dictionary;
    # keys are prefixed with the part name, so parts do not overwrite each other.
    combined_features = {}
    for part_name, part in parts.items():
        combined_features.update(calculate_features(part_name, part))

    # Convert the dictionary to a one-row DataFrame
    all_features_df = pd.DataFrame([combined_features])

    # Save the features to an Excel file
    all_features_df.to_excel(output_file, index=False)
    return all_features_df


# Driver: compute the features for the configured CSV file and column.
input_file = 'AT_combined.csv'
predict_feature = "Low Carbon Percentage"
time_column = "Datetime (UTC)"
# calculate_and_save_features returns the features DataFrame (not a path);
# called this way it writes the Excel file to "features.xlsx".
features_df = calculate_and_save_features(input_file, predict_feature, time_column)
print("Features saved to features.xlsx")
