# In [None]:
import pandas as pd
import matplotlib.pyplot as plt


def read_elexon_data(csv_file, reference_row=20):
    """Load an Elexon generation CSV and reshape it to one row per period.

    The file must contain the columns ``Dataset``, ``PublishTime``,
    ``StartTime``, ``SettlementDate``, ``SettlementPeriod``, ``FuelType`` and
    ``Generation``, and the fuel types ``CCGT`` and ``OCGT`` must be present.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
        reference_row: Position (after sorting by ``PublishTime``) of the row
            whose ``SettlementDate`` is used as the cut-off date. Rows with an
            earlier settlement date are discarded. If the file has fewer rows
            than this, the last row is used instead.

    Returns:
        A DataFrame with the four key columns, one column per remaining fuel
        type, plus ``GAS`` (CCGT + OCGT), ``INTERCONNECTORS`` (sum of every
        column whose name contains ``INT``) and ``Demand`` (row-wise sum of
        all generation columns).

    Raises:
        ValueError: If the CSV contains no data rows.
    """
    key_cols = ['PublishTime', 'StartTime', 'SettlementDate', 'SettlementPeriod']

    df = pd.read_csv(csv_file)
    if df.empty:
        raise ValueError("Elexon CSV contains no data rows")

    # Parse every timestamp column so sorting and comparisons are chronological.
    for col in ('PublishTime', 'SettlementDate', 'StartTime'):
        df[col] = pd.to_datetime(df[col])

    df = (
        df.sort_values(by='PublishTime', ascending=True, kind='stable')
        .drop(columns=['Dataset'])
        .reset_index(drop=True)
    )

    # Remove edge effects: the first few published rows can still belong to
    # the previous settlement date, so take the date from a row slightly
    # further in and discard everything before it. `~(a < b)` (rather than
    # `a >= b`) keeps rows whose date is missing, as the original loop did.
    ref_pos = min(reference_row, len(df) - 1)
    cutoff_date = df['SettlementDate'].iloc[ref_pos]
    df = df[~(df['SettlementDate'] < cutoff_date)]

    # Long -> wide: one row per (publish time, period), one column per fuel.
    # aggfunc='first' keeps the first non-null value when several rows share
    # the same key and fuel type (rows are in PublishTime order here).
    df = df.pivot_table(
        index=key_cols,
        columns='FuelType',
        values='Generation',
        aggfunc='first',
    ).reset_index()
    df.columns.name = None

    df['GAS'] = df['CCGT'] + df['OCGT']
    inter_cols = [col for col in df.columns if 'INT' in col]
    df['INTERCONNECTORS'] = df[inter_cols].sum(axis=1)
    df = df.drop(columns=['CCGT', 'OCGT'] + inter_cols)

    # Total demand is taken to be the sum of all generation columns. Select
    # them by name so the result does not depend on how many fuel types the
    # file happens to contain.
    generation_cols = [col for col in df.columns if col not in key_cols]
    df['Demand'] = df[generation_cols].sum(axis=1)

    print("Elexon data:", df.shape)

    return df


def read_neso_data(csv_file):
    """Load a NESO forecast CSV and keep one same-day forecast per period.

    The file must contain the columns ``SETTLEMENT_DATE``,
    ``SETTLEMENT_PERIOD``, ``Forecast_Datetime``, ``DATE_GMT``, ``TIME_GMT``
    and ``source_file``; the last three are dropped.

    Only rows whose forecast was issued on the same calendar day as the
    settlement date are kept. Where several such rows exist for the same
    (settlement date, settlement period), the first one in file order within
    that date is retained.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The cleaned DataFrame, sorted by ``SETTLEMENT_DATE`` with a fresh
        0..n-1 index.
    """
    df = pd.read_csv(csv_file)

    df['SETTLEMENT_DATE'] = pd.to_datetime(df['SETTLEMENT_DATE'])
    df['Forecast_Datetime'] = pd.to_datetime(df['Forecast_Datetime'])

    # Stable sort keeps the file's row order within each settlement date,
    # which decides which duplicate survives below.
    df = df.sort_values(by='SETTLEMENT_DATE', ascending=True, kind='stable')
    df = df.drop(columns=['DATE_GMT', 'TIME_GMT', 'source_file'])

    # Compare calendar days only (time of day stripped by normalize()).
    same_day = (
        df['SETTLEMENT_DATE'].dt.normalize()
        == df['Forecast_Datetime'].dt.normalize()
    )
    df = df[same_day]

    df = df.drop_duplicates(
        subset=['SETTLEMENT_DATE', 'SETTLEMENT_PERIOD'],
        keep='first',
    ).reset_index(drop=True)

    print("NESO data:", df.shape)
    print(df.head())  # quick preview of the cleaned data

    # Return the frame itself; returning the result of print() would hand
    # callers None instead of the data.
    return df

# Runs the NESO loader on 'Neso2025.csv' whenever this module/cell executes;
# the return value is not stored, so this serves only for its printed output.
read_neso_data('Neso2025.csv')

def combined_dataset(elexon_csv, neso_csv):
    """Load the Elexon and NESO files and stack them into one DataFrame.

    Args:
        elexon_csv: CSV passed to ``read_elexon_data``.
        neso_csv: CSV passed to ``read_neso_data``.

    Returns:
        The two loaded frames concatenated row-wise (Elexon rows first) with
        a fresh 0..n-1 index.

    NOTE(review): the frames are stacked, not joined on settlement
    date/period, so columns present in only one source are NaN for the other
    source's rows -- confirm this is the intended combination.
    """
    elexon_frame = read_elexon_data(elexon_csv)
    neso_frame = read_neso_data(neso_csv)

    stacked = pd.concat([elexon_frame, neso_frame])
    return stacked.reset_index(drop=True)


#combined_dataset('Elexon2025.csv', 'Neso2025.csv')