In [None]:
import sqlite3
import pandas as pd
    
def ts_id_from_datetime(dt):
    """
    Map a datetime onto the minute-based timestamp ID scheme.

    ID 3682081 is assigned to 2017-01-01 00:00:00 and every elapsed minute
    adds one. The result is a float, so sub-minute offsets produce a
    fractional ID; callers that need an integer ID must cast it themselves.
    """
    elapsed = dt - pd.Timestamp('2017-01-01 00:00:00')
    minutes_elapsed = elapsed.total_seconds() / 60
    return 3682081 + minutes_elapsed

def get_production_data_with_timestamps(cursor, wf_id):
    """
    Load one wind farm's production rows with their timestamp IDs decoded.

    Args:
        cursor: Open database cursor on a database containing 'productions'.
        wf_id: Wind farm identifier to filter on.

    Returns:
        A DataFrame with columns 'timestamp' (decoded via ts_id_to_datetime)
        and 'production', or None - after printing a notice - when the wind
        farm has no rows.
    """
    sql = """
        SELECT ts_id, production
        FROM productions
        WHERE wf_id = ?
    """
    cursor.execute(sql, (wf_id,))
    rows = cursor.fetchall()

    # Guard clause: nothing to convert for a farm without production rows.
    if not rows:
        print(f"No production data found for wind farm {wf_id}")
        return None

    frame = pd.DataFrame(rows, columns=['ts_id', 'production'])
    frame['ts_id'] = frame['ts_id'].apply(ts_id_to_datetime)
    return frame.rename(columns={'ts_id': 'timestamp'})

def get_capacity_at_timestamp(cursor, wf_id, timestamp):
    """
    Sum the capacity a wind farm had accepted up to and including a timestamp.

    Args:
        cursor: Open database cursor on a database containing 'ministry_of_energy'.
        wf_id: Wind farm identifier to filter on.
        timestamp: Cut-off moment; it is passed to the query as str(timestamp).

    Returns:
        The summed 'additional_unit_power_electrical' of all matching rows,
        or 0 when no row matches (SQL SUM yields NULL in that case).
    """
    # NOTE(review): the column is spelled 'acceptence_date' here, while other code in
    # this notebook reads 'acceptance_date' from the MoE frame - confirm the DB schema.
    sql = """
        SELECT SUM(additional_unit_power_electrical) AS capacity
        FROM ministry_of_energy
        WHERE wf_id = ? AND acceptence_date <= ?
    """
    cursor.execute(sql, (wf_id, str(timestamp)))
    (total,) = cursor.fetchone()
    if total is None:
        return 0
    return total

def update_productions_table(cursor, connection, production_data, wf_id):
    """
    Write each row's capacity factor back to the 'productions' table.

    Args:
        cursor: Open database cursor used to run the UPDATE statements.
        connection: Connection owning the cursor; committed once at the end.
        production_data: DataFrame with at least 'ts_id' and 'capacity_factor'.
        wf_id: Wind farm whose rows are updated.

    Rows are matched on (wf_id, ts_id). Missing capacity factors (None/NaN)
    are stored as NULL.
    """
    update_query = """
            UPDATE productions
            SET capacity_factor = ?
            WHERE wf_id = ? AND ts_id = ?
        """
    # Coerce to native Python numbers before binding: sqlite3 cannot bind numpy
    # integer scalars as INTEGER, so an un-coerced ts_id would never match a row.
    parameters = [
        (None if pd.isna(capacity_factor) else float(capacity_factor), wf_id, int(ts_id))
        for ts_id, capacity_factor in zip(
            production_data['ts_id'], production_data['capacity_factor']
        )
    ]
    # One batched call instead of one execute() per DataFrame row.
    cursor.executemany(update_query, parameters)
    connection.commit()

# Helper function: inverse of ts_id_from_datetime (timestamp ID -> datetime).
def ts_id_to_datetime(ts_id):
    """Decode a minute-based timestamp ID; ID 3682081 maps to 2017-01-01 00:00:00."""
    minutes_from_reference = pd.Timedelta(minutes=ts_id - 3682081)
    return pd.Timestamp('2017-01-01 00:00:00') + minutes_from_reference

In [None]:
def _capacity_factors(production_data, moe):
    """
    Compute production / installed capacity for every production row.

    Args:
        production_data: DataFrame with 'timestamp' and 'production' columns.
        moe: DataFrame with 'acceptance_date' and 'additional_unit_power_electrical'.

    Returns:
        A Series aligned with production_data's index. The value is 0 wherever
        the installed capacity at that timestamp is not positive, which includes
        rows dated before the first acceptance date and an empty `moe` frame.
    """
    # Work on a derived frame so the caller's MoE data is never mutated. Rows without
    # a parseable acceptance date can never satisfy "acceptance_date <= timestamp".
    installed = (
        moe.assign(acceptance_date=pd.to_datetime(moe['acceptance_date']))
        .dropna(subset=['acceptance_date'])
        .sort_values(by='acceptance_date')
    )
    agg_capacity = installed['additional_unit_power_electrical'].cumsum()

    # steps[k] is the capacity once k units have been accepted; steps[0] covers
    # timestamps that precede every acceptance date.
    steps = pd.Series([0.0, *agg_capacity.tolist()])

    # side='right' counts the acceptance dates that are <= each timestamp.
    accepted_count = installed['acceptance_date'].searchsorted(
        production_data['timestamp'], side='right'
    )
    capacity = pd.Series(steps.to_numpy()[accepted_count], index=production_data.index)

    ratio = production_data['production'] / capacity
    return ratio.where(capacity > 0, 0.0)


def calculate_and_store_hourly_capacity_factors(db_path):
    """
    Calculates hourly capacity factors for all wind farms and stores them in the 'productions' table.

    For every wf_id found in 'productions', each production value is divided by
    the cumulative installed capacity (from the MoE data) in force at that row's
    timestamp, and the result is written to the 'capacity_factor' column.

    Args:
        db_path (str): Path to the SQLite database file.
    """
    # Project-local import stays function-scoped, but is resolved once instead of
    # once per wind farm.
    from src.DatabaseGetInfo import DatabaseAnalyzer

    connection = sqlite3.connect(db_path)
    try:
        cursor = connection.cursor()

        # Get wind farm IDs
        cursor.execute("SELECT DISTINCT wf_id FROM productions order by wf_id")
        wf_ids = [row[0] for row in cursor.fetchall()]

        # Built once and reused: get_moe_data() receives the wind farm ID per call.
        analyzer = DatabaseAnalyzer.WindFarmAnalyzer(db_path)

        for wf_id in wf_ids:
            print(f"Processing wind farm {wf_id}")

            # Get production data for the wind farm
            production_data = get_production_data_with_timestamps(cursor, wf_id)
            if production_data is None:
                continue

            moe = analyzer.get_moe_data(wf_id)
            production_data['capacity_factor'] = _capacity_factors(production_data, moe)

            # Convert timestamps back to the integer ts_id used as the update key.
            production_data['ts_id'] = (
                production_data['timestamp'].apply(ts_id_from_datetime).astype(int)
            )

            # Update 'productions' table with capacity factor
            update_productions_table(cursor, connection, production_data, wf_id)
    finally:
        # Release the database handle even when a wind farm fails midway.
        connection.close()

In [None]:
# Example usage
import os

# The database file is looked up three directory levels above the current directory.
relative_db_path = os.path.join(os.path.curdir, '..', '..', '..', 'wfd_izmir.db')
path = os.path.abspath(relative_db_path)
calculate_and_store_hourly_capacity_factors(path)

In [None]:

# Aggregate to monthly means.
# NOTE(review): `a` is not defined in any visible cell - presumably a production
# frame with 'timestamp' and 'capacity_factor' columns left over from an earlier
# run; define it explicitly above so Restart & Run All works.
# numeric_only=True keeps the datetime 'timestamp' column out of the result.
# Without it, pandas >= 2.0 averages that column as well and reset_index() fails
# because the group key is also named 'timestamp'.
a_monthly = a.groupby(a['timestamp'].dt.to_period('M')).mean(numeric_only=True)
a_monthly = a_monthly.reset_index()


# Show the monthly table.
a_monthly


In [None]:
# Plot the monthly mean capacity factor on a wide figure.
import matplotlib.pyplot as plt

# After reset_index() the frame's index is only the row position, so plot against the
# monthly period column instead, converted to timestamps that matplotlib can place
# on a date axis.
months = a_monthly['timestamp'].dt.to_timestamp()

fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(months, a_monthly['capacity_factor'])
ax.set_xlabel('Month')
ax.set_ylabel('Mean capacity factor')
ax.set_title('Monthly mean capacity factor')
plt.show()
