In [None]:
import os
import DatabaseAnalyzer
import DatabasePlotter as dbPlot

# Database path (absolute path to the local copy of the wind-farm database)
path = os.path.abspath('/home/wheatley/WFD/wfd.db')
#path = os.path.abspath('/home/wfd/WFD/wfd.db') # remote server
# Analyzer handle used by every cell below for data access
analyzer = DatabaseAnalyzer.WindFarmAnalyzer(path)

wf_id_list = analyzer.get_wf_id_list() # full list of wind farm ids; later cells iterate over it

# Wind farm analysed in the single-farm cells below.
# Alternative: take the first id in the list (id = 9 according to the original note).
#wf_id = wf_id_list[0]
wf_id = 310

# Analysis window passed as start/end dates to the analyzer and plotting calls below
start_date = '2020-01-01'
end_date = '2023-12-31'


# Farm Info # DICTIONARY
#analyzer.get_farm_info(wf_id) #TODO: This is broken, check the SQL...

# Farm Info (capacity related) # DATAFRAME
#analyzer.get_wf_data(wf_id)            # EPDK
#analyzer.get_moe_data(wf_id)           # Ministry of Energy
#analyzer.get_wf_turbines_data(wf_id)   # TUREB

# Location related functions
#analyzer.get_turbine_locations(wf_id, centroid=False) # TUPLE OR LIST OF TUPLES
#analyzer.find_elevations(wf_id) # LIST OF ELEVATIONS
#analyzer.find_closest_four_era5_location(wf_id, distance=False) # LIST

# Production (optionally capacity factor) data # DATAFRAME
#analyzer.get_wind_production_data(wf_id, start_date, end_date, CF=True, CF_ava=True, frequency='hourly', type='uevm', availability=True)
# ERA5 data # DATAFRAME
#analyzer.get_era5_data(wf_id, start_date, end_date, grid_number=0, variables_to_plot=['ws10']) # takes 1.5 minutes bc of closest location search


In [None]:
# Print Farm Info
#dbPlot.print_farm_info(analyzer, wf_id)
#dbPlot.print_capacity_info(analyzer, wf_id) # FIX THIS
# TODO: fix dates from unix time to normal dates on database
# TODO: return dataframe for plotting capacity changes with production data

# Location related functions
# Keep the returned object so it can be shown as this cell's output below
# (presumably a map or figure object -- confirm against DatabasePlotter).
a= dbPlot.plot_turbine_centroids(analyzer, wf_id)

# Plot production data
#dbPlot.plot_production(analyzer, wf_id, start_date, end_date, type='uevm')
#dbPlot.plot_monthly_production(analyzer, wf_id, start_date, end_date)

# Plot ERA5 data
#dbPlot.plot_era5_data(analyzer, wf_id, start_date, end_date, grid_number=0, variables_to_plot=['temp', 'pressure', 'dew', 'sensible_heat', 'ws100', 'wd100', 'ws10', 'wd10'])

# Plot production, capacity and capacity factor together
#dbPlot.plot_production_capacity_CF(analyzer, wf_id, start_date, end_date)


#print(analyzer.check_solar(wf_id, start_date, end_date))
# Bare last expression: the notebook renders whatever plot_turbine_centroids returned
a

In [None]:
import matplotlib.pyplot as plt

# Hourly production for the selected farm over the analysis window
prod = analyzer.get_wind_production_data(
    wf_id,
    start_date,
    end_date,
    CF=False,
    frequency='hourly',
    type='uevm',
    availability=False,
)

# ERA5 'ws100' series for the same farm and window (grid_number=0)
era5_ws100 = analyzer.get_era5_data(
    wf_id,
    start_date,
    end_date,
    grid_number=0,
    variables_to_plot=['ws100'],
)

# Inner join: keep only timestamps present in both frames
merged = prod.merge(right=era5_ws100, how='inner', on='timestamp')

# Scatter of production against wind speed, one small marker per joined row
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(merged['ws100'], merged['production'], s=0.5)
ax.set_xlabel('Wind Speed (m/s)')
ax.set_ylabel('Production (MWh)')
ax.set_title('Wind Speed vs Production')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# normalize both the production and wind speed data 0 to 1
from sklearn.preprocessing import MinMaxScaler

# Each column is rescaled independently with its own scaler. The columns of
# `merged` are overwritten in place, so every later cell that reads
# merged['production'] / merged['ws100'] sees unitless values in [0, 1].
for column in ('production', 'ws100'):
    scaled = MinMaxScaler().fit_transform(merged[column].values.reshape(-1, 1))
    # fit_transform returns an (n, 1) array; flatten it to a plain 1-D column
    merged[column] = scaled.ravel()


fig, ax1 = plt.subplots(figsize=(25, 10))

# Left axis: production. The label says "normalized" because the values are
# no longer in energy units after the rescaling above.
color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Production (min-max normalized)', color=color)
ax1.plot(merged['timestamp'], merged['production'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

# Right axis: wind speed at 100 m, likewise unitless after rescaling
color = 'tab:blue'
ax2.set_ylabel('Wind Speed at 100m (min-max normalized)', color=color)  # we already handled the x-label with ax1
ax2.plot(merged['timestamp'], merged['ws100'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

ax1.set_title('Normalized Production and Wind Speed at 100m')

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()

In [None]:
# calculate the correlation between production and wind speed
from scipy.stats import pearsonr

# pearsonr returns (correlation coefficient, p-value). The p-value is bound to a
# named variable rather than `_`, because assigning `_` in a notebook shadows
# IPython's "last output" variable for the rest of the session.
corr, p_value = pearsonr(merged['production'], merged['ws100'])
print('Pearsons correlation: %.3f' % corr)

In [None]:
# find the covariance between production and wind speed and remove the outliers
import numpy as np

# 2x2 covariance matrix of the two series as they currently stand in `merged`
cov = np.cov(merged['production'], merged['ws100'])
print(cov)

# remove the outliers
from scipy import stats

# Compute z-scores only for the two measurement columns. Scoring the whole
# frame would also include the 'timestamp' column, which is not a measurement
# and must not take part in the outlier test.
outlier_columns = ['production', 'ws100']
z = np.abs(stats.zscore(merged[outlier_columns].to_numpy(dtype=float)))

# Keep rows where both values lie within 3 standard deviations of their mean.
# NOTE: `merged` is rebound to the filtered frame, so re-running this cell
# recomputes the z-scores on the already-trimmed data and may trim further.
merged = merged[(z < 3).all(axis=1)]

# The plotted values were min-max scaled in an earlier cell, so the axes are
# labelled as normalized rather than in physical units.
plt.figure(figsize=(10, 10))
plt.scatter(merged['ws100'], merged['production'], s=0.5)
plt.xlabel('Wind Speed at 100m (min-max normalized)')
plt.ylabel('Production (min-max normalized)')
plt.title('Wind Speed vs Production (outliers removed)')
plt.show()


In [None]:
# Time series of production and 100 m wind speed after outlier removal.
# Both columns were min-max scaled in an earlier cell, so the axis labels
# describe normalized values rather than physical units.
fig, ax1 = plt.subplots(figsize=(25, 10))

color = 'tab:red'
ax1.set_xlabel('Date')
ax1.set_ylabel('Production (min-max normalized)', color=color)
ax1.plot(merged['timestamp'], merged['production'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('Wind Speed at 100m (min-max normalized)', color=color)  # we already handled the x-label with ax1
ax2.plot(merged['timestamp'], merged['ws100'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

ax1.set_title('Normalized Production and Wind Speed at 100m (outliers removed)')

fig.tight_layout()  # otherwise the right y-label is slightly clipped

plt.show()

In [None]:
# Aggregated capacity factor
import pandas as pd
import concurrent.futures
import matplotlib.pyplot as plt
import numpy as np

# Wider window for the fleet-level aggregate.
# NOTE: this rebinds the notebook-level start_date/end_date, so any cell run
# after this one uses the 2019-2023 window as well.
start_date = '2019-01-01'
end_date = '2023-12-31'

# Wind farm ids left out of the aggregate.
# NOTE(review): the original list contained 252 twice; the duplicate is dropped
# here. If the second entry was a typo for a different id, add that id back.
remove_array = [2, 12, 14, 20, 63, 64, 130, 138, 197, 252, 262]

# Set lookup keeps the filter linear in the number of farms
excluded_ids = set(remove_array)
wf_id_list_tmp = [x for x in wf_id_list if x not in excluded_ids]

def process_wf_id(wf_id):
    """Fetch the monthly capacity-factor data for one wind farm.

    Reads the notebook-level ``analyzer``, ``start_date`` and ``end_date`` and
    forwards them to ``analyzer.get_wind_production_data`` with ``CF=True``,
    ``frequency='monthly'`` and ``type='uevm'``.

    Args:
        wf_id: Identifier of the wind farm to query.

    Returns:
        Whatever the analyzer call returns, passed through unchanged. The
        caller treats ``None`` as "nothing to aggregate for this farm".
    """
    return analyzer.get_wind_production_data(
        wf_id,
        start_date,
        end_date,
        CF=True,
        frequency='monthly',
        type='uevm',
    )

# Query the farms in parallel worker processes. executor.map returns a lazy
# iterator, so it is drained into a list while the pool is still open; an
# exception raised inside a worker is re-raised here, next to the call that
# caused it, instead of later during concatenation.
with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
    results = list(executor.map(process_wf_id, wf_id_list_tmp))

# Drop farms for which the analyzer returned None. Worker exceptions are NOT
# filtered here -- they propagate from the list() call above.
frames = [result for result in results if result is not None]
if not frames:
    # pd.concat would fail with a generic message on an empty list
    raise ValueError('No capacity factor data was returned for any wind farm')

# Stack the per-farm frames and average across farms for each timestamp
agg_cf = pd.concat(frames)
agg_cf = agg_cf.groupby('timestamp').mean().reset_index()

fig, ax = plt.subplots(figsize=(25, 10))

ax.plot(agg_cf['timestamp'], agg_cf['capacity_factor'], label='Capacity Factor')

# Linear trend fitted against the row position (0, 1, 2, ...) rather than the
# timestamp values themselves
x = np.arange(len(agg_cf))
z = np.polyfit(x, agg_cf['capacity_factor'], 1)
p = np.poly1d(z)
ax.plot(agg_cf['timestamp'], p(x), 'r--', label='Trendline')

ax.set_xlabel('Date')
ax.set_ylabel('Capacity Factor')
ax.set_title('Average Monthly Capacity Factor Across Wind Farms')
ax.legend()

plt.show()

In [None]:
# Plot production, capacity and capacity factor for every wind farm, using the
# start_date/end_date currently bound in the notebook.
# A dedicated loop variable is used so the notebook-level `wf_id` selected in
# the setup cell is not overwritten by the last id in the list.
for farm_id in wf_id_list:
    try:
        dbPlot.plot_production_capacity_CF(analyzer, farm_id, start_date, end_date)
    except Exception as e:
        # Best-effort: keep going with the remaining farms, but report what
        # went wrong so the failure can be diagnosed.
        print(f'Error for wf_id: {farm_id} ({type(e).__name__}: {e})')