In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Load the merged solar dataset (JSON written in pandas 'split' orientation).
# NOTE: the backslash-separated relative path only resolves on Windows.
solar_output_db = pd.read_json(
    r'rawData\solar_output\merged_lean_solar_data.json',
    orient='split',
)

# Discard the columns that are not needed here, relabel the row index as
# strings and shorten the name of the remaining output column to "data".
solar_output = (
    solar_output_db
    .drop(columns=['lat', 'lon', "gemeentenaam", "population", "solar_data_list", "temperature_data_list"])
    .rename(index=str, columns={"solar_output_list": "data"})
)

# Pivot the table: every entry of "data" is expanded into its own Series, and
# after the transpose each source row (presumably one municipality, going by
# the variable name) becomes a column while the list positions become rows.
solar_output_muniInColumns = (
    solar_output[['data']]
    .unstack()
    .apply(pd.Series)
    .T
    .xs('data', axis=1, drop_level=True)
)

# Row-wise mean across all columns, stored as a one-column frame.
avg_solar_output = solar_output_muniInColumns.mean(axis=1).to_frame(name="AvgValue")

# Save the averaged series so later cells can restart from this point.
avg_solar_output.to_json(r'rawData\solar_output\mean_solar_output_hourly_10years.json', orient='split')

In [None]:
# Plot the first year of the averaged series: 365 days x 24 h = 8760 rows.
# Positional slicing is used on purpose: label-based `.loc[:8760]` includes its
# end label and therefore selected 8761 rows, one hour more than a year.
ax = avg_solar_output.iloc[:8760].plot(figsize=[16, 6])
ax.set_xlabel("Hours", fontsize=14)
ax.set_ylabel("Solar Output (KWh)", fontsize=14)
# Title is set on the Axes returned by the plot call rather than through the
# pyplot "current axes" state.
ax.set_title("Average solar output of the Netherlands in 2020", fontdict={'fontsize': 18})
fig = ax.get_figure()
# fig.savefig('solaroutput2020.png')

In [None]:
# Plot a single day of the averaged series: the first 24 hourly rows.
# Positional slicing is used on purpose: label-based `.loc[0:24]` includes its
# end label and therefore selected 25 rows (the first hour of the next day too).
ax = avg_solar_output.iloc[:24].plot(figsize=[16, 6])
ax.set_xlabel("Hours", fontsize=14)
ax.set_ylabel("Solar Output (KWh)", fontsize=14)
# Title is set on the Axes returned by the plot call rather than through the
# pyplot "current axes" state.
ax.set_title("Daily profile of average solar output", fontdict={'fontsize': 18})
fig = ax.get_figure()
# fig.savefig('solaroutputDaily.png')

### Checkpoint - 1

In [None]:
# Checkpoint: reload the averaged series from the JSON file that the export
# step earlier in this notebook writes, so the cells below can be run without
# recomputing the mean from the raw dataset.
# NOTE: the backslash-separated relative path only resolves on Windows.
avg_solar_output = pd.read_json(r'rawData\solar_output\mean_solar_output_hourly_10years.json', orient='split')

In [None]:
# Stretch the series to three times its length by stacking three copies of it
# end to end; the row labels are renumbered 0..n-1 across the combined frame.
# NOTE: this overwrites its own input, so running the cell again without
# reloading the checkpoint triples the data once more.
avg_solar_output = pd.concat([avg_solar_output] * 3, ignore_index=True)

In [None]:
# Label every hourly row with the zero-based day it falls in (24 rows per day).
# This is done in one vectorised step from the integer row labels. The previous
# version filled the column one cell at a time through `.loc` (3650*3*24 scalar
# assignments), which was very slow and would silently append all-NaN rows if
# the frame held fewer rows than that. The column is kept as float, the dtype
# the cell-by-cell fill produced.
avg_solar_output["dayIndex"] = (avg_solar_output.index.to_numpy() // 24).astype(float)

# Day numbers, counted within one 3650-day block, of the days to keep.
representativeDays = [149, 336, 1255, 1381, 1414, 1486, 2239, 3284, 3462]

# Row labels of all 24 hours of those days, repeated for each of three blocks.
# NOTE(review): the 3650-day offset assumes every block holds exactly 3650*24
# hourly rows (i.e. no leap days) - confirm against the length of the data.
listHours = [
    24 * (day + block * 3650) + hour
    for block in range(3)
    for day in representativeDays
    for hour in range(24)
]

# Keep only the rows of the representative days and renumber them from 0.
# NOTE: this overwrites its own input; re-running the cell on the already
# filtered frame would select the wrong rows.
avg_solar_output = avg_solar_output[avg_solar_output.index.isin(listHours)].reset_index(drop=True)

In [None]:
# Collapse the hourly series into 3-hour steps. Each complete group of three
# consecutive rows is represented by its middle row (labels 1, 4, 7, ...):
# the step value is that row's "AvgValue" multiplied by 3, and the step
# inherits that row's "dayIndex". Rows left over after the last complete
# group are ignored.
emptyListValue, emptyListDay = [], []

complete_groups = len(avg_solar_output) // 3
for middle_row in tqdm(range(1, 3 * complete_groups, 3)):
    emptyListDay.append(avg_solar_output.loc[middle_row, "dayIndex"])
    emptyListValue.append(avg_solar_output.loc[middle_row, "AvgValue"] * 3)

three_hourly_solar_output = pd.DataFrame(
    {"Value": emptyListValue, "dayIndex": emptyListDay}
)


In [None]:
# Fold the 3-hourly series into one row per day. "Value" holds the day's eight
# consecutive 3-hourly values as a list and "dayIndex" is taken from the first
# of those eight rows. A trailing group of fewer than eight rows is dropped.
#
# The rows are assembled as plain Python lists and the frame is created in a
# single constructor call. The previous version grew the frame one row at a
# time with `.loc` and stored a list in a single cell through `.loc`, which
# copies the frame on every step and is fragile across pandas versions.
three_hourly_values = three_hourly_solar_output["Value"].tolist()
three_hourly_days = three_hourly_solar_output["dayIndex"].tolist()
n_days = len(three_hourly_solar_output) // 8  # eight 3-hour steps per day

daily_solar_output = pd.DataFrame(
    {
        "Value": [three_hourly_values[8 * i:8 * i + 8] for i in range(n_days)],
        "dayIndex": [three_hourly_days[8 * i] for i in range(n_days)],
    },
    columns=["Value", "dayIndex"],
)

In [None]:
# Write the per-day table (one row per day: the list of its eight 3-hourly
# values plus its day index) to disk as JSON in pandas 'split' orientation.
# NOTE: the backslash-separated relative path only resolves on Windows.
daily_solar_output.to_json(r"processedData\processed_solar_output\daily_3hourly_SolarOutput.json", orient='split')