In [11]:
import pandas as pd
import numpy as np
import requests

In [5]:
df = pd.read_csv("PV_Elec_Gas3.csv")
df_filtered = df[['date', 'Cumulative_solar_power']]
print(df_filtered.head(30))

          date  Cumulative_solar_power
0   26/10/2011                     0.1
1   27/10/2011                    10.2
2   28/10/2011                    20.2
3   29/10/2011                    29.6
4   30/10/2011                    34.2
5   31/10/2011                    38.0
6    1/11/2011                    46.6
7    2/11/2011                    51.6
8    3/11/2011                    58.6
9    4/11/2011                    60.5
10   5/11/2011                    65.7
11   6/11/2011                    71.8
12   7/11/2011                    73.3
13   8/11/2011                    73.8
14   9/11/2011                    76.1
15  10/11/2011                    82.5
16  11/11/2011                    89.1
17  12/11/2011                    95.5
18  13/11/2011                    99.9
19  14/11/2011                   101.4
20  15/11/2011                   109.1
21  16/11/2011                   116.0
22  17/11/2011                   122.3
23  18/11/2011                   124.3
24  19/11/2011           

In [13]:
solar_url = r"https://power.larc.nasa.gov/api/temporal/daily/point?start=20111026&end=20201110&latitude=51.260197&longitude=4.402771&community=re&parameters=ALLSKY_SFC_SW_DWN%2CT2M&format=json&user=Research&header=true&time-standard=lst"
response = requests.get(url=solar_url, verify=True, timeout=30.00)
content = json.loads(response.content.decode('utf-8'))

In [19]:
data = {
    'Solar_Irradiance': content['properties']['parameter']['ALLSKY_SFC_SW_DWN'],
    'Temperature_2m': content['properties']['parameter']['T2M']
}

In [21]:
solarDF = pd.DataFrame(data)
solarDF.index = pd.to_datetime(solarDF.index, format='%Y%m%d')

In [23]:
solarDF

Unnamed: 0,Solar_Irradiance,Temperature_2m
2011-10-26,1.98,9.46
2011-10-27,1.94,9.83
2011-10-28,1.97,12.29
2011-10-29,1.10,12.71
2011-10-30,1.21,12.42
...,...,...
2020-11-06,1.89,6.35
2020-11-07,1.71,7.75
2020-11-08,1.08,9.58
2020-11-09,1.37,10.96


In [7]:
df_filtered.tail(15)

Unnamed: 0,date,Cumulative_solar_power
3289,27/10/2020,36402.0
3290,28/10/2020,36405.0
3291,29/10/2020,36409.0
3292,30/10/2020,36412.0
3293,31/10/2020,36415.0
3294,1/11/2020,36419.0
3295,2/11/2020,36422.0
3296,3/11/2020,36424.0
3297,4/11/2020,36430.0
3298,5/11/2020,36437.0


In [29]:
df_filtered['date'] = pd.to_datetime(df_filtered['date'], format='%d/%m/%Y')

solarDF = solarDF.reset_index()
solarDF.rename(columns={'index': 'date'}, inplace=True)

solarDF['date'] = pd.to_datetime(solarDF['date'], format='%Y-%m-%d')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['date'] = pd.to_datetime(df_filtered['date'], format='%d/%m/%Y')


In [31]:
df_filtered.head()

Unnamed: 0,date,Cumulative_solar_power
0,2011-10-26,0.1
1,2011-10-27,10.2
2,2011-10-28,20.2
3,2011-10-29,29.6
4,2011-10-30,34.2


In [110]:
df_filtered = df_filtered['Cumulative_solar_power'].diff()

df_filtered.to_csv("filtered_solar_data.csv", index=False)

In [112]:
df_filtered

0        NaN
1       10.1
2       10.0
3        9.4
4        4.6
        ... 
3299     8.0
3300     8.0
3301     8.0
3302     5.0
3303     3.0
Name: Cumulative_solar_power, Length: 3304, dtype: float64

In [35]:
solarDF.head()

Unnamed: 0,date,Solar_Irradiance,Temperature_2m
0,2011-10-26,1.98,9.46
1,2011-10-27,1.94,9.83
2,2011-10-28,1.97,12.29
3,2011-10-29,1.1,12.71
4,2011-10-30,1.21,12.42


In [33]:
merged_df = pd.merge(df_filtered, solarDF, on='date', how='inner')

merged_df.head()

Unnamed: 0,date,Cumulative_solar_power,Solar_Irradiance,Temperature_2m
0,2011-10-26,0.1,1.98,9.46
1,2011-10-27,10.2,1.94,9.83
2,2011-10-28,20.2,1.97,12.29
3,2011-10-29,29.6,1.1,12.71
4,2011-10-30,34.2,1.21,12.42


In [40]:
merged_df['Daily_solar_power'] = merged_df['Cumulative_solar_power'].diff()

merged_df.head()

Unnamed: 0,date,Cumulative_solar_power,Solar_Irradiance,Temperature_2m,Daily_solar_power
0,2011-10-26,0.1,1.98,9.46,
1,2011-10-27,10.2,1.94,9.83,10.1
2,2011-10-28,20.2,1.97,12.29,10.0
3,2011-10-29,29.6,1.1,12.71,9.4
4,2011-10-30,34.2,1.21,12.42,4.6


In [42]:
merged_df['Daily_solar_power'] = merged_df['Daily_solar_power'].fillna(0.1)

In [84]:
merged_df.head(40)

Unnamed: 0,date,Solar_Irradiance,Temperature_2m,Daily_solar_power
0,2011-10-26,1.98,9.46,0.1
1,2011-10-27,1.94,9.83,10.1
2,2011-10-28,1.97,12.29,10.0
3,2011-10-29,1.1,12.71,9.4
4,2011-10-30,1.21,12.42,4.6
5,2011-10-31,2.09,12.44,3.8
6,2011-11-01,1.78,11.3,8.6
7,2011-11-02,1.55,10.96,5.0
8,2011-11-03,0.76,14.0,7.0
9,2011-11-04,1.91,14.17,1.9


In [104]:
result = merged_df[merged_df['date'] == '2012-01-02']

In [106]:
result

Unnamed: 0,date,Solar_Irradiance,Temperature_2m,Daily_solar_power
68,2012-01-02,0.78,6.32,0.8


In [50]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [52]:
x = merged_df[['Solar_Irradiance', 'Temperature_2m']]
y = merged_df['Daily_solar_power']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

In [58]:
x.head()

Unnamed: 0,Solar_Irradiance,Temperature_2m
0,1.98,9.46
1,1.94,9.83
2,1.97,12.29
3,1.1,12.71
4,1.21,12.42


In [60]:
y.head()

0     0.1
1    10.1
2    10.0
3     9.4
4     4.6
Name: Daily_solar_power, dtype: float64

In [62]:
gbr = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)

gbr.fit(x_train, y_train)

In [64]:
y_pred = gbr.predict(x_test)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [66]:
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")

Mean Absolute Error (MAE): 3.4730394350493192
Mean Squared Error (MSE): 20.719292631760396
R² Score: 0.6821740061899753
