**Import libraries**

In [263]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error

**Load dataset**

In [264]:
df = pd.read_csv('cleaned.csv')
df.head()

Unnamed: 0,name,temp,humidity,precip,precipcover,windgust,solarradiation,solarenergy,uvindex,O3,SO2
0,bangkok,26.4,61.7,0.0,0.0,37.1,106.8,9.4,6,22.0,1.62069
1,bangkok,26.6,61.9,0.0,0.0,26.166667,100.4,8.6,5,17.0,1.185185
2,bangkok,26.8,62.3,0.0,0.0,24.061111,104.0,9.0,5,18.0,1.823529
3,bangkok,27.6,63.7,0.0,0.0,23.2325,90.2,7.9,5,21.0,1.0
4,bangkok,28.1,59.0,0.0,0.0,19.385714,127.3,10.8,5,23.0,1.0


In [265]:
df.columns

Index(['name', 'temp', 'humidity', 'precip', 'precipcover', 'windgust',
       'solarradiation', 'solarenergy', 'uvindex', 'O3', 'SO2'],
      dtype='object')

**Feature and Target**

In [266]:
x = df[['temp', 'humidity', 'precip', 'precipcover', 'windgust', 'solarenergy', 'uvindex', 'O3', 'SO2']]
y = df[['solarradiation']]

**Show feature**

In [267]:
x.head()

Unnamed: 0,temp,humidity,precip,precipcover,windgust,solarenergy,uvindex,O3,SO2
0,26.4,61.7,0.0,0.0,37.1,9.4,6,22.0,1.62069
1,26.6,61.9,0.0,0.0,26.166667,8.6,5,17.0,1.185185
2,26.8,62.3,0.0,0.0,24.061111,9.0,5,18.0,1.823529
3,27.6,63.7,0.0,0.0,23.2325,7.9,5,21.0,1.0
4,28.1,59.0,0.0,0.0,19.385714,10.8,5,23.0,1.0


**Show target**

In [268]:
y.head()

Unnamed: 0,solarradiation
0,106.8
1,100.4
2,104.0
3,90.2
4,127.3


**Feature scaling**

In [269]:
scaler = MinMaxScaler()
x_norm = scaler.fit_transform(x)
x_norm

array([[0.33070866, 0.30642202, 0.        , ..., 0.55555556, 0.25609756,
        0.02002225],
       [0.34645669, 0.31009174, 0.        , ..., 0.44444444, 0.19512195,
        0.00597372],
       [0.36220472, 0.31743119, 0.        , ..., 0.44444444, 0.20731707,
        0.02656546],
       ...,
       [0.69291339, 0.3559633 , 0.00157124, ..., 0.55555556, 0.08130081,
        0.13121094],
       [0.63779528, 0.45321101, 0.        , ..., 0.66666667, 0.18089431,
        0.13121094],
       [0.62204724, 0.4587156 , 0.01414116, ..., 0.66666667, 0.18651363,
        0.80645161]])

**Separate data sets for training and test data**

In [270]:
x_train, x_test, y_train, y_test = train_test_split(x_norm, y, test_size=0.3, random_state=42)

**Show the shape of train and test data**

In [271]:
print("Train: ", x_train.shape)
print("Test: ", x_test.shape)

Train:  (3318, 9)
Test:  (1422, 9)


**Linear Regression Model**

In [272]:
linear_model = LinearRegression()
linear_model.fit(x_train, y_train)
linear_pred = linear_model.predict(x_test)

linear_MAE = mean_absolute_error(y_test, linear_pred)
linear_MSE = mean_squared_error(y_test, linear_pred)
linear_RMSE = np.sqrt(linear_MSE)

**Support Vector Regression Model**

In [273]:
svr_model = SVR()
svr_model.fit(x_train, y_train)
svr_pred = svr_model.predict(x_test)

svr_MAE = mean_absolute_error(y_test, svr_pred)
svr_MSE = mean_squared_error(y_test, svr_pred)
svr_RMSE = np.sqrt(svr_MSE)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



**Comparison of actual and predicted values Linear Regression**

In [274]:
df = pd.DataFrame({'Actual': y_test.iloc[:100, 0], 'Predicted': linear_pred[:100][:,0], 'Data Point': range(100)})
fig = px.line(df, x='Data Point', y=['Actual', 'Predicted'], title='Comparison of actual and predicted values - <span style="color:red">Linear Regression</span>',
              labels={'value': 'Test Value', 'Data Point': 'Data Point'})
fig.update_layout(xaxis_title="Data Point", yaxis_title="Test Value", width=1400, height=650)
fig.show()

**Comparison of actual and predicted values Support Vector Regression**

In [275]:
df_svr = pd.DataFrame({'Actual': y_test.iloc[:100, 0], 'Predicted': svr_pred[:100], 'Data Point': range(100)})
fig_svr = px.line(df_svr, x='Data Point', y=['Actual', 'Predicted'], title='Comparison of actual and predicted values - <span style="color:red">Support Vector Regression</span>',
                  labels={'value': 'Test Value', 'Data Point': 'Data Point'})
fig_svr.update_layout(xaxis_title="Data Point", yaxis_title="Test Value", width=1400, height=650)
fig_svr.show()

**Check the performance of Linear Regression Model**

In [276]:
print("Mean Absolute Error (MAE):", linear_MAE)
print("Mean Squared Error (MSE):", linear_MSE)
print("Root Mean Squared Error (RMSE):", linear_RMSE)

Mean Absolute Error (MAE): 0.9161147737998058
Mean Squared Error (MSE): 1.328987280185861
Root Mean Squared Error (RMSE): 1.1528171061299624


**Check the performance of Support Vector Regression Model**

In [277]:
print("Mean Absolute Error (MAE):", svr_MAE)
print("Mean Squared Error (MSE):", svr_MSE)
print("Root Mean Squared Error (RMSE):", svr_RMSE)

Mean Absolute Error (MAE): 6.0002615739969904
Mean Squared Error (MSE): 139.41082750264533
Root Mean Squared Error (RMSE): 11.807236234726792


**Comparison of performance Metrics between Linear Regression and SVR**

In [278]:
data = {
    "Model": ["Linear Regression", "Support Vector Regression"],
    "MAE": [linear_MAE, svr_MAE],
    "MSE": [linear_MSE, svr_MSE],
    "RMSE": [linear_RMSE, svr_RMSE]
}
df_performance = pd.DataFrame(data)
df_performance_melted = df_performance.melt(id_vars=["Model"], var_name="Metric", value_name="Value")
fig = px.bar(df_performance_melted, x="Metric", y="Value", color="Model", barmode="group",
             title="Comparison of performance Metrics between Linear Regression and SVR",
             labels={"Value": "Metric Value", "Metric": "Metric", "Model": "Model"},
             color_discrete_map={"Linear Regression": "RoyalBlue", "Support Vector Regression": "Crimson"})
fig.update_layout(width=1400, height=650)
fig.show()

**Solar radiation of bangkok and surrounding provinces**

In [279]:
results_df

Unnamed: 0,Location,Linear Regression Prediction
0,bangkok,155.03625
1,nakhonpathom,154.397353
2,nonthaburi,155.038436
3,pathumthani,154.650925
4,samutprakan,156.26626
5,samutsakhon,155.861296


In [280]:
fig = px.bar(results_df, x='Location', y='Linear Regression Prediction',
             title='Solar Radiation of Bangkok and surrounding provinces',
             labels={'Predicted': 'Solar Radiation Prediction', 'Location': 'Location'},
             color='Location')
fig.show()