In [54]:
import lightningchart as lc
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

# Read the LightningChart license key from a local text file and register it.
# The location is kept in one named constant so it is easy to find and change.
LICENSE_KEY_PATH = 'D:/Computer Aplication/WorkPlacement/Projects/shared_variable.txt'

with open(LICENSE_KEY_PATH, 'r') as f:
    mylicensekey = f.read().strip()

# Fail here with a clear message instead of handing an empty key to the library.
if not mylicensekey:
    raise ValueError(f'License key file is empty: {LICENSE_KEY_PATH}')

lc.set_license(mylicensekey)

In [55]:
def _read_plant_csv(csv_path):
    """Load one plant CSV and convert its DATE_TIME column to pandas datetimes."""
    frame = pd.read_csv(csv_path)
    frame['DATE_TIME'] = pd.to_datetime(frame['DATE_TIME'])
    return frame


# Generation readings and weather-sensor readings for Plant 2.
generation_data = _read_plant_csv('D:/wenprograming23/src/team6/Implementation-of-a-Solar-Power-Modelling-Application-with-LightningChart-Python/Dataset/Plant_2_Generation_Data.csv')
weather_data = _read_plant_csv('D:/wenprograming23/src/team6/Implementation-of-a-Solar-Power-Modelling-Application-with-LightningChart-Python/Dataset/Plant_2_Weather_Sensor_Data.csv')

# Preview the first rows of each frame, generation first.
for preview_frame in (generation_data, weather_data):
    print(preview_frame.head())

   DATE_TIME  PLANT_ID       SOURCE_KEY  DC_POWER  AC_POWER  DAILY_YIELD  \
0 2020-05-15   4136001  4UPUqMRk7TRMgml       0.0       0.0  9425.000000   
1 2020-05-15   4136001  81aHJ1q11NBPMrL       0.0       0.0     0.000000   
2 2020-05-15   4136001  9kRcWv60rDACzjR       0.0       0.0  3075.333333   
3 2020-05-15   4136001  Et9kgGMDl729KT4       0.0       0.0   269.933333   
4 2020-05-15   4136001  IQ2d7wF4YD8zU1Q       0.0       0.0  3177.000000   

    TOTAL_YIELD  
0  2.429011e+06  
1  1.215279e+09  
2  2.247720e+09  
3  1.704250e+06  
4  1.994153e+07  
            DATE_TIME  PLANT_ID       SOURCE_KEY  AMBIENT_TEMPERATURE  \
0 2020-05-15 00:00:00   4136001  iq8k7ZNt4Mwm3w0            27.004764   
1 2020-05-15 00:15:00   4136001  iq8k7ZNt4Mwm3w0            26.880811   
2 2020-05-15 00:30:00   4136001  iq8k7ZNt4Mwm3w0            26.682055   
3 2020-05-15 00:45:00   4136001  iq8k7ZNt4Mwm3w0            26.500589   
4 2020-05-15 01:00:00   4136001  iq8k7ZNt4Mwm3w0            26.596148 

In [56]:
# Attach the weather readings to every generation row that shares the same
# timestamp and plant. DATE_TIME was already parsed to datetimes when the two
# frames were loaded, so it does not need to be converted again here.
# validate='many_to_one' makes pandas raise MergeError if the weather frame ever
# holds more than one row per (DATE_TIME, PLANT_ID); without it such duplicates
# would silently multiply the generation rows.
merged_data = pd.merge(
    generation_data,
    weather_data,
    on=['DATE_TIME', 'PLANT_ID'],
    validate='many_to_one',
)
print(merged_data.head())


   DATE_TIME  PLANT_ID     SOURCE_KEY_x  DC_POWER  AC_POWER  DAILY_YIELD  \
0 2020-05-15   4136001  4UPUqMRk7TRMgml       0.0       0.0  9425.000000   
1 2020-05-15   4136001  81aHJ1q11NBPMrL       0.0       0.0     0.000000   
2 2020-05-15   4136001  9kRcWv60rDACzjR       0.0       0.0  3075.333333   
3 2020-05-15   4136001  Et9kgGMDl729KT4       0.0       0.0   269.933333   
4 2020-05-15   4136001  IQ2d7wF4YD8zU1Q       0.0       0.0  3177.000000   

    TOTAL_YIELD     SOURCE_KEY_y  AMBIENT_TEMPERATURE  MODULE_TEMPERATURE  \
0  2.429011e+06  iq8k7ZNt4Mwm3w0            27.004764           25.060789   
1  1.215279e+09  iq8k7ZNt4Mwm3w0            27.004764           25.060789   
2  2.247720e+09  iq8k7ZNt4Mwm3w0            27.004764           25.060789   
3  1.704250e+06  iq8k7ZNt4Mwm3w0            27.004764           25.060789   
4  1.994153e+07  iq8k7ZNt4Mwm3w0            27.004764           25.060789   

   IRRADIATION  
0          0.0  
1          0.0  
2          0.0  
3          0

In [57]:
# Columns whose pairwise correlations feed the heatmap.
correlation_columns = ['DC_POWER', 'AC_POWER', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']
selected_features = merged_data[correlation_columns]

# Square correlation matrix, then the same numbers as nested Python lists for
# the chart API.
corr_matrix = selected_features.corr()
heatmap_data = corr_matrix.to_numpy().tolist()

In [None]:
# 2 x 3 grid of panels for the scatter plots (dark theme).
dashboard = lc.Dashboard(theme=lc.Themes.Dark, rows=2, columns=3)

def create_scatter_chart(dashboard, title, x_values, y_values, xlabel, ylabel, column_index, row_index):
    """
    Add a titled scatter plot to one cell of a dashboard.

    Parameters:
        dashboard: Dashboard object exposing ``ChartXY(column_index=..., row_index=...)``.
        title: Chart title.
        x_values, y_values: Numeric point coordinates (list, NumPy array or
            pandas Series).
        xlabel, ylabel: Axis titles.
        column_index, row_index: Dashboard grid cell that receives the chart.

    Returns:
        The created chart, so callers can customise it further.
    """
    chart = dashboard.ChartXY(
        column_index=column_index,
        row_index=row_index
    )
    chart.set_title(title)

    # Hand the chart plain lists of Python floats. The other plotting code in
    # this notebook converts its data the same way to avoid serialization
    # issues with pandas / NumPy containers.
    x_points = np.asarray(x_values, dtype=float).tolist()
    y_points = np.asarray(y_values, dtype=float).tolist()

    scatter_series = chart.add_point_series()
    scatter_series.add(x_points, y_points)
    scatter_series.set_point_color(lc.Color(0, 0, 255, 128))

    chart.get_default_x_axis().set_title(xlabel)
    chart.get_default_y_axis().set_title(ylabel)

    return chart

# Scatter Plots
#
# Every panel plots AC power on the y axis; only the x variable, its label and
# the grid position change, so the panels are described as data and drawn in a
# single loop.

# Time of day expressed in fractional hours.
hour_of_day = merged_data['DATE_TIME'].dt.hour + merged_data['DATE_TIME'].dt.minute / 60

# (title, x values, x-axis label, column index, row index)
scatter_panels = [
    ('AC Power vs. Time', hour_of_day, 'Time (hours)', 0, 0),
    ('AC Power vs. Irradiation', merged_data['IRRADIATION'], 'Irradiation (Wm^-2)', 1, 0),
    ('AC Power vs. Module Temperature', merged_data['MODULE_TEMPERATURE'], 'Module Temperature (Celsius)', 2, 0),
    ('AC Power vs. Ambient Temperature', merged_data['AMBIENT_TEMPERATURE'], 'Ambient Temperature (Celsius)', 0, 1),
    ('AC Power vs. Daily Yield', merged_data['DAILY_YIELD'], 'Daily Yield (Wh)', 1, 1),
    ('AC Power vs. Total Yield', merged_data['TOTAL_YIELD'], 'Total Yield (Wh)', 2, 1),
]

for panel_title, panel_x, panel_xlabel, panel_column, panel_row in scatter_panels:
    create_scatter_chart(
        dashboard,
        panel_title,
        panel_x,
        merged_data['AC_POWER'],
        panel_xlabel,
        'AC Power (kW)',
        column_index=panel_column,
        row_index=panel_row,
    )

# Open the dashboard
dashboard.open('browser')

In [58]:
# Initialize the heatmap chart
chart = lc.ChartXY(title='Correlation Heatmap of Solar Plant Features', theme=lc.Themes.White)

# One grid cell per entry of the correlation matrix.
grid_columns = len(heatmap_data)
grid_rows = len(heatmap_data[0])
series = chart.add_heatmap_grid_series(columns=grid_columns, rows=grid_rows)

# Customize the heatmap: no wireframe, no interpolation between cells, then
# load the correlation values.
series.hide_wireframe()
series.set_intensity_interpolation(False)
series.invalidate_intensity_values(heatmap_data)

# Color steps for the heatmap, anchored at the bounds of a correlation value.
correlation_palette = [
    {"value": -1.0, "color": lc.Color(0, 0, 255)},  # Blue for negative correlation
    {"value": 0.0, "color": lc.Color(255, 255, 255)},  # White for no correlation
    {"value": 1.0, "color": lc.Color(255, 0, 0)},  # Red for positive correlation
]
series.set_palette_colors(
    steps=correlation_palette,
    look_up_property='value',
    percentage_values=False,
)

# Both axes share the same title and span one unit per feature.
feature_count = len(selected_features.columns)
x_axis = chart.get_default_x_axis()
y_axis = chart.get_default_y_axis()
for axis in (x_axis, y_axis):
    axis.set_title('Feature Index')
    axis.set_interval(0, feature_count)

# Open the chart
chart.open('browser')


127.0.0.1 - - [24/Aug/2024 10:25:29] "GET / HTTP/1.1" 200 -


<lightningchart.charts.chart_xy.ChartXY at 0x18c9af89090>

In [59]:
# Model inputs and the AC power column the models learn to predict.
feature_columns = ['IRRADIATION', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE']
features = merged_data[feature_columns]
target = merged_data['AC_POWER']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Candidate regressors keyed by the display name used in the chart titles.
models = dict([
    ("Linear Regression", LinearRegression()),
    ("Random Forest", RandomForestRegressor(n_estimators=100, random_state=42)),
    ("XGBoost", XGBRegressor(random_state=42)),
    ("LightGBM", LGBMRegressor(random_state=42)),
    ("CatBoost", CatBoostRegressor(verbose=0, random_state=42)),
])


In [60]:
# 3-column, 2-row dashboard that holds one feature-importance chart per cell.
dashboard = lc.Dashboard(theme=lc.Themes.Dark, columns=3, rows=2)

def add_feature_importance_to_dashboard(dashboard, model_name, importances, column_index, row_index,
                                        feature_names=None):
    """
    Add a feature importance bar chart to the dashboard.

    Parameters:
        dashboard: Dashboard exposing ``BarChart(column_index, row_index, row_span, column_span)``.
        model_name: Label used in the chart title.
        importances: One importance score per feature, in the same order as the
            feature names.
        column_index, row_index: Dashboard grid cell that receives the chart.
        feature_names: Optional feature labels. When omitted, the columns of
            the module-level ``features`` DataFrame are used, which is what this
            helper always did before the parameter existed.

    Returns:
        The created bar chart.
    """
    if feature_names is None:
        # Only touch the module-level frame when no names were supplied, so
        # callers can avoid depending on that global.
        feature_names = features.columns

    importance_df = pd.DataFrame({'Feature': list(feature_names), 'Importance': importances})
    importance_df = importance_df.sort_values(by='Importance', ascending=False)

    chart = dashboard.BarChart(
        column_index=column_index,
        row_index=row_index,
        row_span=1,
        column_span=1
    )
    chart.set_title(f'{model_name} Feature Importances')

    # Plain str / float values keep NumPy scalar types out of the chart payload.
    bar_data = [
        {'category': str(name), 'value': float(score)}
        for name, score in zip(importance_df['Feature'], importance_df['Importance'])
    ]
    chart.set_data(bar_data)

    return chart

# Standardise every model input column before it reaches a regressor.
preprocessor = ColumnTransformer(transformers=[('num', StandardScaler(), features.columns)])


def _importances_of(fitted_model):
    """Return per-feature importance scores for a fitted regressor.

    Uses ``feature_importances_`` when the model has it, otherwise the absolute
    values of ``coef_``, otherwise a vector of zeros (one per feature column).
    """
    if hasattr(fitted_model, 'feature_importances_'):
        return fitted_model.feature_importances_
    if hasattr(fitted_model, 'coef_'):
        return np.abs(fitted_model.coef_)
    return np.zeros(len(features.columns))


# Fit each model behind the shared preprocessor and chart its importances,
# filling the 3-column grid left to right, top to bottom.
for i, (model_name, model) in enumerate(models.items()):
    pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('regressor', model)])
    pipeline.fit(X_train, y_train)

    importances = _importances_of(model)
    grid_row, grid_column = divmod(i, 3)

    add_feature_importance_to_dashboard(
        dashboard=dashboard,
        model_name=model_name,
        importances=importances,
        column_index=grid_column,
        row_index=grid_row,
    )

# Members of the voting ensemble (fresh, unfitted instances).
estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', XGBRegressor(random_state=42)),
    ('lgbm', LGBMRegressor(random_state=42)),
    ('cat', CatBoostRegressor(verbose=0, random_state=42))
]

voting_reg = VotingRegressor(estimators=estimators)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', voting_reg)
])

pipeline.fit(X_train, y_train)


def _normalized_importances(estimator):
    """Return an estimator's ``feature_importances_`` rescaled to sum to 1."""
    raw = np.asarray(estimator.feature_importances_, dtype=float)
    total = raw.sum()
    # An all-zero vector cannot be rescaled; return it unchanged.
    return raw / total if total > 0 else raw


# The ensemble members report importances on different scales: for example
# LightGBM's default importance is a split count, while scikit-learn's random
# forest reports fractions that sum to 1. Averaging the raw vectors would let
# the library with the largest numbers dominate, so each vector is rescaled to
# sum to 1 before taking the mean.
ensemble_importances = np.mean(
    [
        _normalized_importances(fitted_estimator)
        for fitted_estimator in pipeline.named_steps['regressor'].estimators_
    ],
    axis=0,
)

add_feature_importance_to_dashboard(
    dashboard=dashboard,
    model_name='Ensemble Methods',
    importances=ensemble_importances,
    column_index=2,
    row_index=1
)

dashboard.open('browser')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585


127.0.0.1 - - [24/Aug/2024 10:25:48] "GET / HTTP/1.1" 200 -


<lightningchart.charts.dashboard.Dashboard at 0x18c99fb51d0>

In [61]:
# 3-column, 2-row dashboard that holds one predicted-vs-actual chart per cell.
dashboard = lc.Dashboard(theme=lc.Themes.Dark, columns=3, rows=2)

def add_prediction_vs_actual_to_dashboard(dashboard, model_name, model, column_index, row_index):
    """
    Add a plot of predicted vs actual values to the dashboard.

    Fits ``model`` behind the module-level ``preprocessor`` on
    ``X_train`` / ``y_train``, predicts on ``X_test`` and plots the predictions
    against ``y_test`` together with a y = x reference line.

    Parameters:
        dashboard: Dashboard exposing ``ChartXY(column_index, row_index, column_span, row_span)``.
        model_name: Label used in the chart title.
        model: Regressor placed as the final pipeline step; it is fitted in place.
        column_index, row_index: Dashboard grid cell that receives the chart.

    Returns:
        The created chart.
    """
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', model)
    ])

    pipeline.fit(X_train, y_train)

    # Convert all to float64 to avoid serialization issues
    actual = np.asarray(y_test, dtype='float64')
    predicted = np.asarray(pipeline.predict(X_test), dtype='float64')

    chart = dashboard.ChartXY(column_index=column_index, row_index=row_index, column_span=1, row_span=1)
    chart.set_title(f'{model_name} Predictions vs Actual')

    pred_series = chart.add_point_series()
    pred_series.add(actual.tolist(), predicted.tolist()).set_name('Predicted vs Actual')

    # Vectorised extrema instead of iterating element by element with the
    # builtins; cast to plain floats so the line endpoints are serialized the
    # same way as the point data above.
    min_val = float(min(actual.min(), predicted.min()))
    max_val = float(max(actual.max(), predicted.max()))

    line_series = chart.add_line_series()
    line_series.add([min_val, max_val], [min_val, max_val])
    line_series.set_name('Ideal Line')

    chart.get_default_x_axis().set_title('Actual AC Power')
    chart.get_default_y_axis().set_title('Predicted AC Power')

    legend = chart.add_legend(horizontal=False)
    legend.add(pred_series)
    legend.add(line_series)

    return chart


# Standardise the model input columns before they reach a regressor.
preprocessor = ColumnTransformer(transformers=[('num', StandardScaler(), features.columns)])

# One panel per individual model, laid out row by row across the 3-column grid.
for i, (model_name, model) in enumerate(models.items()):
    grid_row, grid_column = divmod(i, 3)
    add_prediction_vs_actual_to_dashboard(
        dashboard,
        model_name,
        model,
        column_index=grid_column,
        row_index=grid_row,
    )

# The voting ensemble is built from fresh estimator instances and drawn in the
# last grid cell (column 2, row 1).
estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', XGBRegressor(random_state=42)),
    ('lgbm', LGBMRegressor(random_state=42)),
    ('cat', CatBoostRegressor(verbose=0, random_state=42)),
]
voting_reg = VotingRegressor(estimators=estimators)

add_prediction_vs_actual_to_dashboard(
    dashboard,
    'Ensemble Methods',
    voting_reg,
    column_index=2,
    row_index=1,
)

dashboard.open('browser')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000145 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585


127.0.0.1 - - [24/Aug/2024 10:26:08] "GET / HTTP/1.1" 200 -


<lightningchart.charts.dashboard.Dashboard at 0x18c99e1dd50>

In [62]:
# dates_test = pd.to_datetime(merged_data.loc[X_test.index, 'DATE_TIME'])

# # Preprocessor
# preprocessor = ColumnTransformer(
#     transformers=[
#         ('num', StandardScaler(), ['IRRADIATION', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'])
#     ])

# # Initialize dashboard
# dashboard = lc.Dashboard(rows=2, columns=3, theme=lc.Themes.Dark)

# # Function to plot smoothed average MSE over time
# def add_smoothed_mse_to_dashboard(dashboard, model_name, model, dates, column_index, row_index):
#     """
#     Add a line plot of smoothed average MSE over time to the dashboard.
#     """
#     pipeline = Pipeline(steps=[
#         ('preprocessor', preprocessor),
#         ('regressor', model)
#     ])
    
#     # Fit the model and predict
#     pipeline.fit(X_train, y_train)
#     y_pred = pipeline.predict(X_test)
    
#     # Calculate MSE over time
#     mse_over_time = [mean_squared_error(y_test[:i+1], y_pred[:i+1]) for i in range(len(y_pred))]
    
#     # Smooth the MSE using a rolling average
#     mse_smoothed = pd.Series(mse_over_time).rolling(window=10, min_periods=1).mean()
    
#     # Create chart
#     chart = dashboard.ChartXY(column_index=column_index, row_index=row_index, column_span=1, row_span=1)
#     chart.set_title(f'{model_name} Smoothed MSE Over Time')
    
#     mse_series = chart.add_line_series()
#     mse_series.add(dates_test.astype(np.int64) / 10**9, mse_smoothed.tolist()).set_name('Smoothed MSE')
    
#     # Set the x-axis to handle numeric time values
#     x_axis = chart.get_default_x_axis()
#     x_axis.set_title('Date Time')
#     x_axis.set_tick_strategy('DateTime')
    
#     y_axis = chart.get_default_y_axis()
#     y_axis.set_title('Smoothed MSE')
    
#     legend = chart.add_legend(horizontal=False)
#     legend.add(mse_series)

# # Loop through models and add their smoothed MSE over time to the dashboard
# for i, (model_name, model) in enumerate(models.items()):
#     add_smoothed_mse_to_dashboard(dashboard, model_name, model, dates_test, column_index=i % 3, row_index=i // 3)

# # Adding Ensemble Methods model
# voting_reg = VotingRegressor(estimators=[
#     ('lr', LinearRegression()),
#     ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
#     ('xgb', XGBRegressor(random_state=42)),
#     ('lgbm', LGBMRegressor(random_state=42)),
#     ('cat', CatBoostRegressor(verbose=0, random_state=42))
# ])

# add_smoothed_mse_to_dashboard(dashboard, 'Ensemble Methods', voting_reg, dates_test, column_index=2, row_index=1)
# dashboard.open('browser')

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000399 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 765
[LightGBM] [Info] Number of data points in the train set: 54158, number of used features: 3
[LightGBM] [Info] Start training from score 241.709585


127.0.0.1 - - [24/Aug/2024 10:26:46] "GET / HTTP/1.1" 200 -


<lightningchart.charts.dashboard.Dashboard at 0x18c99e0c910>

127.0.0.1 - - [24/Aug/2024 10:27:11] "GET / HTTP/1.1" 200 -


<lightningchart.charts.dashboard.Dashboard at 0x18c895b0e10>

In [65]:
from scipy.stats import gaussian_kde

# Columns shown in the pair-plot grid. They live under their own name so the
# module-level `features` DataFrame used by the modelling cells is not replaced
# by a plain list when this cell runs; re-running those cells afterwards would
# otherwise fail on `features.columns`.
pairplot_columns = ['IRRADIATION', 'AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'AC_POWER']
data = merged_data[pairplot_columns]

# Square grid: one row and one column per plotted variable.
dashboard = lc.Dashboard(
    rows=len(pairplot_columns),
    columns=len(pairplot_columns),
    theme=lc.Themes.Dark
)


def create_density_chart(dashboard, title, values, column_index, row_index):
    """
    Add a kernel-density-estimate area chart to one cell of a dashboard.

    Parameters:
        dashboard: Dashboard exposing ``ChartXY(column_index=..., row_index=...)``.
        title: Chart title.
        values: Numeric samples (list, NumPy array or pandas Series) whose
            density is estimated with ``scipy.stats.gaussian_kde``.
        column_index, row_index: Dashboard grid cell that receives the chart.

    Returns:
        The created chart.
    """
    chart = dashboard.ChartXY(
        column_index=column_index,
        row_index=row_index
    )
    chart.set_title(title)

    # Accept any numeric sequence, not only arrays that already have .min()/.max().
    values = np.asarray(values, dtype=float)

    # Evaluate the density on 100 evenly spaced points across the observed range.
    density = gaussian_kde(values)
    x_vals = np.linspace(values.min(), values.max(), 100)
    y_vals = density(x_vals)

    series = chart.add_area_series()
    series.add(x_vals.tolist(), y_vals.tolist())
    series.set_name('Density')
    series.set_fill_color(lc.Color(30, 144, 255, 128))

    chart.get_default_x_axis().set_title('Value')
    chart.get_default_y_axis().set_title('Density')

    return chart


def create_scatter_chart(dashboard, title, x_values, y_values, xlabel, ylabel, column_index, row_index):
    """
    Add a titled scatter plot to one cell of a dashboard.

    NOTE(review): this redefines the ``create_scatter_chart`` helper from the
    earlier scatter-dashboard cell; the only difference is the point color.

    Parameters:
        dashboard: Dashboard exposing ``ChartXY(column_index=..., row_index=...)``.
        title: Chart title.
        x_values, y_values: Point coordinates passed straight to the series.
        xlabel, ylabel: Axis titles.
        column_index, row_index: Dashboard grid cell that receives the chart.

    Returns:
        The created chart.
    """
    chart = dashboard.ChartXY(
        column_index=column_index,
        row_index=row_index
    )
    chart.set_title(title)

    scatter_series = chart.add_point_series()
    scatter_series.add(x_values, y_values)
    scatter_series.set_point_color((lc.Color(30, 144, 255)))

    chart.get_default_x_axis().set_title(xlabel)
    chart.get_default_y_axis().set_title(ylabel)

    return chart


# Convert each column to a list of floats once instead of once per panel.
column_values = {col: data[col].astype(float).tolist() for col in pairplot_columns}

# Diagonal cells show each variable's density; off-diagonal cells show the
# scatter of the column variable (x) against the row variable (y).
for row_index, y_col in enumerate(pairplot_columns):
    for column_index, x_col in enumerate(pairplot_columns):
        if row_index == column_index:
            create_density_chart(
                dashboard,
                f'Density of {x_col}',
                column_values[x_col],
                column_index,
                row_index,
            )
        else:
            create_scatter_chart(
                dashboard,
                f'{x_col} vs {y_col}',
                column_values[x_col],
                column_values[y_col],
                x_col,
                y_col,
                column_index,
                row_index,
            )

dashboard.open('browser')

127.0.0.1 - - [24/Aug/2024 10:31:48] "GET / HTTP/1.1" 200 -


<lightningchart.charts.dashboard.Dashboard at 0x18c9a3b2a90>