In [1]:
import pandas as pd
import numpy as np
import os

import plotly.graph_objects as go
import plotly.express as px
import plotly.subplots as sp

import joblib

In [41]:
# Pandas display settings.
# 'display.width' used to be set twice in this cell (None, then 1000); only the
# last call ever took effect, so the single effective value is kept.
pd.set_option('display.max_columns', None)  # show every column instead of eliding the middle ones
pd.set_option('display.width', 1000)  # let wide frames use up to 1000 characters per line before wrapping

# IMPORTING DATA

In [44]:
#Main Definitions Here

def import_combine(folder, geo, file_count = 0):
    """Read the numbered CSV chunks of one geometry and stack them into one frame.

    Chunk ``i`` is read from ``{folder}/{geo}_8760.hrs_{i}.csv`` for every
    ``i`` in ``range(file_count)``, using ``#`` as the field separator.
    A missing chunk is reported on stdout and skipped.

    Parameters
    ----------
    folder : str
        Directory that contains the chunk files.
    geo : str
        File-name prefix of the geometry to load.
    file_count : int, default 0
        Number of consecutive chunk indices (starting at 0) to try.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all chunks that were found, re-indexed
        0..n-1. An empty DataFrame when no chunk could be read (including the
        default ``file_count=0``).
    """
    frames = []
    for i in range(file_count):
        filename = f'{folder}/{geo}_8760.hrs_{str(i)}.csv'
        # Alternative naming scheme kept for reference:
        # filename = f'{folder}/{geo}_{str(i)}.csv'
        try:
            frames.append(pd.read_csv(filename, sep='#'))
        except FileNotFoundError:
            print(f'File {filename} not found. Skipping.')

    if not frames:
        # pd.concat([]) raises "No objects to concatenate"; hand back an empty
        # frame so the caller gets a usable (if empty) result instead.
        print(f'No files could be read for "{geo}" in {folder}. Returning an empty DataFrame.')
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)


def clean_data(combined_df):
    """Expand the vector columns, add their dot product and drop duplicate rows.

    Steps, in order:
      1. Parse the textual ``vox_vec`` / ``geo_vec`` cells into Python lists.
      2. Split the first three components of each vector into
         ``vox_vecX/Y/Z`` and ``geo_vecX/Y/Z``.
      3. Add ``vox_dot`` = dot product of the two vectors of each row.
      4. Drop the original ``vox_vec`` / ``geo_vec`` list columns.
      5. Move ``geo_rad`` and ``geo_s.hr`` to the end of the column order.
      6. Remove duplicated rows.

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Must contain the columns ``vox_vec``, ``geo_vec``, ``geo_rad`` and
        ``geo_s.hr``. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns described above.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError, SyntaxError
        If a vector cell is a string that is not a plain Python literal.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import ast

    def convert_to_list(value):
        # literal_eval only accepts literals (lists, numbers, ...), so text read
        # from a CSV file can never execute code the way eval() would.
        # Cells that are already parsed (not strings) are passed through.
        return ast.literal_eval(value) if isinstance(value, str) else value

    vox_vectors = combined_df['vox_vec'].apply(convert_to_list)
    geo_vectors = combined_df['geo_vec'].apply(convert_to_list)

    # drop() returns a new frame, so the caller's frame stays untouched.
    output_df = combined_df.drop(columns=['geo_vec', 'vox_vec'])

    #===================================================================================================================================================
    # Split each vector into three scalar columns (X, Y, Z = components 0, 1, 2).
    # Plain list comprehensions also work for an empty frame, where the former
    # zip(*...) unpacking failed.
    for prefix, vectors in (('vox_vec', vox_vectors), ('geo_vec', geo_vectors)):
        for axis, suffix in enumerate('XYZ'):
            output_df[f'{prefix}{suffix}'] = [vec[axis] for vec in vectors]

    #===================================================================================================================================================
    # Vector Math
    # Dot product of the voxel vector with the related geometry vector,
    # computed per pair instead of through the slower row-wise DataFrame.apply.
    output_df['vox_dot'] = [np.dot(vox, geo) for vox, geo in zip(vox_vectors, geo_vectors)]

    #===================================================================================================================================================
    # Move Columns to Last
    move_columns = ['geo_rad', 'geo_s.hr']
    leading_columns = [col for col in output_df.columns if col not in move_columns]
    output_df = output_df[leading_columns + move_columns]

    #===================================================================================================================================================
    # Clean data: remove exact duplicate rows
    return output_df.drop_duplicates()

In [45]:
#Import Training Data
# NOTE(review): absolute, machine-specific path; the notebook only runs where this folder exists.
folder = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/COLAB/CSV/17_cube dataset_step15_sunshaded"
geo = "cube_hole"
file_count = 343  # number of chunk indices (0..342) that import_combine will try to read
combined_df = import_combine(folder, geo, file_count)
print(combined_df.shape)

#Clean Training Data
cleaned_df = clean_data(combined_df)
print(cleaned_df.shape)

#Pop & Group for Identifying Relationships
# Label-based slice: every column from 'vox_rad' through 'geo_s.hr' (both inclusive).
relation_df = cleaned_df.loc[:, 'vox_rad':'geo_s.hr']

# Preview the cleaned frame. It must be the last expression of the cell to be
# rendered; in the middle of the cell its result was silently discarded.
cleaned_df.head()

(35674, 8)
(31970, 13)


# EVALUATING DATA _ CORRELATION

In [9]:
import pandas as pd
import plotly.express as px
import numpy as np

# Shrink factor applied to the full-size figure dimensions further down.
divider = 3

# Pairwise correlation between all columns of relation_df.
corr_matrix = relation_df.corr()

# Raw column name -> label shown on the heatmap axes.
custom_labels = dict([
    ("vox_rad", "Box I.Rad"),
    ("sun_hit", "Sun Occ"),
    ("vox_vecX", "Box.X"),
    ("vox_vecY", "Box.Y"),
    ("vox_vecZ", "Box.Z"),
    ("geo_vecX", "Geo.X"),
    ("geo_vecY", "Geo.Y"),
    ("geo_vecZ", "Geo.Z"),
    ("vox_dot", "Dot Product"),
    ("geo_rad", "Geo. I.Rad"),
    ("geo_s.hr", "Geo. Sun Hrs."),
])

# Axis labels; a column without an entry above keeps its raw name.
custom_x_labels = [custom_labels.get(name, name) for name in corr_matrix.columns]
custom_y_labels = [custom_labels.get(name, name) for name in corr_matrix.index]

# Heatmap of the correlation matrix.
fig = px.imshow(
    corr_matrix,
    x=custom_x_labels,
    y=custom_y_labels,
    labels={"color": "Corr. Factor"},
    color_continuous_scale="RdBu",
    aspect='auto',
)

# Cell captions: coefficients rounded to three decimals, as strings.
text_matrix = np.around(corr_matrix.values, decimals=3).astype(str)

# One annotation per cell, visited row by row; x is the column position and
# y the row position of the cell.
for (i, j), value in np.ndenumerate(text_matrix):
    fig.add_annotation(
        x=j,
        y=i,
        text=value,
        showarrow=False,
        font={"size": 5, "color": 'black'},
        align="center",
        bgcolor="rgba(255,255,255,0)",  # alpha 0: fully transparent background
    )

# Overall figure size and interaction settings.
fig.update_layout(
    title='',
    dragmode='select',
    width=1500/divider,
    height=725/divider,
    hovermode='closest',
)

# One tick per matrix column, labelled with the custom names and rotated by 90 degrees.
fig.update_xaxes(
    tickmode='array',
    tickvals=list(range(len(custom_x_labels))),
    ticktext=custom_x_labels,
    tickfont={"size": 10},
    tickangle=90,
)

# One tick per matrix row, labelled with the custom names.
fig.update_yaxes(
    tickmode='array',
    tickvals=list(range(len(custom_y_labels))),
    ticktext=custom_y_labels,
    tickfont={"size": 10},
)

fig.show()


In [10]:
import pandas as pd
import plotly.express as px
import numpy as np

# Label-free, full-size version of the correlation heatmap.
# Requires relation_df from the data-loading cell.
corr_matrix = relation_df.corr()

# Raw column name -> display label (only visible on hover, since the tick
# labels are hidden below).
custom_labels = dict([
    ("vox_rad", "Box I.Rad"),
    ("sun_hit", "Sun Occ"),
    ("vox_vecX", "Box.X"),
    ("vox_vecY", "Box.Y"),
    ("vox_vecZ", "Box.Z"),
    ("geo_vecX", "Geo.X"),
    ("geo_vecY", "Geo.Y"),
    ("geo_vecZ", "Geo.Z"),
    ("vox_dot", "Dot Product"),
    ("geo_rad", "Geo. I.Rad"),
    ("geo_s.hr", "Geo. Sun Hrs."),
])

heatmap_x_labels = [custom_labels.get(name, name) for name in corr_matrix.columns]
heatmap_y_labels = [custom_labels.get(name, name) for name in corr_matrix.index]

# Heatmap with an untitled colour bar.
fig = px.imshow(
    corr_matrix,
    x=heatmap_x_labels,
    y=heatmap_y_labels,
    labels={"color": ""},
    color_continuous_scale="RdBu",
    aspect='auto',
)

# Size, interaction mode and a uniform light-grey background.
fig.update_layout(
    title='',
    dragmode='select',
    width=1510,
    height=725,
    hovermode='closest',
    plot_bgcolor='#f3f3f5',
    paper_bgcolor='#f3f3f5',
)

# Hide the tick labels on both axes.
fig.update_xaxes(showticklabels=False)
fig.update_yaxes(showticklabels=False)

fig.show()


In [11]:
import plotly.express as px
import plotly.subplots as sp
import numpy as np
import pandas as pd

# Correlation of every feature with the two targets, one heatmap strip per
# 'vox_i' group, laid out side by side.
# Requires cleaned_df (data-loading cell) and divider (first correlation cell).

# NOTE: rel_df is not used anywhere in this cell; it is kept only in case
# other cells rely on the name.
rel_df = cleaned_df.drop(columns=['geo_i', 'vox_rad', 'vox_vecX', 'vox_vecY', 'vox_vecZ'])
rel_group = cleaned_df.groupby("vox_i")
num_groups = len(rel_group)

# Raw column name -> display label
custom_labels = {
    "geo_rad": "Geo. I.Rad",
    "geo_s.hr": "Geo. Sun Hrs.",
    "xb_1": "XB 1",
    "xb_2": "XB 2",
    "xb_3": "XB 3",
    "xb_4": "XB 4",
    "xb_5": "XB 5",
    "xb_6": "XB 6",
    "vox_vecX": "Box.X",
    "vox_vecY": "Box.Y",
    "vox_vecZ": "Box.Z",
    "vox_dot": "Dot Product"
}

# Target columns kept from each correlation matrix (loop-invariant).
selected_columns = ['geo_rad', 'geo_s.hr']
custom_x_labels = [custom_labels.get(col, col) for col in selected_columns]

figures = []

# Iterate over the groups themselves (sorted by key) instead of calling
# get_group(0..n-1), which raised KeyError whenever the 'vox_i' values were
# not exactly the integers 0..n-1.
for _, group_df in rel_group:
    relations = group_df.drop(columns=['vox_i', 'geo_i', 'vox_vecX', 'vox_vecY', 'vox_vecZ'])

    # Columns that are constant within a group have no defined correlation
    # (NaN); those cells are shown as 0.
    corr_matrix = relations.corr().fillna(0)[selected_columns]

    custom_y_labels = [custom_labels.get(row, row) for row in corr_matrix.index]

    # Heatmap strip for this group
    fig = px.imshow(
        corr_matrix,
        labels=dict(color="Coeff"),
        color_continuous_scale="RdBu",
        aspect='auto',
        x=custom_x_labels,
        y=custom_y_labels
    )

    fig.update_layout(
        title='',
        dragmode='select',
        width=300,
        height=600,
        hovermode='closest',
        font=dict(size=25)
    )

    figures.append(fig)

# One subplot column per group
subplot_fig = sp.make_subplots(rows=1, cols=num_groups, subplot_titles=[f'Bound Face {i+1}' for i in range(num_groups)], horizontal_spacing=0.05)

for idx, fig in enumerate(figures):
    # Move the heatmap trace(s) of each per-group figure into its subplot
    for trace in fig['data']:
        trace['showscale'] = False  # Hide individual scales
        subplot_fig.add_trace(trace, row=1, col=idx+1)

    # Carry over any annotations. Plotly names the first axis pair 'x'/'y' and
    # the following ones 'x2'/'y2', ...; 'x1'/'y1' is not a valid reference.
    axis_suffix = '' if idx == 0 else str(idx + 1)
    for ann in fig['layout']['annotations']:
        ann['xref'] = f'x{axis_suffix}'
        ann['yref'] = f'y{axis_suffix}'
        subplot_fig.add_annotation(ann)

# Update layout to ensure the color scale is RdBu
subplot_fig.update_layout(coloraxis=dict(colorscale='RdBu'))

# Hide y-axis labels for all but the first subplot
for idx in range(2, num_groups+1):
    subplot_fig.update_yaxes(showticklabels=False, row=1, col=idx)

# Tick label size on both axes
subplot_fig.update_xaxes(tickfont=dict(size=10))
subplot_fig.update_yaxes(tickfont=dict(size=10))

# Font size of the subplot titles (stored as layout annotations)
for annotation in subplot_fig['layout']['annotations']:
    annotation['font'] = dict(size=10)

# Background colour and overall size; both dimensions are scaled by divider.
background_color = 'rgb(240, 240, 240)'  # light gray
subplot_fig.update_layout(
    plot_bgcolor=background_color,
    paper_bgcolor=background_color,
    height=1500/divider,
    width=725/divider * num_groups/1.375,
    showlegend=False,
    font=dict(size=10)
)

# Show the figure
subplot_fig.show()


In [12]:
import plotly.express as px
import plotly.subplots as sp
import numpy as np
import pandas as pd

# Full-size, label-free version of the per-'vox_i' correlation strips.
# Requires cleaned_df from the data-loading cell.

# NOTE: rel_df is not used anywhere in this cell; it is kept only in case
# other cells rely on the name.
rel_df = cleaned_df.drop(columns=['geo_i', 'vox_rad', 'vox_vecX', 'vox_vecY', 'vox_vecZ'])
rel_group = cleaned_df.groupby("vox_i")
num_groups = len(rel_group)

# Raw column name -> display label
custom_labels = {
    "geo_rad": "Geo. I.Rad",
    "geo_s.hr": "Geo. Sun Hrs.",
    "xb_1": "XB 1",
    "xb_2": "XB 2",
    "xb_3": "XB 3",
    "xb_4": "XB 4",
    "xb_5": "XB 5",
    "xb_6": "XB 6",
    "vox_vecX": "Box.X",
    "vox_vecY": "Box.Y",
    "vox_vecZ": "Box.Z",
    "vox_dot": "Dot Product"
}

# Target columns kept from each correlation matrix (loop-invariant).
selected_columns = ['geo_rad', 'geo_s.hr']
custom_x_labels = [custom_labels.get(col, col) for col in selected_columns]

figures = []

# Iterate over the groups themselves (sorted by key) instead of calling
# get_group(0..n-1), which raised KeyError whenever the 'vox_i' values were
# not exactly the integers 0..n-1.
for _, group_df in rel_group:
    relations = group_df.drop(columns=['vox_i', 'geo_i', 'vox_vecX', 'vox_vecY', 'vox_vecZ'])

    # Columns that are constant within a group have no defined correlation
    # (NaN); those cells are shown as 0.
    corr_matrix = relations.corr().fillna(0)[selected_columns]

    custom_y_labels = [custom_labels.get(row, row) for row in corr_matrix.index]

    # Heatmap strip for this group
    fig = px.imshow(
        corr_matrix,
        labels=dict(color="Coeff"),
        color_continuous_scale="RdBu",
        aspect='auto',
        x=custom_x_labels,
        y=custom_y_labels
    )

    fig.update_layout(
        title='',
        dragmode='select',
        width=300,
        height=600,
        hovermode='closest',
        font=dict(size=25)
    )

    figures.append(fig)

# One subplot column per group (no subplot titles in this version)
subplot_fig = sp.make_subplots(rows=1, cols=num_groups, horizontal_spacing=0.05)

for idx, fig in enumerate(figures):
    # Move the heatmap trace(s) of each per-group figure into its subplot
    for trace in fig['data']:
        trace['showscale'] = False  # Hide individual scales
        subplot_fig.add_trace(trace, row=1, col=idx+1)

    # Carry over any annotations. Plotly names the first axis pair 'x'/'y' and
    # the following ones 'x2'/'y2', ...; 'x1'/'y1' is not a valid reference.
    axis_suffix = '' if idx == 0 else str(idx + 1)
    for ann in fig['layout']['annotations']:
        ann['xref'] = f'x{axis_suffix}'
        ann['yref'] = f'y{axis_suffix}'
        subplot_fig.add_annotation(ann)

# Update layout to ensure the color scale is RdBu
subplot_fig.update_layout(coloraxis=dict(colorscale='RdBu'))

# Hide y-axis and x-axis labels for all subplots
for idx in range(1, num_groups+1):
    subplot_fig.update_yaxes(showticklabels=False, row=1, col=idx)
    subplot_fig.update_xaxes(showticklabels=False, row=1, col=idx)

# Font size for any layout annotations (e.g. subplot titles, if present)
for annotation in subplot_fig['layout']['annotations']:
    annotation['font'] = dict(size=35)

# Background colour and overall size
background_color = 'rgb(240, 240, 240)'  # light gray
subplot_fig.update_layout(
    plot_bgcolor=background_color,
    paper_bgcolor=background_color,
    height=1500,
    width=725 * num_groups/1.45,
    showlegend=False,
    font=dict(size=18)
)

# Show the figure
subplot_fig.show()


# EVALUATING DATA _ SCATTER

In [54]:
import plotly.express as px
import numpy as np
import pandas as pd

# Assuming relation_df is already defined and contains the necessary columns

# Function to create a plot with a specific angle
# Number of camera positions in one full 360-degree orbit. Used both for the
# per-frame angle and for the number of figures generated below, so the two
# can no longer drift apart.
N_FRAMES = 120


def create_plot(frame, n_frames=N_FRAMES):
    """Build the 3D scatter of relation_df seen from one position of a camera orbit.

    Parameters
    ----------
    frame : int
        Index of the camera position; frame ``k`` is rotated by
        ``k * 360 / n_frames`` degrees about the z axis.
    n_frames : int, default N_FRAMES (120)
        Number of frames that make up one full revolution.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter of ``sun_hit`` (x, also the colour), ``vox_dot`` (y) and
        ``geo_rad`` (z), read from the notebook-level ``relation_df``.
    """
    angle = frame * 360 / n_frames  # camera rotation in degrees for this frame
    fig = px.scatter_3d(
        relation_df,
        x="sun_hit",
        y="vox_dot",
        z="geo_rad",
        color="sun_hit",  # colour the points by sun occlusion
        opacity=1,
        labels={
            "sun_hit": "Sun Occlusion",
            "vox_dot": "Vox Dot",
            "geo_rad": "Geo. Radiation"
        },
        color_continuous_scale=px.colors.sequential.Viridis
    )

    # Larger canvas, uniform background, no tick labels, bigger axis titles
    fig.update_layout(
        title="3D Scatter Plot of Sun Occ, Vox Dot, and Geo. I.Rad",
        width=800,
        height=800,
        plot_bgcolor='#f3f3f5',
        paper_bgcolor='#f3f3f5',
        scene=dict(
            xaxis=dict(
                showticklabels=False,
                title=dict(font=dict(size=25))
            ),
            yaxis=dict(
                showticklabels=False,
                title=dict(font=dict(size=25))
            ),
            zaxis=dict(
                showticklabels=False,
                title=dict(font=dict(size=25))
            ),
            camera=dict(
                # Eye travels on a circle of radius 2.1 at constant height z=0.5
                eye=dict(x=2.1 * np.sin(np.radians(angle)), y=2.1 * np.cos(np.radians(angle)), z=0.5),
                projection=dict(type='perspective')
            )
        )
    )
    return fig

# Create a list to store all the plots
plots = []

# Generate and store one figure per camera position of the orbit
for frame in range(N_FRAMES):
    fig = create_plot(frame)
    plots.append(fig)


In [55]:
# Preview one stored frame: index 15 corresponds to a 45-degree camera angle (15 * 360 / 120).
plots[15].show()

In [51]:
# NOTE: every line of this cell is commented out, so running it does nothing.
# # Save the plots to HTML files
# output_dir = "C:/Users/shrey/OneDrive/Desktop/New folder"
# for i, fig in enumerate(plots):
#     fig.write_html(f"{output_dir}/frame_{i:03d}.html")

# print("Plots have been saved as HTML files.")

# import os
# from selenium import webdriver
# from selenium.webdriver.chrome.options import Options
# import time

# # Define the directories
# output_dir = "C:/Users/shrey/OneDrive/Desktop/New folder"
# screenshot_dir = "C:/Users/shrey/OneDrive/Desktop/New folder/screenshots"

# # Create the screenshot directory if it doesn't exist
# if not os.path.exists(screenshot_dir):
#     os.makedirs(screenshot_dir)

# # Set up the Selenium WebDriver with headless Chrome
# chrome_options = Options()
# chrome_options.add_argument("--headless")
# chrome_options.add_argument("--disable-gpu")
# chrome_options.add_argument("--window-size=1600,1200")
# driver = webdriver.Chrome(options=chrome_options)

# # Function to capture a screenshot of an HTML file
# def capture_screenshot(html_file, screenshot_file):
#     driver.get(f"file:///{html_file}")
#     time.sleep(2)  # Wait for the page to load
#     driver.save_screenshot(screenshot_file)

# # Capture screenshots for all frames
# for i in range(120):
#     html_file = f"{output_dir}/frame_{i:03d}.html"
#     screenshot_file = f"{screenshot_dir}/frame_{i:03d}.png"
#     capture_screenshot(html_file, screenshot_file)

# # Close the WebDriver
# driver.quit()

# print("Screenshots have been saved.")


Plots have been saved as HTML files.
Screenshots have been saved.


# Random Forest Regression for Incident Radiation & Direct Sun Hours

In [30]:
from sklearn.ensemble import RandomForestRegressor
import joblib
import os
import time

# NOTE(review): absolute, machine-specific output folder.
model_path = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/ML Models/01P_Trained_VoxRad-VoxDot"

# Hyper-parameters shared by both forests. These two names used to be defined
# but ignored: the constructors hard-coded n_estimators=100 and random_state=5
# while rstate said 10. They now drive the models, and rstate holds the seed
# that was actually in effect, so the trained models are unchanged.
estimators = 100
rstate = 5

# Features: every column from 'vox_rad' through 'vox_dot' (label slice, inclusive)
feature_set = cleaned_df.loc[:, 'vox_rad':'vox_dot']
y_geo_radiation = cleaned_df['geo_rad']

# Train Random Forest Regressor model for Radiation on the entire dataset
start_time = time.time()
random_forest_regressor_radiation = RandomForestRegressor(n_estimators=estimators, random_state=rstate)
random_forest_regressor_radiation.fit(feature_set, y_geo_radiation)
random_forest_time_radiation = time.time() - start_time

# Print training time for Radiation
print(f"Training time for Random Forest Regressor (Radiation): {random_forest_time_radiation:.2f} seconds")

# Save the Random Forest model for Radiation to a specific path
os.makedirs(model_path, exist_ok=True)
joblib.dump(random_forest_regressor_radiation, os.path.join(model_path, 'random_forest_geo_radiation.pkl'))

print("Random Forest model for Incident Radiation trained and saved successfully.")

# Target for Direct Sun Hours (same features as above)
y_geo_sunhr = cleaned_df['geo_s.hr']

# Train Random Forest Regressor model for Direct Sun Hours on the entire dataset
start_time = time.time()
random_forest_regressor_sunhr = RandomForestRegressor(n_estimators=estimators, random_state=rstate)
random_forest_regressor_sunhr.fit(feature_set, y_geo_sunhr)
random_forest_time_sunhr = time.time() - start_time

# Print training time for Direct Sun Hours
print(f"Training time for Random Forest Regressor (Direct Sun Hours): {random_forest_time_sunhr:.2f} seconds")

# Save the Random Forest model for Direct Sun Hours to a specific path
joblib.dump(random_forest_regressor_sunhr, os.path.join(model_path, 'random_forest_geo_sunhr.pkl'))

print("Random Forest model for Direct Sun Hours trained and saved successfully.")


Training time for Random Forest Regressor (Radiation): 5.12 seconds
Random Forest model for Incident Radiation trained and saved successfully.
Training time for Random Forest Regressor (Direct Sun Hours): 6.05 seconds
Random Forest model for Direct Sun Hours trained and saved successfully.


# Linear & Polynomial Regression Training

In [9]:
from sklearn.linear_model import LinearRegression
import joblib
import os
import time

# Output folder for the fitted models.
model_path = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/ML Models/01P_Trained_VoxRad-VoxDot"

# Inputs: columns 'vox_rad' through 'vox_dot'; first target: 'geo_rad'.
feature_set = cleaned_df.loc[:, 'vox_rad':'vox_dot']
y_geo_radiation = cleaned_df['geo_rad']

# --- Incident radiation -------------------------------------------------
# Fit on the full dataset (no hold-out split) and time the fit.
start_time = time.time()
linear_regression_radiation = LinearRegression().fit(feature_set, y_geo_radiation)
linear_regression_time_radiation = time.time() - start_time

print(f"Training time for Linear Regression (Radiation): {linear_regression_time_radiation:.2f} seconds")

# Persist the fitted model, creating the folder on first use.
os.makedirs(model_path, exist_ok=True)
joblib.dump(
    linear_regression_radiation,
    os.path.join(model_path, 'linear_regression_geo_radiation.pkl'),
)

print("Linear Regression model for Incident Radiation trained and saved successfully.")

# --- Direct sun hours ---------------------------------------------------
# Same features, second target: 'geo_s.hr'.
y_geo_sunhr = cleaned_df['geo_s.hr']

start_time = time.time()
linear_regression_sunhr = LinearRegression().fit(feature_set, y_geo_sunhr)
linear_regression_time_sunhr = time.time() - start_time

print(f"Training time for Linear Regression (Direct Sun Hours): {linear_regression_time_sunhr:.2f} seconds")

joblib.dump(
    linear_regression_sunhr,
    os.path.join(model_path, 'linear_regression_geo_sunhr.pkl'),
)

print("Linear Regression model for Direct Sun Hours trained and saved successfully.")


Training time for Linear Regression (Radiation): 0.01 seconds
Linear Regression model for Incident Radiation trained and saved successfully.
Training time for Linear Regression (Direct Sun Hours): 0.00 seconds
Linear Regression model for Direct Sun Hours trained and saved successfully.


In [17]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
# LinearRegression is used below; importing it here keeps the cell from
# depending on the linear-regression cell having been run first.
from sklearn.linear_model import LinearRegression
import joblib
import os
import time

# NOTE(review): absolute, machine-specific output folder.
model_path = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/ML Models/01P_Trained_VoxRad-VoxDot"

# Degree of the polynomial feature expansion
degree = 6

# Features: every column from 'vox_rad' through 'vox_dot' (label slice, inclusive)
feature_set = cleaned_df.loc[:, 'vox_rad':'vox_dot']
y_geo_radiation = cleaned_df['geo_rad']

# Train Polynomial Regression model for Radiation on the entire dataset:
# polynomial feature expansion followed by ordinary least squares.
start_time = time.time()
polynomial_regression_radiation = make_pipeline(PolynomialFeatures(degree), LinearRegression())
polynomial_regression_radiation.fit(feature_set, y_geo_radiation)
polynomial_regression_time_radiation = time.time() - start_time

# Print training time for Radiation
print(f"Training time for Polynomial Regression (Radiation): {polynomial_regression_time_radiation:.2f} seconds")

# Save the Polynomial Regression model for Radiation to a specific path
# NOTE(review): the following (degree 7) cell writes to the same file name and overwrites this model.
os.makedirs(model_path, exist_ok=True)
joblib.dump(polynomial_regression_radiation, os.path.join(model_path, 'polynomial_regression_geo_radiation.pkl'))

print("Polynomial Regression model for Incident Radiation trained and saved successfully.")

# Target for Direct Sun Hours (same features as above)
y_geo_sunhr = cleaned_df['geo_s.hr']

# Train Polynomial Regression model for Direct Sun Hours on the entire dataset
start_time = time.time()
polynomial_regression_sunhr = make_pipeline(PolynomialFeatures(degree), LinearRegression())
polynomial_regression_sunhr.fit(feature_set, y_geo_sunhr)
polynomial_regression_time_sunhr = time.time() - start_time

# Print training time for Direct Sun Hours
print(f"Training time for Polynomial Regression (Direct Sun Hours): {polynomial_regression_time_sunhr:.2f} seconds")

# Save the Polynomial Regression model for Direct Sun Hours to a specific path
joblib.dump(polynomial_regression_sunhr, os.path.join(model_path, 'polynomial_regression_geo_sunhr.pkl'))

print("Polynomial Regression model for Direct Sun Hours trained and saved successfully.")


Training time for Polynomial Regression (Radiation): 30.43 seconds
Polynomial Regression model for Incident Radiation trained and saved successfully.
Training time for Polynomial Regression (Direct Sun Hours): 31.43 seconds
Polynomial Regression model for Direct Sun Hours trained and saved successfully.


In [22]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
import numpy as np
import joblib
import os
import time

# Degree of the polynomial feature expansion.
degree = 7

# Inputs: columns 'vox_rad' through 'vox_dot'.
feature_set = cleaned_df.loc[:, 'vox_rad':'vox_dot']

# --- Incident radiation -------------------------------------------------
# The target is fitted as log(1 + y).
# NOTE(review): the saved pipeline therefore predicts in log1p space; callers
# presumably need np.expm1 on its output to get back to the original units.
y_geo_radiation = np.log1p(cleaned_df['geo_rad'])

# Fit on the full dataset (no hold-out split) and time the fit.
start_time = time.time()
polynomial_regression_radiation = make_pipeline(
    PolynomialFeatures(degree),
    LinearRegression(),
).fit(feature_set, y_geo_radiation)
polynomial_regression_time_radiation = time.time() - start_time

print(f"Training time for Polynomial Regression (Radiation): {polynomial_regression_time_radiation:.2f} seconds")

# Persist the fitted pipeline, creating the folder on first use.
# NOTE(review): same file names as the degree-6 cell, so those models are overwritten.
model_path = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/ML Models/01P_Trained_VoxRad-VoxDot"
os.makedirs(model_path, exist_ok=True)
joblib.dump(
    polynomial_regression_radiation,
    os.path.join(model_path, 'polynomial_regression_geo_radiation.pkl'),
)

print("Polynomial Regression model for Incident Radiation trained and saved successfully.")

# --- Direct sun hours ---------------------------------------------------
# Same features; target 'geo_s.hr', also fitted as log(1 + y).
y_geo_sunhr = np.log1p(cleaned_df['geo_s.hr'])

start_time = time.time()
polynomial_regression_sunhr = make_pipeline(
    PolynomialFeatures(degree),
    LinearRegression(),
).fit(feature_set, y_geo_sunhr)
polynomial_regression_time_sunhr = time.time() - start_time

print(f"Training time for Polynomial Regression (Direct Sun Hours): {polynomial_regression_time_sunhr:.2f} seconds")

joblib.dump(
    polynomial_regression_sunhr,
    os.path.join(model_path, 'polynomial_regression_geo_sunhr.pkl'),
)

print("Polynomial Regression model for Direct Sun Hours trained and saved successfully.")


Training time for Polynomial Regression (Radiation): 277.67 seconds
Polynomial Regression model for Incident Radiation trained and saved successfully.
Training time for Polynomial Regression (Direct Sun Hours): 272.45 seconds
Polynomial Regression model for Direct Sun Hours trained and saved successfully.
