In [4]:
import ast
import os

import joblib
import numpy as np
import pandas as pd

In [5]:
# Pandas display settings.
# 'display.width' used to be set twice in this cell (None, then 1000); only the
# last call takes effect, so just the effective value is kept.
pd.set_option('display.max_columns', None)  # show every column instead of eliding the middle ones
pd.set_option('display.width', 1000)  # allow up to 1000 characters per line before wrapping

# IMPORTING DATA

In [6]:
#Main Definitions Here

def import_combine(folder, geo, file_count = 0):
    """Read a numbered series of '#'-separated CSV files and stack them row-wise.

    The files are looked up as ``{folder}/{geo}_8760.hrs_{i}.csv`` for
    ``i = 0 .. file_count - 1``. A file that does not exist is reported on
    stdout and skipped; the remaining files are still combined.

    Parameters
    ----------
    folder : str
        Directory that contains the CSV files.
    geo : str
        File-name prefix that precedes ``_8760.hrs_{i}.csv``.
    file_count : int, default 0
        Number of consecutive file indices (starting at 0) to try.

    Returns
    -------
    pandas.DataFrame
        All rows of every file that was found, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no file at all could be read (``file_count`` is 0 or every file is
        missing), since there is nothing to combine.
    """
    frames = []
    for i in range(file_count):
        filename = f'{folder}/{geo}_8760.hrs_{i}.csv'
        try:
            frames.append(pd.read_csv(filename, sep='#'))
        except FileNotFoundError:
            print(f'File {filename} not found. Skipping.')

    # pd.concat([]) would raise a bare "No objects to concatenate"; raise the
    # same exception type with a message that says what was being looked for.
    if not frames:
        raise ValueError(
            f'No CSV files could be read for {geo!r} in {folder!r} '
            f'(file_count={file_count}).'
        )

    return pd.concat(frames, ignore_index=True)


def clean_data(combined_df):
    """Expand the stringified vector columns into numeric features and de-duplicate.

    Steps, in order:
      1. Parse the text in 'vox_vec' and 'geo_vec' into Python sequences.
      2. Append the first three components of each vector as
         'vox_vecX/Y/Z' and 'geo_vecX/Y/Z'.
      3. Append 'vox_dot', the dot product of the two vectors of each row.
      4. Drop the original 'vox_vec' / 'geo_vec' columns.
      5. Move 'geo_rad' and 'geo_s.hr' to the end of the frame.
      6. Drop fully duplicated rows (the index is not reset).

    Parameters
    ----------
    combined_df : pandas.DataFrame
        Must contain the columns 'vox_vec', 'geo_vec', 'geo_rad' and
        'geo_s.hr'. The vector columns hold text such as '[0.1, 0.2, 0.3]'.
        The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns described above.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError, SyntaxError
        If a vector cell is not a plain Python literal.
    """
    def convert_to_list(string):
        # SECURITY: this used to be eval(string), which executes whatever text
        # is stored in the CSV. ast.literal_eval accepts only Python literals
        # (lists/tuples of numbers, etc.) and raises on anything else.
        return ast.literal_eval(string)

    # Parse the 'vox_vec' and 'geo_vec' columns once; the parsed sequences are
    # reused for both the component columns and the dot product.
    vox_vectors = [convert_to_list(cell) for cell in combined_df['vox_vec']]
    geo_vectors = [convert_to_list(cell) for cell in combined_df['geo_vec']]

    # drop() returns a new frame, so the caller's frame is left untouched.
    working_df = combined_df.drop(columns=['geo_vec', 'vox_vec'])

    # Split each vector into three component columns. Plain list comprehensions
    # also work for an empty frame, where zip(*...) unpacking used to fail.
    for axis_pos, axis_name in enumerate('XYZ'):
        working_df[f'vox_vec{axis_name}'] = [vec[axis_pos] for vec in vox_vectors]
    for axis_pos, axis_name in enumerate('XYZ'):
        working_df[f'geo_vec{axis_name}'] = [vec[axis_pos] for vec in geo_vectors]

    # Dot product of each voxel vector with its related geometry vector.
    working_df['vox_dot'] = [
        np.dot(vox, geo) for vox, geo in zip(vox_vectors, geo_vectors)
    ]

    # Pop-and-reassign moves these columns to the end of the frame.
    for col in ['geo_rad', 'geo_s.hr']:
        working_df[col] = working_df.pop(col)

    # Remove rows that are identical in every column.
    return working_df.drop_duplicates()

In [7]:
# Import training data: read the numbered CSV files for one geometry and stack them.
# NOTE(review): `folder` is an absolute path on one specific machine; this cell
# will only find the files where that path exists.
folder = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/COLAB/CSV/17_cube dataset_step15_sunshaded"
geo = "cube_hole"
# import_combine tries file indices 0 .. file_count - 1 and skips missing ones.
file_count = 343
combined_df = import_combine(folder,geo, file_count)
print(combined_df.shape)

# Clean training data: expand the vector columns, add the dot product, drop duplicates.
cleaned_df = clean_data(combined_df)
print(cleaned_df.shape)
# NOTE(review): this is not the last expression of the cell, so the preview is
# evaluated but never displayed.
cleaned_df.head()

# Column subset for inspecting relationships: label-based .loc slice, inclusive
# of both end columns ('vox_rad' through 'geo_s.hr').
relation_df = cleaned_df.loc[:,'vox_rad':'geo_s.hr']

(35674, 8)
(31970, 13)


# Random Forest Regression for Incident Radiation & Direct Sun Hours

In [30]:
from sklearn.ensemble import RandomForestRegressor
import joblib
import os
import time

# Directory the trained models are written to.
# NOTE(review): absolute path on one specific machine.
model_path = "C:/Users/shrey/OneDrive/Desktop/01_IAAC/IAAC_MAA_2407/00_THESIS/WIP/ML Models/01P_Trained_VoxRad-VoxDot"

estimators = 100
# NOTE(review): rstate is not used by the training below; both forests are
# seeded with random_state=5, as they always were. Confirm which seed is
# intended before wiring this value in, because changing the seed changes the
# saved models.
rstate = 10

# Both models share the same feature columns ('vox_rad' through 'vox_dot',
# inclusive); only the target column differs.
feature_set = cleaned_df.loc[:, 'vox_rad':'vox_dot']
y_geo_radiation = cleaned_df['geo_rad']
y_geo_sunhr = cleaned_df['geo_s.hr']


def _train_and_save_forest(features, target, filename, timing_label, model_label,
                           n_estimators=100, random_state=5):
    """Fit a RandomForestRegressor on the full data set, time it, and save it.

    Parameters
    ----------
    features, target
        Training inputs and the column to predict.
    filename : str
        File name of the saved model inside `model_path`.
    timing_label, model_label : str
        Text inserted into the two progress messages.
    n_estimators, random_state
        Passed straight to RandomForestRegressor.

    Returns
    -------
    tuple
        (fitted model, fit duration in seconds).
    """
    start_time = time.time()
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    model.fit(features, target)
    elapsed = time.time() - start_time

    print(f"Training time for Random Forest Regressor ({timing_label}): {elapsed:.2f} seconds")

    joblib.dump(model, os.path.join(model_path, filename))
    print(f"Random Forest model for {model_label} trained and saved successfully.")
    return model, elapsed


# Make sure the output directory exists before any model is trained or saved.
os.makedirs(model_path, exist_ok=True)

# Incident Radiation model
random_forest_regressor_radiation, random_forest_time_radiation = _train_and_save_forest(
    feature_set, y_geo_radiation, 'random_forest_geo_radiation.pkl',
    'Radiation', 'Incident Radiation', n_estimators=estimators)

# Direct Sun Hours model
random_forest_regressor_sunhr, random_forest_time_sunhr = _train_and_save_forest(
    feature_set, y_geo_sunhr, 'random_forest_geo_sunhr.pkl',
    'Direct Sun Hours', 'Direct Sun Hours', n_estimators=estimators)


Training time for Random Forest Regressor (Radiation): 5.12 seconds
Random Forest model for Incident Radiation trained and saved successfully.
Training time for Random Forest Regressor (Direct Sun Hours): 6.05 seconds
Random Forest model for Direct Sun Hours trained and saved successfully.
