In [62]:
#imports
import geopandas as gpd
import pandas as pd
import rasterio
import rasterio.mask
import tempfile
import fiona
import os
import glob
import numpy as np
import traceback
import subprocess
import json

In [63]:
# Notebook configuration: resolve every input/output location relative to the
# current working directory and start each run from empty output folders.

# get the working directory
cwd = os.getcwd()

# warning messages
message = {}

# get the shapefile folder
shapefile_folder = os.path.join(cwd, 'shapefile')
# Pattern to match shapefile (shapefile typically have extensions like .shp)
shapefile_pattern = os.path.join(shapefile_folder, '*.shp')

shapefiles_list = glob.glob(shapefile_pattern)

# Count the number of shapefile
num_shapefiles = len(shapefiles_list)

# get the java folder
java_folder = os.path.join(cwd, 'source/java')

# Output folders: per-stand CSVs, per-stand CSVs rewritten with the current
# QMD, and the final result tables.
csv_folder = os.path.join(cwd, 'source/csv_files')
modified_csv_folder = os.path.join(cwd, 'source/modified_csv_for_curr_QMD')
result_folder = os.path.join(cwd, 'result')

# Make sure every output folder exists, then remove CSVs left by a previous run.
# (The earlier `for ... else: os.makedirs(...)` form lived inside
# `if os.path.exists(...)`, so a missing folder was never created.)
for folder in (csv_folder, modified_csv_folder, result_folder):
    os.makedirs(folder, exist_ok=True)
    for file in glob.glob(os.path.join(folder, '*.csv')):
        os.remove(file)

# get the raster file
raster_folder = os.path.join(cwd,'source/rasters')
tiffile = os.path.join(raster_folder,'Final_comp31024.tif')

# Get the parent directory
parent_dir = os.path.dirname(cwd)


In [64]:
def filter_standID_for_generatingCSV_for_CurrQMD(lis):
    """
    Map each stand ID to its usable current QMD / TPA values.

    Parameters:
    - lis (list of dict): attribute records in "column -> list of values"
      form; only element 0 of 'StandID', 'curr_QMD' and 'curr_TPA' is read.

    Returns:
    - dict: {int stand id: [int curr_QMD, int curr_TPA]} for every record
      whose curr_QMD and curr_TPA are both present, numeric, not NaN and
      non-zero. When two records share a stand ID the later one wins.

    Records that cannot be interpreted (non-numeric values, missing StandID,
    empty value lists, infinite values) are reported and skipped, so one bad
    record never aborts the whole pass.
    """
    dic_for_standID_with_currQMD = {}
    for item in lis:
        if 'curr_QMD' not in item or 'curr_TPA' not in item:
            continue
        try:
            # Convert the values to floats
            curr_qmd = float(item['curr_QMD'][0])
            curr_tpa = float(item['curr_TPA'][0])
            # NaN or zero means "no current measurement" -> skip silently
            if np.isnan(curr_qmd) or np.isnan(curr_tpa) or curr_qmd == 0 or curr_tpa == 0:
                continue
            stand_id = int(item['StandID'][0])
            dic_for_standID_with_currQMD[stand_id] = [int(item['curr_QMD'][0]), int(item['curr_TPA'][0])]
        except (ValueError, TypeError, KeyError, IndexError, OverflowError):
            print("absence due to invalid values")
    return dic_for_standID_with_currQMD

        
# dic = filter_standID_for_generatingCSV_for_CurrQMD(fields_from_shapefile)
# print(dic)

In [65]:
def to_csv(out_image, csv_folder, index, parameters, csv_files, stand_id, dict_for_standIDnCrrnQMD_from_allFields, csv_with_modified_qmd):
    """
    Write the masked raster pixels of one stand to CSV, one row per pixel.

    Every raster band becomes a column (named from ``column_names`` in band
    order), the stand-level values in ``parameters`` are repeated on every
    row, rows containing NaN are dropped, and the table is saved as
    ``csv_for_stand_<stand_id>.csv`` inside ``csv_folder``. When ``stand_id``
    has an entry in ``dict_for_standIDnCrrnQMD_from_allFields`` a second copy,
    whose ``QMD`` column is overwritten with that entry's first value, is
    saved as ``with_cur_QMD_csv_for_stand_<stand_id>.csv`` in the module-level
    ``modified_csv_folder``.

    Parameters:
    - out_image (numpy array): Masked raster data, indexed as out_image[band].
    - csv_folder (str): Folder that receives the per-stand CSV (created if missing).
    - index: Position of the shape in its shapefile; accepted but not used.
    - parameters (dict): column name -> list of values; only element 0 is used.
    - csv_files (list): Output argument; the path of the written CSV is appended.
    - stand_id: Stand identifier used in the file name and for the dict lookup.
    - dict_for_standIDnCrrnQMD_from_allFields (dict): stand id -> sequence
      whose element 0 is written to the QMD column of the second CSV.
    - csv_with_modified_qmd (list): Output argument; the path of the second
      CSV is appended when one is written.

    Returns:
    - None. Nothing is written when the masked image holds no pixels.
    """
    os.makedirs(csv_folder, exist_ok=True)
    # column_names = ['aws0_100', 'DEP2RES', 'HeatLoad', 'MAPMCMT', 'Rad_sm', 'B_TD', 'soc_05','soc0_20','slope','pratio','consLITH']
    column_names = [ 'USA_Slope', 'HeatLoad','Cons_LITH', 'Rad_sm', 'B_TD', 'MCMT', 'MAP', 'PRATIO', 'DEP2RES','aws0_100', 'soc0_5']
    # Numeric lithology codes -> class names
    value_mapping = {
        1: 'Extrusive',
        2: 'Other',
        3: 'Sedimentary',
        4: 'Unconsolidated-Fine',
        5: 'Metasedimentary',
        6: 'CaSedimentary',
        7: 'Unconsolidated-Coarse',
        8: 'Plutonic',
        9: 'Metamorphic',
        10: 'Unconsolidated-Organic',
        11: 'CaMetasedimentary',
        12: 'Unconsolidated'
    }

    # The raster is reopened only to learn which value marks "no data".
    with rasterio.open(tiffile) as src:
        nodata = src.nodata

    # Flatten band data and create DataFrame (one column per band)
    band_data = {f'band_{j+1}': out_image[j].flatten() for j in range(out_image.shape[0])}
    df = pd.DataFrame(band_data)
    # Replace the nodata values with NaN; a raster without a declared nodata
    # value has nothing to replace.
    if nodata is not None:
        df = df.replace(nodata, np.nan)
    if df.empty:
        return

    # Rename columns. NOTE(review): when the raster has more bands than
    # column_names the generic band_N names are kept and the 'Cons_LITH'
    # lookup below raises KeyError.
    if len(column_names) >= df.shape[1]:
        df.columns = column_names[:df.shape[1]]

    # Extend the csv file with the model parameters (same value on every row)
    param_df = pd.DataFrame({key: [value[0]] * len(df) for key, value in parameters.items()})
    df_extended = pd.concat([df, param_df], axis=1)

    # Replace the numeric values with string values
    df_extended['Cons_LITH'] = df_extended['Cons_LITH'].replace(value_mapping)
    # presumably the raster stores these two bands as scaled values - confirm
    df_extended['MCMT'] = df_extended['MCMT'] / 10
    df_extended['PRATIO'] = df_extended['PRATIO'] / 10000
    # drop the NaN values
    df_extended = df_extended.dropna()

    # Save to CSV
    csv_filename = f"csv_for_stand_{stand_id}.csv"
    csv_path = os.path.join(csv_folder, csv_filename)
    csv_files.append(csv_path)
    df_extended.to_csv(csv_path, index=False)

    if stand_id in dict_for_standIDnCrrnQMD_from_allFields:
        Current_QMD = dict_for_standIDnCrrnQMD_from_allFields[stand_id]
        print(Current_QMD)
        df_extended['QMD'] = Current_QMD[0]
        new_file_name = f"with_cur_QMD_{csv_filename}"
        modified_path = os.path.join(modified_csv_folder, new_file_name)
        # The folder may not exist yet on a fresh checkout.
        os.makedirs(modified_csv_folder, exist_ok=True)
        csv_with_modified_qmd.append(modified_path)
        df_extended.to_csv(modified_path, index=False)

                     

In [66]:

# Clip the raster to every stand polygon of every shapefile and write one CSV
# per stand (plus a "current QMD" variant when the stand provides one).

# csvfile list
csv_files = []
# Define desired columns
desired_columns = ['QMD', 'RC_PROP', 'WH_PROP', 'GF_PROP', 'LP_PROP', 'WL_PROP', 'DF_PROP', 'PP_PROP']
stand_id = []
fields_from_shapefile = []
dict_for_standIDnCrrnQMD_from_allFields = {}
csv_with_current_qmd_and_tpa = []

# Per-species weights used for the proportion-weighted tolerance scores
shade_tolerance = {"RC_PROP": 4.73, "WH_PROP": 4.96, "GF_PROP": 4.01, "LP_PROP": 1.73, "WL_PROP": 1.35, "DF_PROP": 2.78, "PP_PROP": 1.64}
drought_tolerance = {"RC_PROP": 2.23, "WH_PROP": 1.17, "GF_PROP": 2.33, "LP_PROP": 4.04, "WL_PROP": 2.42, "DF_PROP": 2.62, "PP_PROP": 4.32}


for shapefile_path in shapefiles_list:
    # Read the main shapefile
    try:
        ply = gpd.read_file(shapefile_path)
    except Exception as ex:
        print(f"Unable to read shapefile  {shapefile_path}. Your file may be corrupt: {ex}")
        raise

    # Change CRS to match raster
    # NOTE(review): EPSG:3857 is hard-coded; confirm it is the raster's CRS.
    ply = ply.to_crs(epsg=3857)

    # The reprojected geometries are used directly. Writing them to the name of
    # an already-deleted NamedTemporaryFile and reading them back with fiona
    # left files behind in the temp directory and added nothing.
    with rasterio.open(tiffile) as geotiff:
        for index, shape in enumerate(ply.geometry):
            try:
                out_image, out_transform = rasterio.mask.mask(geotiff, [shape], crop=True, all_touched=True)
                # Extract desired columns from the clipped shapefile
                # NOTE(review): if stand polygons overlap, clip() can return
                # more than one row and only the first is used below.
                clipped_shapefile = ply.clip(gpd.GeoDataFrame(geometry=[shape], crs=ply.crs))
                record = clipped_shapefile.copy().to_dict(orient='list')
                fields_from_shapefile.append(record)
                # Only the new record needs filtering; earlier ones are already in the dict.
                dict_for_standIDnCrrnQMD_from_allFields.update(
                    filter_standID_for_generatingCSV_for_CurrQMD([record])
                )
                extracted_data = clipped_shapefile[desired_columns].copy()
                extracted_data["W_ShadeTol"] = sum(extracted_data[col] * weight for col, weight in shade_tolerance.items())
                extracted_data["W_DroughtTol"] = sum(extracted_data[col] * weight for col, weight in drought_tolerance.items())
                # Extract StandID as integer and append to stand_id list -- we are getting this stand id from shapefile
                stand_id_value = int(clipped_shapefile['StandID'].values[0])
                stand_id.append(stand_id_value)
                # Stand-level parameters in "column -> list of values" form
                extracted_data_dict = extracted_data.to_dict(orient='list')
                # Call to_csv function to create and save the CSV file
                to_csv(out_image, csv_folder, index, extracted_data_dict, csv_files, stand_id[-1], dict_for_standIDnCrrnQMD_from_allFields, csv_with_current_qmd_and_tpa)
            except Exception as ex:
                # Best effort: report the failing shape and continue with the next one.
                print(f"Error processing shape {index}: {str(ex)}")
                traceback.print_exc()  # Print the full traceback
                print("\n")
                print("\n")
                print("\n")



[8, 700]
[11, 400]
[12, 390]
[9, 550]


In [67]:
# Debug dump of the attribute records collected for every processed shape
print(fields_from_shapefile)

[{'Id': [0], 'StandID': [11.0], 'QMD': [10.0], 'RC_PROP': [0.0], 'WH_PROP': [0.0], 'GF_PROP': [0.0], 'LP_PROP': [0.0], 'WL_PROP': [0.0], 'DF_PROP': [1.0], 'PP_PROP': [0.0], 'curr_QMD': [8.0], 'curr_TPA': [700.0], 'geometry': [<POLYGON ((-12890060.839 5978042.574, -12880693.712 5935692.332, -12912794.7...>]}, {'Id': [0], 'StandID': [22.0], 'QMD': [10.0], 'RC_PROP': [0.0], 'WH_PROP': [0.0], 'GF_PROP': [0.0], 'LP_PROP': [0.0], 'WL_PROP': [0.0], 'DF_PROP': [1.0], 'PP_PROP': [0.0], 'curr_QMD': [11.0], 'curr_TPA': [400.0], 'geometry': [<POLYGON ((-13331829.383 6245403.014, -13294934.123 6194934.622, -13330251.0...>]}, {'Id': [0], 'StandID': [55.0], 'QMD': [10.0], 'RC_PROP': [0.0], 'WH_PROP': [0.0], 'GF_PROP': [0.0], 'LP_PROP': [0.0], 'WL_PROP': [0.0], 'DF_PROP': [1.0], 'PP_PROP': [0.0], 'curr_QMD': [12.0], 'curr_TPA': [390.0], 'geometry': [<POLYGON ((-13002510.161 6232920.48, -13020317.891 6190275.082, -13033981.56...>]}, {'Id': [0], 'StandID': [66.0], 'QMD': [10.0], 'RC_PROP': [0.0], 'WH_PR

In [68]:
# Keep only the CSV files that are not empty, i.e. the ones that can be fed to the model.

def filter_csv(folder_path):
    """
    Collect the CSV files in ``folder_path`` that contain at least one data row.

    Parameters:
    - folder_path (str): Folder to scan (not recursive).

    Returns:
    - (csv_for_result, stand_id_result): two parallel lists. The first holds
      the paths of the non-empty CSVs. The second holds, for each path, the
      text between the last '_' of the file name and the first '.' after it
      (e.g. 'csv_for_stand_11.csv' -> '11').
      Both lists are empty when the folder does not exist or has no usable CSV.
    """
    csv_for_result = []
    stand_id_result = []
    # A folder that was never created simply has nothing to offer.
    if not os.path.isdir(folder_path):
        return csv_for_result, stand_id_result

    # List all CSV files in the folder
    csv_files = [os.path.join(folder_path, name) for name in os.listdir(folder_path) if name.endswith('.csv')]
    for csv_path in csv_files:
        try:
            df = pd.read_csv(csv_path)
        except pd.errors.EmptyDataError:
            # Raised by pandas for a file without any columns to parse
            print(f"{csv_path} is empty or has no valid data.")
            continue
        if df.empty:
            # Header only, no data rows
            continue
        csv_for_result.append(csv_path)
        filename = os.path.basename(csv_path)
        stand_id_result.append(filename.split('_')[-1].split('.')[0])
    return csv_for_result, stand_id_result
    



['d:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_11.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_22.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_55.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_66.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_111.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_222.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_555.csv', 'd:\\trails\\testing model with %sdimax\\INLAND MODEL\\source/csv_files\\csv_for_stand_666.csv'] ['11', '22', '55', '66', '111', '222', '555', '666']


In [69]:
def prediction(csv_for_result):
    """
    Run the Java scorer ``main1`` on each CSV and collect its rounded statistics.

    ``main1.java`` is compiled once with ``javac`` and then executed once per
    CSV path with ``java``; both run inside ``<cwd>/source/java`` with
    ``h2o-genmodel.jar`` and ``gson-2.8.8.jar`` on the classpath. The program's
    standard output must be a JSON object whose values are numbers; every
    value is rounded to the nearest integer.

    Parameters:
    - csv_for_result (list of str): CSV paths handed to ``main1`` one at a time.

    Returns:
    - list of dict: one ``{metric: rounded value}`` dict per CSV, in input order.

    Raises:
    - FileNotFoundError: a required JAR or ``main1.java`` is missing.
    - RuntimeError: compiling/running Java failed, or its output is not valid JSON.
    """
    predictions_and_stats_list = []
    # Resolve the Java resources relative to the current working directory
    cwd = os.getcwd()
    JAVA_SERVICE_PATH = os.path.join(cwd, 'source/java')
    H2O_GENMODEL_JAR_PATH = os.path.join(JAVA_SERVICE_PATH, 'h2o-genmodel.jar')
    GSON_JAR_PATH = os.path.join(JAVA_SERVICE_PATH, 'gson-2.8.8.jar')
    MAIN_JAVA_FILE_PATH = os.path.join(JAVA_SERVICE_PATH, 'main1.java')

    # Check if Java files and JARs exist
    if not os.path.exists(H2O_GENMODEL_JAR_PATH):
        raise FileNotFoundError(f"JAR file not found: {H2O_GENMODEL_JAR_PATH}")
    if not os.path.exists(GSON_JAR_PATH):
        raise FileNotFoundError(f"JAR file not found: {GSON_JAR_PATH}")
    if not os.path.exists(MAIN_JAVA_FILE_PATH):
        raise FileNotFoundError(f"Java file not found: {MAIN_JAVA_FILE_PATH}")

    # Use the correct classpath separator (';' on Windows, ':' elsewhere)
    classpath = os.pathsep.join(['.', H2O_GENMODEL_JAR_PATH, GSON_JAR_PATH])

    # Nothing to score: skip the compiler entirely.
    if not csv_for_result:
        return predictions_and_stats_list

    try:
        # Compilation does not depend on the CSV, so do it once up front.
        compile_command = ['javac', '-cp', classpath, 'main1.java']
        subprocess.run(compile_command, cwd=JAVA_SERVICE_PATH, check=True)

        for item in csv_for_result:
            # Execute the Java code and capture the output
            run_command = ['java', '-cp', classpath, 'main1', item]
            output = subprocess.check_output(run_command, cwd=JAVA_SERVICE_PATH)
            output_str = output.decode('utf-8')
            # Process the output (a JSON object of metric -> number)
            predictions_and_stats = json.loads(output_str)
            # Round statistical metrics to the nearest integer
            rounded_predictions = {metric: round(value) for metric, value in predictions_and_stats.items()}
            predictions_and_stats_list.append(rounded_predictions)

    except subprocess.CalledProcessError as e:
        print(f"Error executing Java code: {e}")
        raise RuntimeError('Error executing Java code') from e  # Handle error as needed
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON output from Java: {e}")
        raise RuntimeError('Error decoding JSON output from Java') from e  # Handle JSON decoding error
    return predictions_and_stats_list

In [70]:
# Score every non-empty per-stand CSV and tabulate the statistics per stand.
folder_path = os.path.join(os.getcwd(), "source/csv_files")
csv_for_result, stand_id_result = filter_csv(folder_path)

print(csv_for_result, stand_id_result)
predictions_and_stats_list = prediction(csv_for_result)
df = pd.DataFrame(predictions_and_stats_list)
df['StandId'] = list(stand_id_result)

# Put the stand identifier in the first column, keeping the others in order
cols = ['StandId']
cols.extend(col for col in df.columns if col != 'StandId')
df = df[cols]


In [71]:
# Display the per-stand prediction statistics table built in the previous cell
df

Unnamed: 0,StandId,mode,median,mean,highestValue,lowestValue
0,11,523,503,494,578,362
1,22,327,352,357,421,326
2,55,437,417,424,525,368
3,66,407,422,419,449,388
4,111,510,542,543,590,473
5,222,327,332,338,393,326
6,555,431,485,486,582,402
7,666,448,448,447,487,437


In [75]:
# Score the CSVs whose QMD column was replaced by each stand's current QMD and
# relate the stand's current TPA to the predicted (median) SDImax.
folder_path = modified_csv_folder
csv_for_result, stand_id_result = filter_csv(folder_path)
# print(csv_for_result)
df2=None
if csv_for_result != [] and stand_id_result != []:
    predictions_and_stats_list = prediction(csv_for_result)
    df2 = pd.DataFrame(predictions_and_stats_list)
    df2['StandId'] = list(stand_id_result)
    # Look the current values up by stand ID. The directory listing order of
    # stand_id_result is not guaranteed to match the insertion order of the
    # dict, so positional assignment could attach values to the wrong stand.
    # stand_id_result holds strings taken from file names; the dict keys are ints.
    current_values = [dict_for_standIDnCrrnQMD_from_allFields[int(stand)] for stand in stand_id_result]
    df2['Curr_QMD'] = [int(values[0]) for values in current_values]
    df2['Curr_TPA'] = [int(values[1]) for values in current_values]
    # Keep only the columns reported for the "current" scenario
    df2 = df2[['StandId','median', 'Curr_QMD','Curr_TPA' ]]
    df2 = df2.rename(columns={'median': 'Curr_SDImax'})
    df2['%SDImax'] =  (df2['Curr_TPA'] * 100 )/ df2['Curr_SDImax']
    display("stats with current TPA and current SDImax")
    display(df2)

    

'stats with current TPA and current SDImax'

Unnamed: 0,StandId,Curr_SDImax,Curr_QMD,Curr_TPA,%SDImax
0,11,689,8,700,101.596517
1,22,295,11,400,135.59322
2,55,297,12,390,131.313131
3,66,508,9,550,108.267717


In [76]:
# Combine the baseline statistics with the current-QMD/TPA statistics and save
# the combined table.
if df2 is not None:
    merged_df = pd.merge(df, df2, how='outer', on='StandId')
    # Only has an effect if both frames carried a 'Curr_SDImax' column, in
    # which case merge would have suffixed them with _x / _y.
    merged_df = merged_df.rename(columns={'Curr_SDImax_x': 'median', 'Curr_SDImax_y': 'Curr_SDImax'})
    display(merged_df)  
    result_folder = os.path.join(cwd,'result/result_with_current_QMD_and_TPA.csv')
    # The result directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(result_folder), exist_ok=True)
    # Save the merged table (previously the un-merged `df` was written here).
    merged_df.to_csv(result_folder)


Unnamed: 0,StandId,mode,median,mean,highestValue,lowestValue,Curr_SDImax,Curr_QMD,Curr_TPA,%SDImax
0,11,523,503,494,578,362,689.0,8.0,700.0,101.596517
1,111,510,542,543,590,473,,,,
2,22,327,352,357,421,326,295.0,11.0,400.0,135.59322
3,222,327,332,338,393,326,,,,
4,55,437,417,424,525,368,297.0,12.0,390.0,131.313131
5,555,431,485,486,582,402,,,,
6,66,407,422,419,449,388,508.0,9.0,550.0,108.267717
7,666,448,448,447,487,437,,,,


In [77]:
# Save the baseline per-stand statistics.
result_folder = os.path.join(cwd,'result/result.csv')
# The result directory may not exist yet on a fresh checkout.
os.makedirs(os.path.dirname(result_folder), exist_ok=True)
df.to_csv(result_folder)