In [21]:
#import
import geopandas as gpd
import pandas as pd
import rasterio
import rasterio.mask
import tempfile
import fiona
import os
import glob
import numpy as np
import traceback
import subprocess
import json

In [22]:
# get the working directory
cwd = os.getcwd()

# get the shapefile folder
shapefile_folder = os.path.join(cwd, 'shapefile')

# Pattern to match shapefile (shapefile typically have extensions like .shp)
shapefile_pattern = os.path.join(shapefile_folder, '*.shp')
# Get the list of all shapefile in the folder
shapefiles_list = glob.glob(shapefile_pattern)

# Count the number of shapefile
num_shapefiles = len(shapefiles_list)

# get the java folder
java_folder = os.path.join(cwd, 'source/java')

# get the csv folder
csv_folder = os.path.join(cwd, 'source/csv_files')

# get the raster file
raster_folder = os.path.join(cwd,'source/rasters')
tiffile = os.path.join(raster_folder,'raster.tif')

# Get the parent directory
parent_dir = os.path.dirname(cwd)


In [27]:
def to_csv(out_image, csv_folder, index, parameters, csv_files, stand_id ):
        """
        Process the raster file and save the band data to CSV files for each shape.

        Parameters:
        - out_image (numpy array): Masked raster data.
        - model_parameters (dict): Dictionary containing shape parameters.
        - output_folder (str): Folder to save the CSV files.
        Returns:
        - List of CSV filenames.
        """     
        os.makedirs(csv_folder, exist_ok=True)
        column_names = ['aws0_100', 'DEP2RES', 'HeatLoad', 'MAPMCMT', 'Rad_sm', 'B_TD', 'soc_05','soc0_20','slope','pratio','consLITH']
        with rasterio.open(tiffile) as src:
            # Flatten band data and create DataFrame
            band_data = {f'band_{j+1}': out_image[j].flatten() for j in range(out_image.shape[0])}
            df = pd.DataFrame(band_data)
            # Replace the nodata values with NaN
            for band in band_data:
                df[band] = df[band].replace(src.nodata, np.nan)
            # Drop rows with NaN values
            df_cleaned = df.dropna(how='any')
            if not df_cleaned.empty:
                # Drop columns that contain any NaN values
                df_cleaned = df_cleaned.dropna(axis=1, how='any')
                # Rename columns
                if len(column_names) >= df_cleaned.shape[1]:
                    df_cleaned.columns = column_names[:df_cleaned.shape[1]]
                # Extend the csv file with the model parameters
                param_df = pd.DataFrame({key: [value[0]] * len(df_cleaned) for key, value in parameters.items()})
                param_df = param_df.apply(lambda x: x[0] if isinstance(x, list) else x)
                df_extended = pd.concat([df_cleaned, param_df], axis=1)
                # Again drop the NaN values
                df_extended = df_extended.dropna()
                # Save to CSV
                csv_filename = f"csv_for_stand_{stand_id}.csv"
                csv_path = os.path.join(csv_folder, csv_filename)
                csv_files.append(csv_path)
                df_extended.to_csv(csv_path, index=False)
  

In [28]:

# csvfile list
csv_files = []
# Define desired columns
desired_columns = ['QMD', 'RC_PROP', 'WH_PROP', 'GF_PROP', 'LP_PROP', 'WL_PROP', 'DF_PROP', 'PP_PROP']
stand_id = []
# Ensure the output directory exists and remove all existing CSV files in the output folder
if os.path.exists(csv_folder):
    for file in glob.glob(os.path.join(csv_folder, '*.csv')):
        os.remove(file)
else:
    os.makedirs(csv_folder, exist_ok=True)

for shapefile in shapefiles_list:
    # Path to your shapefile
    shapefile_path = shapefile
    # Read the main shapefile
    try:
        ply = gpd.read_file(shapefile_path)
    except Exception as ex:
        print(f"Unable to read shapefile  {shapefile_path}. Your file may be corrupt: {ex}")
        raise

    # Change CRS to match raster
    ply = ply.to_crs(epsg=3857)
    with tempfile.NamedTemporaryFile() as tf:
        crsName = tf.name
    ply.to_file(crsName)  # Write new shapefile 
    # Read and process shapefile
    with rasterio.open(tiffile) as geotiff:
        with fiona.open(crsName, "r") as shapefile:
            shapes = [feature["geometry"] for feature in shapefile]
            for index, shape in enumerate(shapes):
                try:
                    out_image, out_transform = rasterio.mask.mask(geotiff, [shape], crop=True)                 
                    # Extract desired columns from the clipped shapefile
                    clipped_shapefile = ply.clip(gpd.GeoDataFrame(geometry=[shape], crs=ply.crs))
                    extracted_data = clipped_shapefile[desired_columns]                   
                    # Extract StandID as integer and append to stand_id list
                    stand_id_value = int(clipped_shapefile['StandID'].values[0])
                    stand_id.append(stand_id_value)                  
                    # Convert the extracted data to a dictionary
                    extracted_data_dict_list = extracted_data.to_dict(orient='records')
                    # Optionally, convert the list of dictionaries into a single dictionary if needed
                    # For example, if you want a dictionary with column names as keys and lists of values as values:
                    extracted_data_dict = extracted_data.to_dict(orient='list')
                    # Call to_csv function to create and save the CSV file
                    to_csv(out_image, csv_folder, index, extracted_data_dict, csv_files, stand_id[-1])
                except Exception as ex:
                    print(f"Error processing shape {index}: {str(ex)}")
                    traceback.print_exc()  # Print the full traceback
                    print("\n")
                    print("\n")
                    print("\n")



In [29]:
print(stand_id)

[11, 12, 13, 14, 15, 16, 2016, 1978, 1993, 1995, 2000, 2012, 2016]


In [30]:
for item  in csv_files:
    print(item)
    # print("")

c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_11.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_12.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_13.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_14.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_15.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_16.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_2016.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_1978.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_1993.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_1995.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_2000.csv
c:\Users\raun9583\Downloads\new_model_test\source/csv_files\csv_for_stand_2012.csv
c:\Users\raun958

In [32]:
# for item in csv_files:
#     print(f"CSV File: {item}")
predictions_and_stats_list = []
# Navigate to the 'java' directory (adjust path as needed)
cwd = os.getcwd()  # Assuming cwd is defined somewhere earlier
JAVA_SERVICE_PATH = os.path.join(cwd, 'source/java')
H2O_GENMODEL_JAR_PATH = os.path.join(JAVA_SERVICE_PATH, 'h2o-genmodel.jar')
GSON_JAR_PATH = os.path.join(JAVA_SERVICE_PATH, 'gson-2.8.8.jar')
MAIN_JAVA_FILE_PATH = os.path.join(JAVA_SERVICE_PATH, 'main1.java')


# Check if Java files and JARs exist
if not os.path.exists(H2O_GENMODEL_JAR_PATH):
    raise FileNotFoundError(f"JAR file not found: {H2O_GENMODEL_JAR_PATH}")
if not os.path.exists(GSON_JAR_PATH):
    raise FileNotFoundError(f"JAR file not found: {GSON_JAR_PATH}")
if not os.path.exists(MAIN_JAVA_FILE_PATH):
    raise FileNotFoundError(f"Java file not found: {MAIN_JAVA_FILE_PATH}")

# Use the correct classpath separator
classpath_separator = os.pathsep
classpath = f'.{classpath_separator}{H2O_GENMODEL_JAR_PATH}{classpath_separator}{GSON_JAR_PATH}'

try:
    for item in csv_files:
        # Compile the Java code
        compile_command = ['javac', '-cp', classpath, 'main1.java']
        subprocess.run(compile_command, cwd=JAVA_SERVICE_PATH, check=True)
        # Execute the Java code and capture the output
        run_command = ['java', '-cp', classpath, 'main1', item]
        output = subprocess.check_output(run_command, cwd=JAVA_SERVICE_PATH)
        output_str = output.decode('utf-8')
        # Process the output (assuming it's JSON)
        predictions_and_stats = json.loads(output_str)
        # Round statistical metrics to the nearest integer
        rounded_predictions = {metric: round(value) for metric, value in predictions_and_stats.items()}
        predictions_and_stats_list.append(rounded_predictions)

except subprocess.CalledProcessError as e:
    print(f"Error executing Java code: {e}")
    raise RuntimeError('Error executing Java code') from e  # Handle error as needed
except json.JSONDecodeError as e:
    print(f"Error decoding JSON output from Java: {e}")
    raise RuntimeError('Error decoding JSON output from Java') from e  # Handle JSON decoding error

In [33]:
# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(predictions_and_stats_list)
df['StandId'] = [items for items in stand_id]
# Adjust display options to show full content of columns
pd.set_option('display.max_colwidth', None)


In [34]:
df

Unnamed: 0,mode,median,mean,highestValue,lowestValue,StandId
0,343,343,337,343,330,11
1,333,333,333,333,333,12
2,330,330,332,343,330,13
3,333,333,333,333,330,14
4,333,333,333,355,330,15
5,351,351,349,360,340,16
6,356,356,359,384,340,2016
7,343,343,337,343,330,1978
8,330,330,332,343,330,1993
9,333,333,335,396,330,1995


In [35]:
result_folder = os.path.join(cwd,'result/result.csv')
df.to_csv(result_folder)