### Import libraries

In [None]:
import gc
import os
import pickle
import time

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import seaborn as sns
from geocube.api.core import make_geocube
from raster2xyz.raster2xyz import Raster2xyz
from rasterio.plot import show
from shapely import wkt
from shapely.geometry import Point
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, RFE
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, roc_curve, precision_score, recall_score, roc_auc_score, classification_report
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV, cross_val_score, validation_curve
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler



This code snippet extracts raster values from multiple raster files (`.tif` or `.img`) for each point in a shapefile and saves the results to a CSV file. It first reads the shapefile containing point geometries. Then it iterates through the raster files sorted by file name, samples each raster at the point coordinates (keeping the first band value), and adds the values to the DataFrame as a column named after the file. Finally, it saves the DataFrame to a CSV file and prints the processing time.

In [None]:
# Start the CPU-time clock for the whole extraction step.
start = time.process_time()

# Point shapefile to sample at (empty placeholder — presumably filled in before running).
shapefile_path = ""
grid_df = gpd.read_file(shapefile_path)

# Folder holding the rasters (empty placeholder — presumably filled in before running).
tif_folder = ""

# Keep only .tif / .img entries, then sort by name so the columns added
# later appear in a reproducible order.
tif_files = [name for name in os.listdir(tif_folder) if name.endswith(('.tif', '.img'))]
tif_files_sorted = sorted(tif_files)

def get_raster_values(tif_file, coords):
    """Sample one raster at the given coordinates.

    Args:
        tif_file: File name of the raster, resolved relative to the
            module-level ``tif_folder``.
        coords: Iterable of ``(x, y)`` pairs passed to ``rasterio``'s
            ``sample``; presumably expressed in the raster's CRS.

    Returns:
        A list with one entry per coordinate: the first element of each
        sampled record (presumably the first band of the raster).
    """
    raster_path = os.path.join(tif_folder, tif_file)
    with rasterio.open(raster_path) as dataset:
        # The sampler is lazy, so it is consumed while the dataset is open.
        return [sample[0] for sample in dataset.sample(coords)]

# The point coordinates are the same for every raster, so build the list
# once instead of rebuilding it on each loop iteration.
coord_list = list(zip(grid_df["geometry"].x, grid_df["geometry"].y))

for tif_file in tif_files_sorted:
    # Name the new column after the raster file, without its extension.
    column_name = os.path.splitext(tif_file)[0]
    grid_df[column_name] = get_raster_values(tif_file, coord_list)

# Write the points plus all sampled columns to CSV
# (the output path is an empty placeholder here).
grid_df.to_csv("", sep=',', index=False, header=True)
print("Features extracted to the file")
# process_time() reports CPU time, not wall-clock time.
print("Procesing time in [s]", time.process_time() - start)

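This code performs the data preprocessing steps (replacing infinite values and the placeholder values 0, -1000000000 and 9999 with NaN, standardizing the numeric feature columns, and imputing missing values with the column median), loads a trained model from a pickle file, predicts a class for every point, and merges the predictions with the original geometry column.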

In [None]:
# Treat infinities and the placeholder values 0.0, -1e9 and 9999.0 as missing.
# NOTE(review): this also turns genuine zeros into NaN in every column —
# confirm that 0.0 is really a no-data marker for all rasters.
grid_df.replace([np.inf, -np.inf, 0.0, -1000000000.0, 9999.0], np.nan, inplace=True)

# Path to the pickled model (empty placeholder — presumably filled in before running).
filename = ""

# Feature columns start at position 4; position 3 is used as the geometry
# column further down. Selecting the numeric columns from the same slice that
# is indexed afterwards avoids a KeyError if column 3 were ever numeric.
grid_features = grid_df.iloc[:, 4:]
numeric_columns = grid_features.select_dtypes(include=[np.number]).columns
grid_features_numeric = grid_features[numeric_columns]

# NOTE(review): the scaler and the imputer are fitted on the prediction data
# itself. If the model was trained on features transformed by a scaler/imputer
# fitted on the training set, those fitted objects should be reused here —
# confirm against the training code.
scaler = StandardScaler()
features_scaled = pd.DataFrame(scaler.fit_transform(grid_features_numeric), columns=grid_features_numeric.columns)
imputation = SimpleImputer(missing_values=np.nan, strategy='median')
grid_features_clean = imputation.fit_transform(features_scaled)

# SECURITY: unpickling executes arbitrary code; only load model files from a
# trusted source. The context manager closes the file handle after loading.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Give the predictions grid_df's index so the index-based merge below pairs
# every prediction with its own row even if grid_df has a non-default index.
result = pd.DataFrame(loaded_model.predict(grid_features_clean), index=grid_df.index)
df1 = pd.merge(grid_df.iloc[:, 3], result, left_index=True, right_index=True)
df1.columns = ['geometry', 'pred_class']


This code snippet builds a GeoDataFrame (grid_gdf) from the predicted classes, converts it into a raster, and saves it as a GeoTIFF file. It uses the make_geocube function from the GeoCube library to create a raster cube from the vector data, with the predicted class as the measurement. The resulting raster is then saved as a GeoTIFF file. Finally, it prints the path to the saved GeoTIFF file and the processing time.

In [None]:
# Restart the CPU-time clock for the rasterization step.
start = time.process_time()

# Wrap the geometry/prediction table as a GeoDataFrame.
# NOTE(review): the CRS is hard-coded to EPSG:32631 — confirm it matches the
# CRS of the input shapefile.
grid_gdf = gpd.GeoDataFrame(df1, geometry='geometry', crs='EPSG:32631')

# Output path (empty placeholder) and cell size in CRS units.
geotif_file = ""
res = 20

# Burn the predicted class of each geometry into a raster cube.
# Presumably the negative first resolution component gives a north-up grid —
# verify against the geocube documentation.
out_grd = make_geocube(
    vector_data=grid_gdf,
    measurements=["pred_class"],
    resolution=(-res, res),
)

# Write the single "pred_class" layer to disk as a GeoTIFF.
out_grd["pred_class"].rio.to_raster(geotif_file)

print('Classification result in geotiff write as:', geotif_file)
print("Procesing time in [s]", time.process_time() - start)