<a href="https://colab.research.google.com/github/mhsnur/TESIS-S2-Geomatika-FT-UGM-Muhsin-Nur-Alamsyah/blob/main/RANDOM_FOREST/RANDOM_FOREST_KOTA_BARU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; every LAS path used later in this
# notebook is built underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install laspy cloth-simulation-filter numpy


import laspy
import numpy as np
import os
import CSF
from scipy.spatial import cKDTree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive


def csf_ground_non_ground(las_path,
                          cloth_resolution=2.0,
                          threshold=0.5,
                          rigidness=6,
                          slope_smooth=True,
                          save_ground_path=None,
                          save_non_ground_path=None):
    """Split a LAS point cloud into ground / non-ground with CSF.

    Args:
        las_path: Path of the LAS file to read.
        cloth_resolution: Forwarded to ``csf.params.cloth_resolution``.
        threshold: Forwarded to ``csf.params.threshold``.
        rigidness: Forwarded to ``csf.params.rigidness``.
        slope_smooth: Forwarded to ``csf.params.bSloopSmooth`` (the
            misspelling is the attribute name used by the CSF binding).
        save_ground_path: If truthy, the ground points are written to this
            LAS path.
        save_non_ground_path: If truthy, the non-ground points are written
            to this LAS path.

    Returns:
        ``(ground_idx, non_ground_idx)``: two integer NumPy arrays holding
        the indices (into the points of ``las_path``) that CSF assigned to
        each group.
    """
    las = laspy.read(las_path)
    xyz = np.vstack((las.x, las.y, las.z)).T

    csf = CSF.CSF()
    csf.params.cloth_resolution = cloth_resolution
    csf.params.threshold = threshold
    csf.params.rigidness = rigidness
    csf.params.bSloopSmooth = slope_smooth

    # Hand the N x 3 array over directly, as in the CSF README example.
    # Converting millions of points to nested Python lists first costs
    # several times the memory of the array for no benefit.
    csf.setPointCloud(xyz)

    ground = CSF.VecInt()
    non_ground = CSF.VecInt()
    csf.do_filtering(ground, non_ground)

    ground_idx = np.array(ground, dtype=int)
    non_ground_idx = np.array(non_ground, dtype=int)

    if save_ground_path:
        _write_point_subset(las, ground_idx, save_ground_path)
    if save_non_ground_path:
        _write_point_subset(las, non_ground_idx, save_non_ground_path)

    return ground_idx, non_ground_idx


def _write_point_subset(las, indices, out_path):
    """Write the points of ``las`` selected by ``indices`` to ``out_path``."""
    subset = laspy.create(point_format=las.header.point_format,
                          file_version=las.header.version)
    subset.points = las.points[indices]
    # Carry over the source scales/offsets so the copied point records are
    # written with the same coordinate encoding as the input file.
    subset.header.offsets = las.header.offsets
    subset.header.scales = las.header.scales
    subset.write(out_path)

# Working directory on the mounted Drive and the three LAS files derived
# from it: the input scene plus the two CSF outputs.
path_base = '/content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/KOTA BARU'
input_las, ground_output, nonground_output = (
    os.path.join(path_base, file_name)
    for file_name in ("area_uji.las", "ground_csf.las", "non_ground_csf.las")
)

# Run the ground filter and persist both partitions next to the input.
ground_idx, non_ground_idx = csf_ground_non_ground(
    input_las,
    save_ground_path=ground_output,
    save_non_ground_path=nonground_output,
    cloth_resolution=2.0,
    threshold=0.5,
    rigidness=6,
    slope_smooth=True,
)

# Report point counts and output locations, one item per line.
print(
    f"Jumlah titik ground: {len(ground_idx)}",
    f"Jumlah titik non-ground: {len(non_ground_idx)}",
    f"‚úÖ Ground disimpan di: {ground_output}",
    f"‚úÖ Non-ground disimpan di: {nonground_output}",
    sep="\n",
)

Collecting laspy
  Downloading laspy-2.6.1-py3-none-any.whl.metadata (3.8 kB)
Collecting cloth-simulation-filter
  Downloading cloth_simulation_filter-1.1.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)
Downloading laspy-2.6.1-py3-none-any.whl (86 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m86.1/86.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cloth_simulation_filter-1.1.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.6 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.6/1.6 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cloth-simulation-filter, laspy
Successfully installed cloth-simulation-filter-1.1.7 laspy-2.6.1
Jumlah titik ground: 1784345
Jumlah titik non-ground: 7883018
‚úÖ Ground 

In [None]:

from scipy.spatial import cKDTree

def extract_features(las_data, k_neighbors=10, chunk_size=100_000):
    """Build the per-point feature matrix used by the classifier.

    Args:
        las_data: Object exposing ``x``, ``y``, ``z``, ``red``, ``green``,
            ``blue`` and ``intensity`` as equal-length 1-D arrays.
        k_neighbors: Number of nearest neighbours (the point itself
            included) used for the local height statistic. Clamped to the
            number of points in the cloud.
        chunk_size: Number of points whose neighbourhoods are queried per
            batch; bounds the size of the temporary index array.

    Returns:
        Array of shape ``(n_points, 5)`` with columns
        ``red, green, blue, intensity, z_std`` where ``z_std`` is the
        standard deviation of z over the point's nearest neighbours.

    Raises:
        ValueError: If ``k_neighbors`` or ``chunk_size`` is smaller than 1.
    """
    if k_neighbors < 1:
        raise ValueError("k_neighbors must be >= 1")
    if chunk_size < 1:
        raise ValueError("chunk_size must be >= 1")

    xyz = np.vstack((las_data.x, las_data.y, las_data.z)).T
    rgb = np.vstack((las_data.red, las_data.green, las_data.blue)).T
    intensity = np.asarray(las_data.intensity)

    n_points = len(xyz)
    z_std = np.zeros(n_points, dtype=np.float64)

    if n_points:
        # cKDTree pads missing neighbours with index n when k > n, which
        # would index out of bounds, so never ask for more than exist.
        k = min(k_neighbors, n_points)
        tree = cKDTree(xyz)
        z = xyz[:, 2]
        # One batched query per chunk instead of one Python-level query
        # per point; chunking keeps the (chunk, k) index array small.
        for start in range(0, n_points, chunk_size):
            stop = start + chunk_size
            _, idx = tree.query(xyz[start:stop], k=k)
            # query() drops the neighbour axis when k == 1; restore it.
            idx = idx.reshape(-1, k)
            z_std[start:stop] = z[idx].std(axis=1)

    return np.hstack((
        rgb,
        intensity.reshape(-1, 1),
        z_std.reshape(-1, 1),
    ))

# Training samples. Class codes used throughout this notebook:
# 2 = ground, 5 = vegetation, 6 = building.
# Each variable must read the file matching its label: loading
# "bangunan.las" (buildings) into veg_las would train the model with the
# two classes swapped.
veg_las = laspy.read(os.path.join(path_base, "vegetasi.las"))
bdg_las = laspy.read(os.path.join(path_base, "bangunan.las"))

features_veg = extract_features(veg_las)
features_bdg = extract_features(bdg_las)

labels_veg = np.full(len(features_veg), 5)
labels_bdg = np.full(len(features_bdg), 6)

X = np.vstack((features_veg, features_bdg))
y = np.hstack((labels_veg, labels_bdg))


from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Hold out 20 % of the labelled points to evaluate the classifier.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print("=== Evaluasi Model RF ===")
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Classify every non-ground point of the full scene with the trained model.
las_all = laspy.read(input_las)
non_ground_points = laspy.create(point_format=las_all.header.point_format, file_version=las_all.header.version)
non_ground_points.points = las_all.points[non_ground_idx]

features_non_ground = extract_features(non_ground_points)
predicted_labels = clf.predict(features_non_ground)


# Merge: CSF ground points get class 2, the rest get the predicted class.
final_labels = np.zeros(len(las_all.points), dtype=np.uint8)
final_labels[ground_idx] = 2
final_labels[non_ground_idx] = predicted_labels


# Write a copy of the scene with the classification field filled in.
final_las = laspy.create(point_format=las_all.header.point_format, file_version=las_all.header.version)
final_las.points = las_all.points
final_las.classification = final_labels
final_las.header.offsets = las_all.header.offsets
final_las.header.scales = las_all.header.scales

output_final = os.path.join(path_base, "hasil_segmentasi_rf.las")
final_las.write(output_final)

print(f"‚úÖ Hasil klasifikasi final disimpan di: {output_final}")


=== Evaluasi Model RF ===
              precision    recall  f1-score   support

           5       0.99      0.99      0.99    315341
           6       0.96      0.96      0.96     81967

    accuracy                           0.98    397308
   macro avg       0.97      0.97      0.97    397308
weighted avg       0.98      0.98      0.98    397308

[[311960   3381]
 [  3565  78402]]
‚úÖ Hasil klasifikasi final disimpan di: /content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/KOTA BARU/hasil_segmentasi_rf.las


In [None]:
from tqdm import tqdm

# ============================================
# 4. FEATURE EXTRACTION (without density)
# ============================================
def extract_features(las_data, ground_z_mean=None, k_neighbors=40,
                     chunk_size=100_000):
    """Build colour, intensity and local-geometry features for every point.

    Args:
        las_data: Object exposing ``x``, ``y``, ``z`` and ``intensity`` as
            equal-length 1-D arrays, and optionally ``red``, ``green``,
            ``blue``.
        ground_z_mean: Reference elevation for the relative-height feature.
            When ``None``, the lowest z in the point's own neighbourhood is
            used instead. Use the same choice for training and prediction.
        k_neighbors: Number of nearest neighbours (the point itself
            included) per neighbourhood. Clamped to the number of points.
            With fewer than 3 neighbours the normal is not well defined and
            ``slope`` / ``normal_z`` are whatever the eigen-solver returns.
        chunk_size: Number of points processed per batched neighbour
            query; bounds the size of the temporary arrays.

    Returns:
        Array of shape ``(n_points, 9)`` with columns
        ``r, g, b, intensity, z_std, slope_deg, normal_z, rel_height,
        z_mean``.

    Raises:
        ValueError: If ``k_neighbors`` or ``chunk_size`` is smaller than 1.
    """
    if k_neighbors < 1:
        raise ValueError("k_neighbors must be >= 1")
    if chunk_size < 1:
        raise ValueError("chunk_size must be >= 1")

    try:
        from tqdm import tqdm as progress
    except ImportError:
        # The progress bar is cosmetic; fall back to plain iteration.
        def progress(iterable):
            return iterable

    xyz = np.vstack((las_data.x, las_data.y, las_data.z)).T
    n_points = len(xyz)
    # astype() copies, so the in-place scaling below never touches the
    # caller's intensity array.
    intensity = np.asarray(las_data.intensity).astype(np.float32)

    # Colour, scaled to [0, 1].
    if all(hasattr(las_data, band) for band in ("red", "green", "blue")):
        rgb = np.vstack(
            (las_data.red, las_data.green, las_data.blue)
        ).T.astype(np.float32)
        # Values above 255 can only come from 16-bit colour; otherwise the
        # channels are treated as 8-bit. (Testing against 1 instead would
        # divide ordinary 8-bit colours by 65535.)
        rgb_scale = 65535.0 if rgb.size and rgb.max() > 255 else 255.0
        rgb_norm = rgb / rgb_scale
    else:
        rgb_norm = np.zeros((n_points, 3), dtype=np.float32)

    # Intensity, scaled by its 99th percentile. Skip the division when the
    # percentile is zero so an all-zero channel does not become NaN/inf.
    if n_points:
        intensity_p99 = np.percentile(intensity, 99)
        if intensity_p99 > 0:
            intensity /= intensity_p99

    z_std = np.zeros(n_points, dtype=np.float64)
    z_mean = np.zeros(n_points, dtype=np.float64)
    normal_z = np.zeros(n_points, dtype=np.float64)
    rel_h = np.zeros(n_points, dtype=np.float64)

    print("üîç Mengekstraksi fitur (RGB + Intensitas + Geometri)...")
    if n_points:
        # cKDTree pads missing neighbours with index n when k > n, which
        # would index out of bounds, so never ask for more than exist.
        k = min(k_neighbors, n_points)
        tree = cKDTree(xyz)
        for start in progress(range(0, n_points, chunk_size)):
            stop = start + chunk_size
            _, idx = tree.query(xyz[start:stop], k=k)
            # query() drops the neighbour axis when k == 1; restore it.
            neighbors = xyz[idx.reshape(-1, k)]          # (m, k, 3)
            nbr_z = neighbors[:, :, 2]

            # Local height statistics.
            z_std[start:stop] = nbr_z.std(axis=1)
            z_mean[start:stop] = nbr_z.mean(axis=1)

            # Surface normal = eigenvector of the smallest eigenvalue of the
            # neighbourhood scatter matrix. The scatter matrix is the
            # covariance up to a positive factor, so the eigenvectors match.
            centered = neighbors - neighbors.mean(axis=1, keepdims=True)
            scatter = np.einsum("nki,nkj->nij", centered, centered)
            # eigh returns eigenvalues in ascending order, so column 0 is
            # the normal; row 2 of that column is its z component.
            _, eigvecs = np.linalg.eigh(scatter)
            normal_z[start:stop] = np.abs(eigvecs[:, 2, 0])

            if ground_z_mean is None:
                rel_h[start:stop] = xyz[start:stop, 2] - nbr_z.min(axis=1)

    if ground_z_mean is not None:
        rel_h = xyz[:, 2] - ground_z_mean

    # Clip before arccos: rounding can push a unit component just past 1.
    slope = np.degrees(np.arccos(np.clip(normal_z, 0.0, 1.0)))

    # Column order is part of the contract with the trained model.
    features = np.hstack((
        rgb_norm,
        intensity.reshape(-1, 1),
        z_std.reshape(-1, 1),
        slope.reshape(-1, 1),
        normal_z.reshape(-1, 1),
        rel_h.reshape(-1, 1),
        z_mean.reshape(-1, 1),
    ))

    return features


# ============================================
# 5. TRAINING DATA (VEGETATION & BUILDINGS)
# ============================================
# Load the full scene first: the relative-height feature must use the same
# reference for the training clouds and for the points being classified,
# otherwise the model is applied to a differently defined feature.
las_all = laspy.read(input_las)

# Mean elevation of the CSF ground points, used as the height reference.
# NOTE(review): assumes the training clouds share the vertical datum of
# area_uji.las - confirm.
ground_z_mean = np.mean(las_all.z[ground_idx])

veg_las = laspy.read(os.path.join(path_base, "VEGETASI_1.las"))
bdg_las = laspy.read(os.path.join(path_base, "BANGUNAN_1.las"))

features_veg = extract_features(veg_las, ground_z_mean=ground_z_mean)
features_bdg = extract_features(bdg_las, ground_z_mean=ground_z_mean)

labels_veg = np.full(len(features_veg), 5)  # Vegetation
labels_bdg = np.full(len(features_bdg), 6)  # Building

X = np.vstack((features_veg, features_bdg))
y = np.hstack((labels_veg, labels_bdg))

print("\n‚úÖ Ekstraksi fitur selesai.")
print(f"Total data training: {len(X)} titik.")


# ============================================
# 6. TRAIN RANDOM FOREST
# ============================================
# Stratified 80/20 split so both classes keep their proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

clf = RandomForestClassifier(
    n_estimators=250,
    max_depth=22,
    min_samples_leaf=3,
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print("\n=== üìà Evaluasi Model Random Forest ===")
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))


# ============================================
# 7. CLASSIFY ALL NON-GROUND POINTS
# ============================================
non_ground_points = laspy.create(point_format=las_all.header.point_format,
                                 file_version=las_all.header.version)
non_ground_points.points = las_all.points[non_ground_idx]

features_non_ground = extract_features(non_ground_points, ground_z_mean=ground_z_mean)
predicted_labels = clf.predict(features_non_ground)

# Merge: CSF ground points get class 2, the rest get the predicted class.
final_labels = np.zeros(len(las_all.points), dtype=np.uint8)
final_labels[ground_idx] = 2  # ground
final_labels[non_ground_idx] = predicted_labels

# Write a copy of the scene with the classification field filled in.
final_las = laspy.create(point_format=las_all.header.point_format,
                         file_version=las_all.header.version)
final_las.points = las_all.points
final_las.classification = final_labels
final_las.header.offsets = las_all.header.offsets
final_las.header.scales = las_all.header.scales

output_final = os.path.join(path_base, "hasil_segmentasi_RF_TANPA_density.las")
final_las.write(output_final)

print(f"\n‚úÖ Hasil klasifikasi final disimpan di: {output_final}")

üîç Mengekstraksi fitur (RGB + Intensitas + Geometri)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3120970/3120970 [11:37<00:00, 4476.49it/s]


üîç Mengekstraksi fitur (RGB + Intensitas + Geometri)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4181354/4181354 [15:24<00:00, 4523.10it/s]



‚úÖ Ekstraksi fitur selesai.
Total data training: 7302324 titik.

=== üìà Evaluasi Model Random Forest ===
              precision    recall  f1-score   support

           5       1.00      1.00      1.00    624194
           6       1.00      1.00      1.00    836271

    accuracy                           1.00   1460465
   macro avg       1.00      1.00      1.00   1460465
weighted avg       1.00      1.00      1.00   1460465

[[623295    899]
 [  1172 835099]]
üîç Mengekstraksi fitur (RGB + Intensitas + Geometri)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12620969/12620969 [45:19<00:00, 4641.04it/s]



‚úÖ Hasil klasifikasi final disimpan di: /content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/KOTA BARU/hasil_segmentasi_RF_TANPA_density.las
