<a href="https://colab.research.google.com/github/mhsnur/TESIS-S2-Geomatika-FT-UGM-Muhsin-Nur-Alamsyah/blob/main/RANDOM_FOREST/RANDOM_FOREST_TEKNIK_UGM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (this import only exists on Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ============================================
# 1. INSTALL & IMPORT LIBRARY
# ============================================
!pip install laspy cloth-simulation-filter numpy scikit-learn scipy --quiet

import laspy
import numpy as np
import os
import CSF
from scipy.spatial import cKDTree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from google.colab import drive

# ============================================
# 2. MOUNT GOOGLE DRIVE
# ============================================
# Every input/output path used below lives under /content/drive/MyDrive.
drive.mount('/content/drive')

# ============================================
# 3. FUNGSI CSF FILTERING (Steep Slope + Slope Processing)
# ============================================
def csf_ground_non_ground_steep(las_path, save_ground_path=None, save_non_ground_path=None):
    """Split a LAS point cloud into ground and non-ground points with CSF.

    Args:
        las_path: Path of the LAS file to read.
        save_ground_path: Optional output path; when truthy, the ground
            points are written there as a LAS file.
        save_non_ground_path: Optional output path; when truthy, the
            non-ground points are written there as a LAS file.

    Returns:
        Tuple ``(ground_idx, non_ground_idx)`` of integer NumPy arrays with
        the point indices the filter placed in each group.
    """
    source = laspy.read(las_path)
    records = source.points
    coords = np.vstack((source.x, source.y, source.z)).T

    # Cloth Simulation Filter configuration.
    cloth_filter = CSF.CSF()
    cloth_filter.params.cloth_resolution = 1.5
    cloth_filter.params.threshold = 0.4
    cloth_filter.params.rigidness = 3
    cloth_filter.params.bSloopSmooth = True

    # NOTE(review): tolist() materialises a Python list of every point;
    # check whether the installed CSF binding accepts the ndarray directly.
    cloth_filter.setPointCloud(coords.tolist())

    ground_vec = CSF.VecInt()
    non_ground_vec = CSF.VecInt()
    cloth_filter.do_filtering(ground_vec, non_ground_vec)

    ground_idx = np.array(ground_vec, dtype=int)
    non_ground_idx = np.array(non_ground_vec, dtype=int)

    # Write each requested subset (ground first), reusing the source file's
    # point format, version, offsets and scales.
    for out_path, subset_idx in (
        (save_ground_path, ground_idx),
        (save_non_ground_path, non_ground_idx),
    ):
        if not out_path:
            continue
        subset = laspy.create(
            point_format=source.header.point_format,
            file_version=source.header.version,
        )
        subset.points = records[subset_idx]
        subset.header.offsets = source.header.offsets
        subset.header.scales = source.header.scales
        subset.write(out_path)

    return ground_idx, non_ground_idx


# ============================================
# 4. RUN CSF FILTERING
# ============================================
input_las = "/content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/Teknik_UGM_clean.las"
ground_output = "/content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/ground_uc.las"
nonground_output = "/content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/non_ground_uc.las"

# Filter the cloud once; both subsets are also written to the paths above.
ground_idx, non_ground_idx = csf_ground_non_ground_steep(
    input_las, ground_output, nonground_output
)

# Report the point counts and where each subset was saved.
print(
    f"Jumlah titik ground: {len(ground_idx)}",
    f"Jumlah titik non-ground: {len(non_ground_idx)}",
    f"✅ Ground disimpan di: {ground_output}",
    f"✅ Non-ground disimpan di: {nonground_output}",
    sep="\n",
)


# ============================================
# 5. FUNGSI FITUR TAMBAHAN (NORMAL Z & DENSITY)
# ============================================
def compute_local_features(xyz, radius=1.0, k_neighbors=12):
    """Compute the normal's Z component and the local point density per point.

    For every point, the neighbours within ``radius`` (the point itself
    included) are collected. Density is the neighbour count divided by the
    volume of the search sphere. The normal is the eigenvector of the
    neighbourhood covariance with the smallest eigenvalue, flipped so that
    its Z component is non-negative.

    Args:
        xyz: ``(N, 3)`` array of point coordinates.
        radius: Search radius, in the same units as ``xyz``.
        k_neighbors: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(normal_z, density)``, both ``(N, 1)`` float arrays.
        ``normal_z`` stays 0 where fewer than three neighbours are found,
        because no plane can be fitted there.
    """
    xyz = np.asarray(xyz, dtype=float)
    n_points = len(xyz)
    normal_z = np.zeros((n_points, 1))
    density = np.zeros((n_points, 1))
    if n_points == 0:
        return normal_z, density

    tree = cKDTree(xyz)
    sphere_volume = 4 / 3 * np.pi * radius**3

    for i, point in enumerate(xyz):
        idx = tree.query_ball_point(point, r=radius)

        # Density is well defined for any neighbour count, so record it
        # before the plane-fit guard; otherwise sparse points would report
        # a density of 0 instead of count / volume.
        density[i, 0] = len(idx) / sphere_volume

        if len(idx) < 3:
            continue

        # np.cov removes the mean itself, so no explicit centring is needed.
        cov = np.cov(xyz[idx], rowvar=False)
        _, eigvecs = np.linalg.eigh(cov)

        # eigh sorts eigenvalues ascending: column 0 is the surface normal.
        # Orienting the normal upwards equals taking |z| of that vector.
        normal_z[i, 0] = abs(eigvecs[2, 0])

    return normal_z, density


# ============================================
# 6. FUNGSI EKSTRAKSI FITUR KOMPLET
# ============================================
def extract_features_complete(las_data, radius=1.0, k_neighbors=10):
    """Build the per-point feature matrix fed to the classifier.

    Columns, in order: green ratio, normalised intensity, local Z standard
    deviation, normal Z, density.

    Args:
        las_data: Object exposing ``x``, ``y``, ``z`` and ``intensity`` arrays
            and, optionally, ``red``/``green``/``blue`` (e.g. a laspy
            ``LasData``).
        radius: Search radius forwarded to ``compute_local_features``.
        k_neighbors: Number of nearest neighbours (the point itself included)
            used for the local Z standard deviation. It is clamped to the
            number of available points.

    Returns:
        ``(N, 5)`` float array with one row per point.
    """
    xyz = np.vstack((las_data.x, las_data.y, las_data.z)).T
    n_points = len(xyz)

    # Green ratio G / (R + G + B); the epsilon avoids division by zero on
    # black points. Clouds without colour get a constant 0 column.
    if hasattr(las_data, "red"):
        red = las_data.red.astype(float)
        green = las_data.green.astype(float)
        blue = las_data.blue.astype(float)
        green_ratio = (green / (red + green + blue + 1e-6)).reshape(-1, 1)
    else:
        green_ratio = np.zeros((n_points, 1))

    # Intensity scaled by this cloud's own maximum (skipped when empty or
    # all zero, so no division by zero and no max() on an empty array).
    intensity = las_data.intensity.astype(float)
    peak = intensity.max() if intensity.size else 0.0
    if peak > 0:
        intensity = intensity / peak
    intensity = intensity.reshape(-1, 1)

    # Local height variability: std of Z over the k nearest neighbours.
    # Stored as float32, as before, so feature values keep their precision.
    z_std = np.zeros(n_points, dtype=np.float32)
    # Clamp k: asking cKDTree for more neighbours than exist returns the
    # out-of-range index n for the missing ones, which cannot be indexed.
    k = min(int(k_neighbors), n_points)
    if k > 0:
        tree = cKDTree(xyz)
        z_values = xyz[:, 2]
        chunk_size = 1_000_000  # bounds the (chunk, k) index/distance arrays
        for start in range(0, n_points, chunk_size):
            stop = start + chunk_size
            _, idx = tree.query(xyz[start:stop], k=k)
            # query() drops the neighbour axis when k == 1; restore it.
            idx = idx.reshape(len(idx), -1)
            z_std[start:stop] = z_values[idx].std(axis=1)
    z_std = z_std.reshape(-1, 1)

    # Normal Z and density from the fixed-radius neighbourhood.
    normal_z, density = compute_local_features(xyz, radius, k_neighbors)

    # Column order must match the feature names used for reporting.
    return np.hstack((green_ratio, intensity, z_std, normal_z, density))


# ============================================
# 7. READ TRAINING DATA (VEGETATION & BUILDINGS)
# ============================================
veg_las = laspy.read("/content/drive/MyDrive/Segmented_LAS/DATA TESIS/VEGETASI_1.las")
bdg_las = laspy.read("/content/drive/MyDrive/Segmented_LAS/DATA TESIS/BANGUNAN_1.las")

# One feature row per point of each labelled sample cloud.
features_veg = extract_features_complete(veg_las)
features_bdg = extract_features_complete(bdg_las)

# Constant class code per sample: 5 = vegetation, 6 = building.
labels_veg = np.full(features_veg.shape[0], 5)
labels_bdg = np.full(features_bdg.shape[0], 6)

# Stack both samples into one design matrix and one label vector.
X = np.concatenate((features_veg, features_bdg), axis=0)
y = np.concatenate((labels_veg, labels_bdg))


# ============================================
# 8. TRAIN RANDOM FOREST CLASSIFIER
# ============================================
# Hold out 20% of the labelled points for evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and training chain.
clf = RandomForestClassifier(
    n_estimators=150,
    max_depth=20,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

# Evaluate on the held-out points.
y_pred = clf.predict(X_test)
print(
    "=== Evaluasi Model RF ===",
    classification_report(y_test, y_pred),
    confusion_matrix(y_test, y_pred),
    sep="\n",
)


# ============================================
# 9. CLASSIFY NON-GROUND POINTS OF THE MAIN DATASET
# ============================================
las_all = laspy.read(input_las)

# Wrap the non-ground subset in its own LAS container so the feature
# extractor can read x/y/z, colour and intensity from it.
non_ground_points = laspy.create(point_format=las_all.header.point_format, file_version=las_all.header.version)
non_ground_points.points = las_all.points[non_ground_idx]
# Copy the source offsets/scales, as every other LAS container built in this
# file does. A container from laspy.create() starts with default header
# scaling, and x/y/z may be derived from the header (laspy-version
# dependent), which would put the geometric features in the wrong units.
non_ground_points.header.offsets = las_all.header.offsets
non_ground_points.header.scales = las_all.header.scales

features_non_ground = extract_features_complete(non_ground_points)
predicted_labels = clf.predict(features_non_ground)


# ============================================
# 10. MERGE GROUND & NON-GROUND LABELS
# ============================================
# One label per point of the full cloud; a point listed in neither index
# array keeps the initial value 0.
final_labels = np.zeros(len(las_all.points), dtype=np.uint8)
final_labels[ground_idx] = 2                # 2 = Ground
final_labels[non_ground_idx] = predicted_labels  # 5 = Vegetation, 6 = Building


# ============================================
# 11. SAVE THE FINAL CLASSIFICATION RESULT
# ============================================
# New LAS container with the source file's point format and version.
final_las = laspy.create(point_format=las_all.header.point_format, file_version=las_all.header.version)
final_las.points = las_all.points
# Overwrite the classification dimension with the merged labels.
final_las.classification = final_labels
# Reuse the source offsets/scales so the header matches the copied records.
final_las.header.offsets = las_all.header.offsets
final_las.header.scales = las_all.header.scales

output_final = "/content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/TEKNIK_segmented_RF_green_normal_density.las"
final_las.write(output_final)

print(f"✅ Hasil klasifikasi final disimpan di: {output_final}")


# ============================================
# 12. SHOW FEATURE IMPORTANCE
# ============================================
import pandas as pd

# Names follow the column order of the feature matrix the model was fit on.
feature_names = ["green_ratio", "intensity", "z_std", "normal_z", "density"]

importance = pd.DataFrame(
    {"Feature": feature_names, "Importance": clf.feature_importances_}
)
# Most influential feature first.
importance = importance.sort_values(by="Importance", ascending=False)

print("\n=== Feature Importance ===", importance, sep="\n")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Jumlah titik ground: 17782728
Jumlah titik non-ground: 41895171
✅ Ground disimpan di: /content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/ground_uc.las
✅ Non-ground disimpan di: /content/drive/MyDrive/Segmented_LAS/DATA TESIS/tambah _data/TEKNIK_UGM/non_ground_uc.las

=== Evaluasi Model RF ===
              precision    recall  f1-score   support

           5       1.00      0.99      0.99    624498
           6       0.99      1.00      1.00    835967

    accuracy                           0.99   1460465
   macro avg       0.99      0.99      0.99   1460465
weighted avg       0.99      0.99      0.99   1460465

[[619890   4608]
 [  3110 832857]]
✅ Hasil klasifikasi final disimpan di: /content/drive/MyDrive/Segmented_LAS/DATA TESIS/hasil_csf_rf_segmentasi.las
