<a href="https://colab.research.google.com/github/mohammadbadi/Clustering_Frequency/blob/main/Code%20Sections/5.6.3%20DBSCAN%20EPS%20%2B%20Min%20Sample%20Final%20Tuning%20Stage%203.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **5.6.3 DBSCAN EPS + Min Sample Tuning: Stage 3 - Final Tuning**
### **Finest step sizes for eps and min_samples, based on the Stage 2 results - APPROACH_2**

In [None]:
import warnings
import os
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from google.colab import files

# Suppress DeprecationWarning and FutureWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Read the encoded dataset from its URL and keep a reproducible 10% sample of the rows.
url = "https://raw.githubusercontent.com/mohammadbadi/Clustering_Frequency/refs/heads/main/Output_CSV/FE_Encoded_New.csv"
data = pd.read_csv(url).sample(frac=0.1, random_state=42)

# Inclusive range of chunk numbers to process.
start_chunk, end_chunk = 29, 30

# One intermediate Top-10 CSV is expected per chunk, relative to the working directory.
expected_filenames = [
    f"DBSCAN_Intermediate_Top10_chunk_{chunk_no}.csv"
    for chunk_no in range(start_chunk, end_chunk + 1)
]

# Report up front which of the expected files are absent from the environment.
missing_files = [path for path in expected_filenames if not os.path.exists(path)]
if not missing_files:
    print("All expected files found. Processing...")
else:
    print(f"Error: The following expected files were not found: {missing_files}")

# Spacing between consecutive eps candidates in the fine grid.
eps_step = 0.1

# Fine-tune every intermediate chunk file that is present. For each feature set
# listed in a file, grid-search eps x min_samples inside the range spanned by its
# top-10 candidates, keep the combination with the highest silhouette score, and
# write one "Best_Fine_Params" CSV per chunk file.
for expected_filename in expected_filenames:
    if not os.path.exists(expected_filename):
        continue                                                                  # Already reported by the missing-file check

    intermediate_df = pd.read_csv(expected_filename)
    final_results = []

    for _, row in intermediate_df.iterrows():
        set_number = row["set_number"]
        # NOTE(review): eval() executes whatever expression is stored in the CSV
        # cell. Only run this on files you produced yourself; consider switching
        # to ast.literal_eval once the stored format is confirmed to be plain
        # Python literals.
        feature_set = eval(row["features"])
        if pd.isnull(row["best_eps"]) or pd.isnull(row["best_min_samples"]):
            continue                                                              # No usable parameters recorded for this set

        top10_eps_list = eval(row["top10_eps"])                                   # NOTE(review): eval on CSV content, see above
        top10_min_samples_list = eval(row["top10_min_samples"])                   # NOTE(review): eval on CSV content, see above
        if not top10_eps_list or not top10_min_samples_list:
            # min()/max() would raise on an empty list; skip the row instead of crashing.
            print(f"Fine tuning: Set {set_number} skipped. Empty top-10 candidate list.")
            continue

        # Build the eps grid from an integer step count instead of a float-step
        # np.arange: the latter accumulates rounding noise (0.7999999999999999)
        # and can emit one extra point past the intended end of the grid.
        eps_min, eps_max = min(top10_eps_list), max(top10_eps_list)
        n_eps_steps = int(np.ceil((eps_max - eps_min) / eps_step - 1e-9))
        eps_grid_fine = np.round(eps_min + eps_step * np.arange(n_eps_steps + 1), 10)

        ms_min, ms_max = min(top10_min_samples_list), max(top10_min_samples_list)
        min_samples_grid_fine = list(range(int(ms_min), int(ms_max) + 1))

        missing_features = [feat for feat in feature_set if feat not in data.columns]
        if missing_features:
            print(f"Fine tuning: Set {set_number} skipped. Missing features: {missing_features}")
            continue

        # Standardise the selected columns so eps is measured on a common scale.
        df_subset = data[feature_set]
        df_subset = pd.DataFrame(StandardScaler().fit_transform(df_subset), columns=df_subset.columns)

        score_list = []
        for eps in eps_grid_fine:
            eps = float(eps)                                                      # Plain float keeps the saved/printed value clean
            for min_samples in min_samples_grid_fine:
                try:
                    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(df_subset)
                    # silhouette_score needs at least two distinct labels.
                    # NOTE(review): the noise label (-1) is passed through as a
                    # label of its own - confirm that is the intended scoring.
                    if np.unique(labels).size > 1:
                        score = silhouette_score(df_subset, labels)
                        score_list.append((score, eps, min_samples))
                except Exception as exc:
                    # Best-effort search: report the failing combination and move on.
                    print(f"Fine tuning: Set {set_number} combination failed "
                          f"(eps: {eps}, min_samples: {min_samples}): {exc}")
                    continue

        if score_list:
            # max() returns the first entry with the highest score, matching the
            # previous stable descending sort followed by [0].
            best_score, best_eps, best_min_samples = max(score_list, key=lambda item: item[0])
            final_results.append({
                "set_number": set_number,
                "features": feature_set,
                "best_eps": best_eps,
                "best_min_samples": best_min_samples,
                "best_silhouette_score": best_score
            })
            print(f"Fine tuning: Set {set_number} processed. Best: (eps: {best_eps}, min_samples: {best_min_samples}), Score: {best_score:.4f}")
        else:
            final_results.append({
                "set_number": set_number,
                "features": feature_set,
                "best_eps": None,
                "best_min_samples": None,
                "best_silhouette_score": None
            })
            print(f"Fine tuning: Set {set_number} processed. No valid clustering found.")

    final_df = pd.DataFrame(final_results)
    output_filename = expected_filename.replace("Intermediate_Top10", "Best_Fine_Params")
    final_df.to_csv(output_filename, index=False)
    print(f"\nFine tuning for '{expected_filename}' complete. Final best parameters saved to '{output_filename}'.")

    files.download(output_filename)                                               # Download the dataframe as csv file


All expected files found. Processing...
Fine tuning: Set 4005 processed. Best: (eps: 0.7999999999999999, min_samples: 3), Score: 0.4320
Fine tuning: Set 4006 processed. Best: (eps: 1.6, min_samples: 3), Score: 0.3880
Fine tuning: Set 4007 processed. Best: (eps: 1.9000000000000004, min_samples: 3), Score: 0.3876
Fine tuning: Set 4008 processed. Best: (eps: 2.0000000000000004, min_samples: 3), Score: 0.3899
Fine tuning: Set 4009 processed. Best: (eps: 0.7999999999999999, min_samples: 3), Score: 0.4377
Fine tuning: Set 4010 processed. Best: (eps: 1.6, min_samples: 3), Score: 0.3933
Fine tuning: Set 4011 processed. Best: (eps: 1.9000000000000004, min_samples: 3), Score: 0.3928
Fine tuning: Set 4012 processed. Best: (eps: 2.0000000000000004, min_samples: 3), Score: 0.3951
Fine tuning: Set 4014 processed. Best: (eps: 1.5, min_samples: 3), Score: 0.2371
Fine tuning: Set 4015 processed. Best: (eps: 1.1999999999999997, min_samples: 3), Score: 0.2312
Fine tuning: Set 4016 processed. Best: (eps: 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Fine tuning: Set 4148 processed. Best: (eps: 1.6, min_samples: 8), Score: 0.1969
Fine tuning: Set 4149 processed. Best: (eps: 1.5, min_samples: 3), Score: 0.3836
Fine tuning: Set 4150 processed. Best: (eps: 1.7000000000000002, min_samples: 3), Score: 0.3490
Fine tuning: Set 4151 processed. Best: (eps: 1.9000000000000004, min_samples: 3), Score: 0.3489
Fine tuning: Set 4152 processed. Best: (eps: 2.1000000000000005, min_samples: 3), Score: 0.3512
Fine tuning: Set 4153 processed. Best: (eps: 1.5, min_samples: 3), Score: 0.3889
Fine tuning: Set 4154 processed. Best: (eps: 1.7000000000000002, min_samples: 3), Score: 0.3539
Fine tuning: Set 4155 processed. Best: (eps: 1.9000000000000004, min_samples: 3), Score: 0.3538
Fine tuning: Set 4156 processed. Best: (eps: 2.1000000000000005, min_samples: 3), Score: 0.3561
Fine tuning: Set 4157 processed. Best: (eps: 0.9999999999999999, min_samples: 4), Score: 0.2287
Fine tuning: Set 4158 processed. Best: (eps: 1.6, min_samples: 4), Score: 0.2313
Fine

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>