# Stage 2: OPTICS Clustering
This notebook applies the OPTICS algorithm to the freelancer dataset and visualizes the clustering results.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import OPTICS
from sklearn.metrics import silhouette_score

# Read the cleaned freelancer earnings table (the same file the KMeans stage uses)
# and show its first rows as this cell's output.
df = pd.read_csv(filepath_or_buffer="../data/freelancer_earnings_bd_cleaned.csv")
df.head(n=5)


## Feature Selection and Scaling

In [None]:
# Columns fed to the clustering step; every one is passed through the scaler below.
features = [
    'job_completed',
    'earnings_usd',
    'hourly_rate',
    'job_success_rate',
    'client_rating',
    'job_duration_days',
    'rehire_rate',
    'marketing_spend',
]
X = df[features]

# Fit the scaler first, then transform with the fitted instance, so `scaler`
# stays available for reuse in later cells.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


## Applying OPTICS Clustering

In [None]:
# Fit OPTICS on the standardized features and store each row's label on the frame.
optics = OPTICS(min_samples=5, xi=0.05, min_cluster_size=0.05)
labels = optics.fit_predict(X_scaled)
df['optics_cluster'] = labels

# The label -1 marks noise points, not a cluster, so it is excluded from the count.
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

# Score only the points that were assigned to a real cluster. Passing the noise
# label to silhouette_score would treat all noise points as one more "cluster",
# which contradicts n_clusters above and distorts the score.
clustered_mask = labels != -1
silhouette = (
    silhouette_score(X_scaled[clustered_mask], labels[clustered_mask])
    if n_clusters > 1
    else None
)

print(f"Clusters found: {n_clusters}")
# Compare against None explicitly: a silhouette of exactly 0.0 is a valid score
# and must not be reported as N/A.
print(f"Silhouette Score: {silhouette:.2f}" if silhouette is not None else "Silhouette Score: N/A")


## PCA Visualization of OPTICS Clusters

In [None]:
# Project the scaled features onto two principal components for plotting and
# store the coordinates on the frame so seaborn can read them by column name.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df['pca1'] = X_pca[:, 0]
df['pca2'] = X_pca[:, 1]

plt.figure(figsize=(10, 6))
sns.scatterplot(data=df, x='pca1', y='pca2', hue='optics_cluster', palette='tab10')
# Compare against None explicitly: a silhouette of exactly 0.0 is a valid score
# and should still appear in the title.
plt.title(f'OPTICS Clustering (PCA Projection)\nClusters: {n_clusters}' +
          (f' | Silhouette: {silhouette:.2f}' if silhouette is not None else ''))
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
# Place the legend outside the axes so it does not cover any points.
plt.legend(title='Cluster', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
