In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

# Columns fed to the clustering and anomaly-detection steps. Their order here
# fixes the column order of X and X_scaled.
features = ['Year', 'Mileage', 'Price']

# Read the raw listings from a CSV in the current working directory.
data = pd.read_csv('used_cars.csv')

# Keep only the selected columns and discard every row that has a missing
# value in any of them.
X = data[features].dropna()

# Standardize each column with StandardScaler's defaults (zero mean, unit
# variance). X_scaled is built from X, so its rows line up with X rather than
# with the unfiltered `data`.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Cluster the standardized features with K-means.
# NOTE(review): k is a fixed choice here; nothing in this notebook validates
# it (e.g. elbow or silhouette analysis) -- confirm it suits the data.
k = 3  # Number of clusters

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 and emits a FutureWarning in 1.2-1.3 when it is
# omitted, so leaving it out makes the clusters depend on the installed
# version. random_state fixes the centroid initialisation for repeatable runs.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
kmeans.fit(X_scaled)

# One integer cluster id per row of X_scaled.
cluster_labels = kmeans.labels_

In [None]:
# Visualize the clustering results.
# X_scaled holds standardized values, so the axes are labelled as such rather
# than as raw years / mileage. Columns 0 and 1 follow the order of `features`
# (Year, Mileage); Price (column 2) also fed the clustering but is not drawn.
fig, ax = plt.subplots()
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=cluster_labels, cmap='viridis')
ax.set_xlabel('Year (standardized)')
ax.set_ylabel('Mileage (standardized)')
ax.set_title('Clustering Results')
plt.show()

In [None]:
# Fit an Isolation Forest on the standardized features; the fixed seed keeps
# the result repeatable. fit() returns the estimator, so it is chained here.
forest = IsolationForest(random_state=42).fit(X_scaled)

# Hard labels from predict(): per the scikit-learn docs, -1 marks an outlier
# and +1 an inlier.
anomaly_labels = forest.predict(X_scaled)

# Continuous scores from decision_function(): per the scikit-learn docs,
# lower means more abnormal, with negative values for outliers.
anomaly_scores = forest.decision_function(X_scaled)

In [None]:
# Visualize the anomaly detection results.
# X_scaled holds standardized values, so the axes are labelled as such rather
# than as raw years / mileage. Columns 0 and 1 follow the order of `features`
# (Year, Mileage); Price (column 2) was part of the model input but is not
# drawn.
fig, ax = plt.subplots()

# The labels are negated so that outliers (-1 from predict) become +1 and land
# on the high (warm) end of the 'coolwarm' colormap; inliers land on the cool
# end.
ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=-anomaly_labels, cmap='coolwarm')
ax.set_xlabel('Year (standardized)')
ax.set_ylabel('Mileage (standardized)')
ax.set_title('Anomaly Detection Results')
plt.show()