<h1><font color='green'>Setup and Libraries</font></h1>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import silhouette_score, silhouette_samples, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

<h1><font color='green'>Load Data</font></h1>

In [None]:
# Placeholder for loading data: three synthetic features drawn uniformly from
# [0, 1) and an integer label in {0, 1, 2, 3}, 450 rows each.
# A locally seeded generator is used so the frame (and every plot and metric
# derived from it) is identical on each Restart & Run All.
rng = np.random.default_rng(42)
data = pd.DataFrame({
    'feature1': rng.random(450),
    'feature2': rng.random(450),
    'feature3': rng.random(450),
    'label': rng.integers(0, 4, 450)  # upper bound is exclusive
})
data.head()

<h1><font color='green'>Data Exploration</font></h1>

In [None]:
# Print a structural overview of the frame (columns, dtypes, non-null counts).
data.info()

In [None]:
# Display summary statistics for the frame's columns.
data.describe()

<h1><font color='green'>Pairplot and Correlation</font></h1>

In [None]:
# Pairwise relationships between the columns, with points coloured by 'label'.
pair_grid = sns.pairplot(data=data, hue='label')
plt.show()

In [None]:
# Correlation between every pair of columns, drawn as an annotated heatmap.
corr_matrix = data.corr()
corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', ax=corr_ax)
plt.show()

<h1><font color='green'>KMeans Clustering</font></h1>

In [None]:
# Sweep the number of clusters from 2 up to `clusters` and record, for each fit,
# the inertia (elbow plot) and the silhouette score (silhouette plot).
clusters = 10
inertias = []
sil_scores = []

# Slice the feature columns once instead of on every iteration.
cluster_features = data[['feature1', 'feature2', 'feature3']]

for cluster in range(2, clusters+1):
    # n_init is set explicitly: scikit-learn changed its default between
    # releases, so relying on it makes the curves version-dependent.
    kmeans = KMeans(n_clusters=cluster, n_init=10, random_state=42)
    y_kmeans = kmeans.fit_predict(cluster_features)
    inertias.append(kmeans.inertia_)
    sil_scores.append(silhouette_score(cluster_features, y_kmeans))

# Both diagnostics share the same x-axis and styling, so draw them in one loop.
k_values = range(2, clusters+1)
for scores, y_label, plot_title in (
    (inertias, 'Inertia', 'Elbow Method'),
    (sil_scores, 'Silhouette Score', 'Silhouette Method'),
):
    plt.figure(figsize=(12, 6))
    plt.plot(k_values, scores, marker='o', linestyle='--')
    plt.xlabel('Number of clusters')
    plt.ylabel(y_label)
    plt.title(plot_title)
    plt.show()

<h1><font color='green'>Final Clustering</font></h1>

In [None]:
# Fit the final 4-cluster model on the three feature columns and store each
# row's cluster id in data['cluster'].
# n_init is set explicitly: scikit-learn changed its default between releases,
# so relying on it would make the assigned clusters version-dependent.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
data['cluster'] = kmeans.fit_predict(data[['feature1', 'feature2', 'feature3']])

# Only feature1 vs feature2 is drawn; the clustering itself used all three features.
sns.scatterplot(x='feature1', y='feature2', hue='cluster', data=data, palette='viridis')
plt.title('KMeans Clustering')
plt.show()

<h1><font color='green'>Random Forest Classifier and Feature Importances</font></h1>

In [None]:
# Feature matrix and target vector
feature_columns = ['feature1', 'feature2', 'feature3']
X = data[feature_columns]
y = data['label']

# Hold out 20% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest and predict the held-out rows
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Classification metrics on the held-out rows
rf_report = classification_report(y_test, y_pred)
print(rf_report)

# Order the features from most to least important
importances = rf.feature_importances_
features = X.columns
indices = np.argsort(importances)[::-1]
bar_positions = range(X.shape[1])
ranked_names = list(features[indices])

# Bar chart of the importances in ranked order
imp_fig, imp_ax = plt.subplots(figsize=(10, 6))
imp_ax.set_title('Feature Importances')
imp_ax.bar(bar_positions, importances[indices], align='center')
imp_ax.set_xticks(bar_positions)
imp_ax.set_xticklabels(ranked_names, rotation=90)
imp_fig.tight_layout()
plt.show()

<h1><font color='green'>KNN Classifier</font></h1>

In [None]:
# Fit a 5-nearest-neighbour classifier on the training split
# (fit returns the estimator itself, so the call can be chained).
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Classification metrics on the held-out rows
knn_report = classification_report(y_test, y_pred_knn)
print(knn_report)

<h1><font color='green'>SVM Classifier</font></h1>

In [None]:
# Fit a linear-kernel support vector classifier on the training split
# (fit returns the estimator itself, so the call can be chained).
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Classification metrics on the held-out rows
svm_report = classification_report(y_test, y_pred_svm)
print(svm_report)

<h1><font color='green'>PCA for Feature Reduction</font></h1>

In [None]:
# Project the feature matrix onto its first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
first_component = X_pca[:, 0]
second_component = X_pca[:, 1]

# Scatter the projected points, coloured by 'label'
pca_fig, pca_ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(
    x=first_component,
    y=second_component,
    hue=data['label'],
    palette='viridis',
    ax=pca_ax,
)
pca_ax.set_title('PCA of Features')
plt.show()