In [None]:
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

# Streamlit page: cluster the standardized Iris measurements with KMeans
# (3 clusters, fixed seed) and show petal length vs. sepal length coloured
# by the assigned cluster.
st.title('Iris Species Clustering with KMeans')

# Load the dataset into a DataFrame; the true species id is kept alongside
# the measurements but is not used for clustering.
iris = load_iris()
X = iris.data
df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = iris.target

# Single source of truth for the columns that are scaled and clustered.
feature_cols = [
    'petal length (cm)',
    'petal width (cm)',
    'sepal length (cm)',
    'sepal width (cm)',
]

# Standardize the measurements in place (zero mean, unit variance per column).
scaler = StandardScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])

# Fit KMeans on the scaled features and store each row's cluster id.
cluster = KMeans(n_clusters=3, random_state=42)
clusters = cluster.fit_predict(df[feature_cols])
df['cluster'] = clusters

# Render the scatter plot on an explicit figure and hand it to Streamlit.
st.write("Scatter Plot of Clusters")
fig, ax = plt.subplots()
sns.scatterplot(
    data=df,
    x='petal length (cm)',
    y='sepal length (cm)',
    hue='cluster',
    palette='viridis',
    ax=ax,
)
st.pyplot(fig)


In [None]:
import pandas as pd 
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.metrics import silhouette_score

# Load Iris into a DataFrame and keep the true species id next to the features.
iris = load_iris()
X = iris.data
df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = iris.target

# Columns to standardize, listed once so the read and write sides cannot drift.
feature_cols = [
    'petal length (cm)',
    'petal width (cm)',
    'sepal length (cm)',
    'sepal width (cm)',
]

# Replace the raw measurements with their standardized values.
scaler = StandardScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])

def plot_inertia(data, max_k):
    """Plot KMeans inertia and silhouette score for k = 2 .. max_k (inclusive).

    One KMeans model is fitted per candidate k. Inertia is drawn on the left
    y-axis and the silhouette score on a twin right y-axis, and the figure is
    rendered with ``st.pyplot``.

    Parameters
    ----------
    data : array-like accepted by ``KMeans.fit_predict`` and ``silhouette_score``
        Feature matrix to cluster.
    max_k : int
        Largest number of clusters to evaluate. Values below 2 cannot be
        scored, so a Streamlit warning is shown and nothing is plotted.

    Returns
    -------
    None
    """
    # The silhouette score needs at least two clusters; explain the empty
    # result instead of drawing blank axes.
    if max_k < 2:
        st.warning('Select at least 2 clusters to plot inertia and silhouette scores.')
        return

    # Include max_k itself: the caller presents it as the *maximum* k.
    means = list(range(2, max_k + 1))
    inertia = []
    score = []

    for k in means:
        # Fixed seed so the curves do not change on every Streamlit rerun.
        kmeans = KMeans(n_clusters=k, random_state=42)
        cluster_labels = kmeans.fit_predict(data)
        inertia.append(kmeans.inertia_)
        score.append(silhouette_score(data, cluster_labels))

    fig, ax1 = plt.subplots(figsize=(10, 6))
    ax2 = ax1.twinx()
    # Labels are required for fig.legend() to have anything to show.
    ax1.plot(means, inertia, marker='o', color='r', label='Inertia')
    ax2.plot(means, score, marker='x', color='b', label='Silhouette Score')

    ax1.set_xlabel('Number of Clusters (k)')
    ax1.set_ylabel('Inertia', color='r')
    ax2.set_ylabel('Silhouette Score', color='b')
    ax1.set_title('Elbow Method and Silhouette Score for Optimal k')
    fig.legend(loc='upper right')
    st.pyplot(fig)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)


# Upper bound of the k range to evaluate. Silhouette scores are only defined
# for two or more clusters, so the slider does not offer values below 2.
k = st.sidebar.slider('Select max number of clusters (k)', min_value=2, max_value=40, value=10)

st.write("### Optimal Plot of Clusters")
# Only petal length and sepal length are passed to the elbow/silhouette plot.
plot_inertia(df[['petal length (cm)', 'sepal length (cm)']], k)


In [None]:
import pandas as pd 
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.metrics import silhouette_score

# Load Iris into a DataFrame and keep the true species id next to the features.
iris = load_iris()
X = iris.data
df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = iris.target

# Standardize the four measurements into a separate array (df keeps raw values).
feature_cols = [
    'petal length (cm)',
    'petal width (cm)',
    'sepal length (cm)',
    'sepal width (cm)',
]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[feature_cols])

# Project the standardized features onto their first two principal components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

def bro(data, k):
    """Cluster 2-D points with KMeans and render a coloured scatter plot.

    Parameters
    ----------
    data : 2-D array indexable as ``data[:, 0]`` / ``data[:, 1]``
        Points to cluster; the first two columns are used as the x and y
        coordinates of the plot.
    k : int
        Number of clusters passed to ``KMeans``.

    Returns
    -------
    None
        The figure is rendered with ``st.pyplot``.
    """
    # Fixed seed so the cluster assignment is stable across Streamlit reruns.
    kmeans = KMeans(n_clusters=k, random_state=42)
    cluster_labels = kmeans.fit_predict(data)

    fig, ax = plt.subplots(figsize=(10, 6))
    # x/y must be keywords (positional vectors are rejected by current
    # seaborn); colour by cluster through seaborn's own hue/palette and draw
    # on the explicit axes rather than whatever pyplot considers current.
    sns.scatterplot(
        x=data[:, 0],
        y=data[:, 1],
        hue=cluster_labels,
        palette='viridis',
        s=50,
        ax=ax,
    )
    ax.set_title('K-Means Clusters in PCA-Reduced Space')
    ax.set_xlabel('Principal component 1')
    ax.set_ylabel('Principal component 2')
    st.pyplot(fig)
    # Release the figure so repeated reruns do not accumulate open figures.
    plt.close(fig)

# The slider value is used directly as KMeans' n_clusters (not as an upper
# bound), so the label says "number of clusters".
k = st.sidebar.slider('Select number of clusters (k)', min_value=2, max_value=10, value=3)
st.write("### Clusters in PCA-Reduced Space")
bro(X_pca, k)


In [None]:
import pandas as pd
import streamlit as st
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from scipy.cluster.hierarchy import dendrogram, linkage

# Load Iris into a DataFrame and keep the true species id next to the features.
iris = load_iris()
X = iris.data
df = pd.DataFrame(X, columns=iris.feature_names)
df['species'] = iris.target

# Standardize the four measurements before computing linkage distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[['petal length (cm)', 'petal width (cm)', 'sepal length (cm)', 'sepal width (cm)']])

# Agglomerative clustering with Ward linkage on the standardized features.
Z = linkage(X_scaled, method='ward')

# Draw on an explicit figure/axes and pass the Figure itself to Streamlit,
# instead of handing over the global pyplot module.
fig, ax = plt.subplots(figsize=(12, 8))
dendrogram(Z, labels=iris.target, ax=ax)
ax.set_title('Hierarchical Clustering Dendrogram')
# Leaves are labelled with iris.target (species id), not with row indices.
ax.set_xlabel('Sample (labelled by species id)')
ax.set_ylabel('Distance')
st.pyplot(fig)
# Release the figure so repeated reruns do not accumulate open figures.
plt.close(fig)


#### K-Means generally performs better on large datasets and when the number of clusters is known in advance, while Hierarchical Clustering is better suited to smaller datasets and provides a dendrogram for visual analysis. Overall, K-Means is the more efficient choice for the Iris dataset, because the number of species is known and the clusters in the data are clearly separated.