<a href="https://colab.research.google.com/github/simsekahmet/K-means_clustering/blob/main/k_means.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans

In [None]:
# Load the dataset from the CSV file expected in the working directory.
df = pd.read_csv("iris_dataset.csv")

# Two candidate feature pairs, picked by column position:
#   x  -> first and second columns
#   x1 -> third and fourth columns
x, x1 = df.iloc[:, :2], df.iloc[:, 2:4]

# Preview the first rows; kept as the last expression so the notebook renders it.
df.head()


**Finding the best K value**

In [None]:
def best_k_values(x, cl_num=10):
    """Plot the Elbow-method curve used to choose K for K-means Clustering

    One KMeans model is fitted for every K in ``range(1, cl_num)`` and its
    inertia (within-cluster sum of squares) is plotted against K.
    No random_state is passed to KMeans, so the curve may differ slightly
    between runs.

    --------------------
    Args:
        x (DataFrame): independent variables to cluster (two columns in the examples below)
        cl_num (int, optional): exclusive upper bound on K, i.e. models are
            fitted for K = 1 .. cl_num - 1. Defaults to: 10

    --------------------
    Returns: None (the curve is drawn on a new matplotlib figure and shown)

    --------------------
    Example:
        best_k_values(df.iloc[:,0:2])
                                        or
        best_k_values(df.iloc[:,2:4])
    """
    # Build the K range once so the fitted models and the x-axis always agree.
    number_clusters = range(1, cl_num)
    wcss = [KMeans(n_clusters=i).fit(x).inertia_ for i in number_clusters]

    plt.figure()
    plt.plot(number_clusters, wcss)
    plt.title('The Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('Within-cluster Sum of Squares')
    # Show explicitly, like kmeans_clustering does, so the plot also appears
    # when this runs outside a notebook.
    plt.show()

In [None]:
# Draw the elbow curve for the first two columns of df, using the default cl_num of 10.
best_k_values(x)

**K-Means Clustering**

In [None]:
def kmeans_clustering(x, k, scale=False):
    """Cluster the data with K-means and draw a scatter plot colored by cluster

    The clustering uses every column of ``x`` (standardized first when
    ``scale`` is True); the plot always shows the original, unscaled values
    of the first two columns, with axis labels taken from their column names.

    --------------------
    Args:
        x (DataFrame): independent variables (2 DataFrame column)
        k (int): K value for clustering (number of clusters)
        scale (bool, optional): standardize the features with
            sklearn.preprocessing.scale before clustering. Defaults to: False

    --------------------
    Returns: None (the clustered data is drawn on a new matplotlib figure and shown)

    --------------------
    Example:
        kmeans_clustering(df.iloc[:,0:2], 3)
                                        or
        kmeans_clustering(df.iloc[:,2:4], 3, scale=True)
    """
    if scale:
        # Imported lazily: only needed when scaling is requested.
        from sklearn import preprocessing

        features = preprocessing.scale(x)
    else:
        features = x

    # fit_predict both fits the model and returns the labels, so a separate
    # fit() beforehand would only train the model a second time.
    identified_clusters = KMeans(n_clusters=k).fit_predict(features)

    plt.figure()
    plt.scatter(x.iloc[:, 0], x.iloc[:, 1], c=identified_clusters, cmap="rainbow")
    plt.xlabel(x.columns.values[0])
    plt.ylabel(x.columns.values[1])
    plt.show()

In [None]:
# Cluster the first two columns of df into 3 groups; scale is left at its default (False).
kmeans_clustering(x, 3)