<a href="https://colab.research.google.com/github/srishticodes/Dataanalysis/blob/main/fuzzycmeansproblem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Write Python code to read data samples from a CSV file. Next, initialize a
membership matrix and cluster the data samples using a soft clustering
technique (fuzzy c-means) with a fuzziness level of m = 1.26 and c = 2
clusters, without using built-in clustering methods.

In [None]:
import csv
import random
import math
import urllib.request
import io

def load_data(filepath):
    """
    Read numeric data from a CSV file and return as a list of lists of floats.
    Assumes first row is header.

    Args:
        filepath: Local path, or a URL starting with "http", of the CSV file.

    Returns:
        One list of floats per non-empty data row. Every cell that parses
        as a finite float is kept (including negative, signed and
        exponent-form numbers); non-numeric cells such as "Species" in the
        Iris dataset are skipped. Rows with no numeric cell at all
        (e.g. blank lines) are omitted. An empty file yields [].
    """

    def _parse(stream):
        # Turn an open text stream of CSV content into rows of floats.
        reader = csv.reader(stream)
        next(reader, None)  # skip header; tolerate a completely empty file
        rows = []
        for row in reader:
            values = []
            for cell in row:
                try:
                    number = float(cell)
                except ValueError:
                    continue  # non-numeric column, e.g. a class label
                # "nan"/"inf" parse as floats but would poison the
                # distance computations, so treat them as non-numeric.
                if math.isfinite(number):
                    values.append(number)
            if values:
                rows.append(values)
        return rows

    # Handle both local and URL CSVs; the context managers make sure the
    # underlying handle is closed even if parsing fails.
    if filepath.startswith("http"):
        with urllib.request.urlopen(filepath) as response:
            text = response.read().decode('utf-8')
        return _parse(io.StringIO(text))

    with open(filepath, newline='') as f:
        return _parse(f)

def initialize_membership(n_samples, c=2):
    """
    Build a random membership matrix with n_samples rows and c columns.

    Each row is drawn from uniform random numbers and then divided by its
    own total, so the memberships of every sample sum to 1.
    """

    def _normalized_row():
        # Draw c uniform values, then rescale them to sum to 1.
        draws = [random.random() for _ in range(c)]
        total = sum(draws)
        return [value / total for value in draws]

    return [_normalized_row() for _ in range(n_samples)]

def compute_centroids(data, U, m=1.26):
    """
    Return one centroid per cluster as the membership-weighted mean of data.

    The weight of sample i for cluster k is U[i][k] ** m; each centroid
    coordinate is the weighted sum of that coordinate divided by the sum
    of the weights.
    """
    dims = len(data[0])
    n_clusters = len(U[0])
    centers = []

    for k in range(n_clusters):
        weighted_totals = [0.0] * dims
        weight_sum = 0.0
        for i, point in enumerate(data):
            w = U[i][k] ** m
            weight_sum += w
            for axis in range(dims):
                weighted_totals[axis] += w * point[axis]
        centers.append([weighted_totals[axis] / weight_sum for axis in range(dims)])

    return centers

def update_membership(data, centroids, m=1.26):
    """
    Recompute the membership matrix from the current centroids.

    For every sample, the Euclidean distance to each centroid is taken
    (clamped below at 1e-12 so a sample sitting on a centroid does not
    divide by zero), and the membership for cluster j is
    1 / sum_k (d_j / d_k) ** (2 / (m - 1)).
    """
    power = 2.0 / (m - 1.0)
    floor = 1e-12
    memberships = []

    for sample in data:
        axes = range(len(data[0]))

        # Distance from this sample to every centroid, clamped away from 0.
        distances = []
        for center in centroids:
            squared = sum((sample[a] - center[a]) ** 2 for a in axes)
            length = math.sqrt(squared)
            distances.append(length if length > floor else floor)

        row = []
        for own in distances:
            ratio_total = sum((own / other) ** power for other in distances)
            row.append(1.0 / ratio_total)
        memberships.append(row)

    return memberships

def fuzzy_c_means(filepath, c=2, m=1.26, epsilon=1e-5, max_iter=100):
    """
    Perform Fuzzy C-Means clustering on the numeric rows of a CSV file.

    Args:
        filepath: Path or URL passed to load_data.
        c: Number of clusters.
        m: Level of fuzziness used for the centroid weights and the
            membership update.
        epsilon: Stop once the summed absolute change of all memberships
            between two consecutive iterations drops below this value.
        max_iter: Upper bound on the number of iterations.

    Returns:
        A tuple (centroids, U). U is the final membership matrix; centroids
        are the ones that final matrix was derived from. If max_iter is
        zero or negative, no update runs and the centroids of the random
        initial memberships are returned.

    Raises:
        ValueError: If the file contains no numeric data rows.
    """
    data = load_data(filepath)
    if not data:
        # Fail with a clear message instead of an IndexError deep inside
        # compute_centroids.
        raise ValueError(f"No numeric data rows found in {filepath!r}")

    n_samples = len(data)
    U = initialize_membership(n_samples, c)
    centroids = None

    for iteration in range(max_iter):
        # update_membership returns a brand-new matrix, so keeping a
        # reference to the old one is enough; no copy is needed.
        U_prev = U
        centroids = compute_centroids(data, U_prev, m)
        U = update_membership(data, centroids, m)

        diff = sum(
            abs(U[i][j] - U_prev[i][j])
            for i in range(n_samples)
            for j in range(c)
        )
        if diff < epsilon:
            print(f"Converged in {iteration + 1} iterations")
            break

    if centroids is None:
        # The loop never ran (max_iter <= 0): still return usable
        # centroids rather than hitting an unbound local.
        centroids = compute_centroids(data, U, m)

    return centroids, U

# Script entry point: cluster the Iris CSV into three fuzzy clusters and
# print the resulting centers plus the first few membership rows.
if __name__ == "__main__":
    # The path literal previously had doubled quotes, which is a syntax error.
    csv_path = "/kaggle/input/iris/Iris.csv"
    centers, membership = fuzzy_c_means(csv_path, c=3)
    print("Cluster centers:")
    for center in centers:
        print(center)
    print("\nFirst five membership rows:")
    for row in membership[:5]:
        print(row)


Converged in 52 iterations
Cluster centers:
[75.49453423252312, 5.919486560801025, 2.7785139814662627, 4.240843239500129, 1.322071003060998]
[25.212680404038828, 5.028343236162345, 3.413251508700955, 1.5065870772834362, 0.26081739297181766]
[125.78705081886477, 6.5813241049058595, 2.9696473503595606, 5.523762024550332, 2.011024053386354]

First five membership rows:
[0.00017472872861289452, 0.9998219625164851, 3.308754901819003e-06]
[0.00014031935848416756, 0.9998571330788445, 2.547562671222458e-06]
[0.00011101487594724314, 0.9998870533528978, 1.9317711550955415e-06]
[8.679586046628348e-05, 0.9999117590938873, 1.4450456463746619e-06]
[6.656036073936304e-05, 0.9999323797868648, 1.0598523960812642e-06]
