In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve all attributes from data
all_attributes = X

# Print all the attributes along with the cluster assignment (starting from 1)
for i, row in enumerate(all_attributes):
    cluster_assignment = medoids_indices[i] + 1
    print(f"Nama: {row[0]}, Peng Sem 1: {row[1]}, Ket Sem 1: {row[2]}, Peng Sem 2: {row[3]}, Ket Sem 2: {row[4]}, Cluster: {cluster_assignment}")


Nama: Agustinus Robert Tua Tambunan, Peng Sem 1: 76.2, Ket Sem 1: 76.7, Peng Sem 2: 79.5, Ket Sem 2: 79.0, Cluster: 2
Nama: Alisya Kirana, Peng Sem 1: 82.6, Ket Sem 1: 82.4, Peng Sem 2: 85.6, Ket Sem 2: 85.2, Cluster: 1
Nama: Calista Anggriani Marbun, Peng Sem 1: 82.7, Ket Sem 1: 82.7, Peng Sem 2: 83.7, Ket Sem 2: 84.1, Cluster: 1
Nama: Chiren Amanda P, Silitonga, Peng Sem 1: 79.9, Ket Sem 1: 80.2, Peng Sem 2: 83.1, Ket Sem 2: 83.1, Cluster: 3
Nama: Dini Elisabet Dongoran, Peng Sem 1: 82.1, Ket Sem 1: 81.7, Peng Sem 2: 85.4, Ket Sem 2: 85.1, Cluster: 1
Nama: Dolok Maruarar Pakpahan, Peng Sem 1: 78.9, Ket Sem 1: 78.4, Peng Sem 2: 83.6, Ket Sem 2: 83.4, Cluster: 3
Nama: Epifaniya Lebora Dongoran, Peng Sem 1: 85.7, Ket Sem 1: 85.4, Peng Sem 2: 88.7, Ket Sem 2: 88.8, Cluster: 1
Nama: Heni Sianturi, Peng Sem 1: 84.6, Ket Sem 1: 85.1, Peng Sem 2: 86.4, Ket Sem 2: 86.3, Cluster: 1
Nama: Ipan Satria Harianja, Peng Sem 1: 78.8, Ket Sem 1: 77.8, Peng Sem 2: 81.9, Ket Sem 2: 82.4, Cluster: 3
Nama

In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Remove rows with missing values
data = data.dropna()

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2', 'Kelas Awal']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:5], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve all attributes from data
all_attributes = X

# Create a dictionary to map cluster assignments to new classes
cluster_to_class = {
    0: '7a',
    1: '7b',
    2: '7c',
}

# Append new column 'Kelas Baru' to the DataFrame
data['Kelas Baru'] = ''

# Update the 'Kelas Baru' column with the new class based on the cluster assignment
for i, row in enumerate(all_attributes):
    cluster_assignment = medoids_indices[i]
    new_class = cluster_to_class[cluster_assignment]
    data.loc[i, 'Kelas Baru'] = new_class

# Print the updated DataFrame
print(data)


        No                           Nama  Peng Sem 1  Ket Sem 1  Peng Sem 2  \
1      2.0  Agustinus Robert Tua Tambunan        76.2       76.7        79.5   
2      3.0                  Alisya Kirana        82.6       82.4        85.6   
3      4.0       Calista Anggriani Marbun        82.7       82.7        83.7   
4      5.0     Chiren Amanda P, Silitonga        79.9       80.2        83.1   
5      6.0         Dini Elisabet Dongoran        82.1       81.7        85.4   
..     ...                            ...         ...        ...         ...   
242  243.0             SEM FELIX PAKPAHAN        77.9       76.9        79.4   
243  244.0     Sharah Marcaulina Br,Regar        79.3       78.1        80.0   
244  245.0            TIOLENTINA TAMBUNAN        81.7       81.7        83.4   
245  246.0      YEBIDA SEPTRIANI PAKPAHAN        81.3       81.3        84.4   
0      NaN                            NaN         NaN        NaN         NaN   

     Ket Sem 2 Kelas Awal Kelas Baru  


In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Remove rows with missing values
data = data.dropna()

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2', 'Kelas Awal']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:5], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve all attributes from data
all_attributes = X

# Create a dictionary to map cluster assignments to new classes
cluster_to_class = {
    0: '7a',
    1: '7b',
    2: '7c',
    3: '7d',
    4: '7e',
    5: '7f',
    6: '7g'
}

# Append new column 'Kelas Baru' to the DataFrame
data['Kelas Baru'] = ''

# Update the 'Kelas Baru' column with the new class based on the cluster assignment
for i, row in enumerate(all_attributes):
    cluster_assignment = medoids_indices[i]
    new_class = cluster_to_class[cluster_assignment]
    data.loc[i, 'Kelas Baru'] = new_class

data.to_csv('abc.csv', index=False)
# Print the updated DataFrame
print(data)


        No                           Nama  Peng Sem 1  Ket Sem 1  Peng Sem 2  \
1      2.0  Agustinus Robert Tua Tambunan        76.2       76.7        79.5   
2      3.0                  Alisya Kirana        82.6       82.4        85.6   
3      4.0       Calista Anggriani Marbun        82.7       82.7        83.7   
4      5.0     Chiren Amanda P, Silitonga        79.9       80.2        83.1   
5      6.0         Dini Elisabet Dongoran        82.1       81.7        85.4   
..     ...                            ...         ...        ...         ...   
242  243.0             SEM FELIX PAKPAHAN        77.9       76.9        79.4   
243  244.0     Sharah Marcaulina Br,Regar        79.3       78.1        80.0   
244  245.0            TIOLENTINA TAMBUNAN        81.7       81.7        83.4   
245  246.0      YEBIDA SEPTRIANI PAKPAHAN        81.3       81.3        84.4   
0      NaN                            NaN         NaN        NaN         NaN   

     Ket Sem 2 Kelas Awal Kelas Baru  
