In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# # Replace commas (,) with periods (.) and convert to float
# data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').apply(float)
# data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').apply(float)
# data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').apply(float)
# data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').apply(float)

# # Extract attributes from the data
# attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
# X = data[attributes].values

np.random.seed(0)
num_samples = 100
data = pd.DataFrame({
    'Nama': [f'Student_{i+1}' for i in range(num_samples)],
    'Peng Sem 1': np.random.rand(num_samples),
    'Ket Sem 1': np.random.rand(num_samples),
    'Peng Sem 2': np.random.rand(num_samples),
    'Ket Sem 2': np.random.rand(num_samples)
})

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].apply(float)
data['Peng Sem 2'] = data['Peng Sem 2'].apply(float)
data['Ket Sem 1'] = data['Ket Sem 1'].apply(float)
data['Ket Sem 2'] = data['Ket Sem 2'].apply(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Retrieve all the data points for each cluster
clusters = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    clusters.append(cluster_points)

# Print the data points for each cluster
for cluster in clusters:
    for data_point in cluster:
        print(data_point[0])  # Assuming 'Nama' is the first attribute
        
# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve superior classes
superior_classes = X[medoids]

# Print the superior classes
for superior_class in superior_classes:
    print(superior_class[0])  # Assuming 'name' is the first attribute


Student_16
Student_17
Student_25
Student_27
Student_30
Student_31
Student_33
Student_44
Student_47
Student_48
Student_54
Student_55
Student_58
Student_61
Student_64
Student_65
Student_68
Student_70
Student_72
Student_74
Student_76
Student_77
Student_78
Student_79
Student_80
Student_83
Student_88
Student_91
Student_93
Student_95
Student_98
Student_100
Student_1
Student_5
Student_6
Student_9
Student_14
Student_15
Student_28
Student_32
Student_34
Student_35
Student_36
Student_39
Student_42
Student_49
Student_50
Student_51
Student_56
Student_60
Student_62
Student_63
Student_67
Student_69
Student_73
Student_81
Student_84
Student_85
Student_86
Student_92
Student_94
Student_96
Student_97
Student_2
Student_3
Student_4
Student_7
Student_8
Student_10
Student_11
Student_12
Student_13
Student_18
Student_19
Student_20
Student_21
Student_22
Student_23
Student_24
Student_26
Student_29
Student_37
Student_38
Student_40
Student_41
Student_43
Student_45
Student_46
Student_52
Student_53
Student_57
Student

In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Generate random data
np.random.seed(0)
num_samples = 100
unique_names = set()
data = []

while len(data) < num_samples:
    name = f'Student_{np.random.randint(1, num_samples+1)}'
    if name not in unique_names:
        unique_names.add(name)
        peng_sem1 = np.random.rand()
        ket_sem1 = np.random.rand()
        peng_sem2 = np.random.rand()
        ket_sem2 = np.random.rand()
        data.append([name, peng_sem1, ket_sem1, peng_sem2, ket_sem2])

data = pd.DataFrame(data, columns=['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2'])

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].apply(float)
data['Peng Sem 2'] = data['Peng Sem 2'].apply(float)
data['Ket Sem 1'] = data['Ket Sem 1'].apply(float)
data['Ket Sem 2'] = data['Ket Sem 2'].apply(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Retrieve all the data points for each cluster
clusters = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    clusters.append(cluster_points)

# Print the names for each cluster
for cluster in clusters:
    for data_point in cluster:
        print(data_point[0])  # Assuming 'Nama' is the first attribute


Student_26
Student_56
Student_83
Student_24
Student_36
Student_60
Student_78
Student_14
Student_12
Student_25
Student_62
Student_71
Student_38
Student_32
Student_98
Student_34
Student_40
Student_73
Student_65
Student_29
Student_81
Student_61
Student_82
Student_75
Student_27
Student_52
Student_58
Student_48
Student_94
Student_67
Student_21
Student_74
Student_30
Student_3
Student_51
Student_92
Student_57
Student_45
Student_84
Student_59
Student_80
Student_20
Student_18
Student_66
Student_85
Student_77
Student_35
Student_22
Student_47
Student_37
Student_55
Student_2
Student_4
Student_54
Student_10
Student_9
Student_49
Student_13
Student_44
Student_97
Student_39
Student_90
Student_50
Student_28
Student_8
Student_69
Student_70
Student_95
Student_72
Student_5
Student_31
Student_7
Student_89
Student_17
Student_91
Student_41
Student_11
Student_16
Student_96
Student_33
Student_53
Student_19
Student_88
Student_1
Student_87
Student_68
Student_6
Student_93
Student_100
Student_46
Student_64
Student

In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').apply(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').apply(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').apply(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').apply(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve superior classes
superior_classes = X[medoids]

# Print the superior classes
for superior_class in superior_classes:
    print(superior_class[0])  # Assuming 'name' is the first attribute


Agatta Kay Sella Sormin
Chiren Amanda P, Silitonga
Agustinus Robert Tua Tambunan


In [9]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve superior classes
superior_classes = X[medoids]

# Print the superior classes
for superior_class in superior_classes:
    print(superior_class[0])  # Assuming 'name' is the first attribute


Alisya Kirana
Agustinus Robert Tua Tambunan
Chiren Amanda P, Silitonga


In [10]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve all names from data
all_names = X[:, 0]  # Assuming 'name' is the first attribute

# Print all the names
for name in all_names:
    print(name)

Agustinus Robert Tua Tambunan
Alisya Kirana
Calista Anggriani Marbun
Chiren Amanda P, Silitonga
Dini Elisabet Dongoran
Dolok Maruarar Pakpahan
Epifaniya Lebora Dongoran
Heni Sianturi
Ipan Satria Harianja
Irhamna Faizul Pakpahan
Kevin Laurensius Sormin
Manganju A,V Marbun
Marcel Sijabat
Mateus Martuani Panjaitan
Mega Rias Riyani Azzahra
MIKAEL KEVIN WESLI SORMIN
Mike Roida Sormin
MORA SINTIA PAKPAHAN
Nadine Benedicta Naibaho
Nazryl F, Tampubolon
Novia Husna Pakpahan
OSRA IGNASIUS GULTOM
Paska Kristian Pakpahan
Petrus Risandi Pakpahan
Puput Yuni  Br, Matondang
Ridho Efendi Pakpahan
Ro Rezeky Simatupang
Santa Debora Teresia Sianturi
Teuku Robbi Alamsyah Bastari
THERESIA ROSALINDA NDRURU
Tiara Yosephine Sormin
Alfredo Pakpahan
Anugrah Harianja
Ardi Pakpahan
Citra Dewi Panjaitan
Dian Juniar Gultom
Dikky Ryan Maranatah Sormin
Doli Karunia Tambunan
ERAWATI PAKPAHAN
Ervin Alparis Panjaitan
HELMAN BINSAR PAKPAHAN
Ica Stefany Gultom
Imelda Rosmawati Silalahi
Kalvin Cleopatra Tambunan
Kevin Binca

In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv', sep=';')

# Exclude the first data row
data = data.iloc[1:]

# Replace commas (,) with periods (.) and convert to float
data['Peng Sem 1'] = data['Peng Sem 1'].str.replace(',', '.').astype(float)
data['Peng Sem 2'] = data['Peng Sem 2'].str.replace(',', '.').astype(float)
data['Ket Sem 1'] = data['Ket Sem 1'].str.replace(',', '.').astype(float)
data['Ket Sem 2'] = data['Ket Sem 2'].str.replace(',', '.').astype(float)

# Extract attributes from the data
attributes = ['Nama', 'Peng Sem 1', 'Ket Sem 1', 'Peng Sem 2', 'Ket Sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_indices = np.where(medoids)[0]
    cluster_points = X[cluster_indices]
    cluster_distances = distances[cluster_indices][:, cluster_indices]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(cluster_points, cluster_indices):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(cluster_points)):
        dissimilarity = total_dissimilarity(i, cluster_indices)
        if np.all(dissimilarity < best_dissimilarity):
            best_medoid = cluster_indices[i]  # Update with cluster index
            best_dissimilarity = dissimilarity
    return best_medoid, best_dissimilarity

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    medoid_index, _ = find_best_medoid(cluster_points, cluster_indices)
    medoids.append(medoid_index)

# Retrieve all attributes from data
all_attributes = X

# Print all the attributes
for row in all_attributes:
    print(f"Nama: {row[0]}, Peng Sem 1: {row[1]}, Ket Sem 1: {row[2]}, Peng Sem 2: {row[3]}, Ket Sem 2: {row[4]}")


Nama: Agustinus Robert Tua Tambunan, Peng Sem 1: 76.2, Ket Sem 1: 76.7, Peng Sem 2: 79.5, Ket Sem 2: 79.0
Nama: Alisya Kirana, Peng Sem 1: 82.6, Ket Sem 1: 82.4, Peng Sem 2: 85.6, Ket Sem 2: 85.2
Nama: Calista Anggriani Marbun, Peng Sem 1: 82.7, Ket Sem 1: 82.7, Peng Sem 2: 83.7, Ket Sem 2: 84.1
Nama: Chiren Amanda P, Silitonga, Peng Sem 1: 79.9, Ket Sem 1: 80.2, Peng Sem 2: 83.1, Ket Sem 2: 83.1
Nama: Dini Elisabet Dongoran, Peng Sem 1: 82.1, Ket Sem 1: 81.7, Peng Sem 2: 85.4, Ket Sem 2: 85.1
Nama: Dolok Maruarar Pakpahan, Peng Sem 1: 78.9, Ket Sem 1: 78.4, Peng Sem 2: 83.6, Ket Sem 2: 83.4
Nama: Epifaniya Lebora Dongoran, Peng Sem 1: 85.7, Ket Sem 1: 85.4, Peng Sem 2: 88.7, Ket Sem 2: 88.8
Nama: Heni Sianturi, Peng Sem 1: 84.6, Ket Sem 1: 85.1, Peng Sem 2: 86.4, Ket Sem 2: 86.3
Nama: Ipan Satria Harianja, Peng Sem 1: 78.8, Ket Sem 1: 77.8, Peng Sem 2: 81.9, Ket Sem 2: 82.4
Nama: Irhamna Faizul Pakpahan, Peng Sem 1: 78.1, Ket Sem 1: 77.8, Peng Sem 2: 80.0, Ket Sem 2: 79.8
Nama: Kevin 

In [7]:
import pandas as pd
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Load data from CSV file
data = pd.read_csv('Datas_Nilai.csv')

# Extract attributes from the data
attributes = ['name', 'peng sem 1', 'ket sem 1', 'peng sem 2', 'ket sem 2']
X = data[attributes].values

# Perform pairwise distance calculation
distances = pairwise_distances(X[:, 1:], metric='euclidean')

# Function to calculate the total dissimilarity for a given medoid index
def total_dissimilarity(index, medoids):
    cluster_points = X[medoids[index]]
    cluster_distances = distances[medoids[index]][:, medoids[index]]
    return sum(cluster_distances)

# Function to find the best medoid with the lowest dissimilarity
def find_best_medoid(medoids):
    best_medoid = None
    best_dissimilarity = float('inf')
    for i in range(len(medoids)):
        dissimilarity = total_dissimilarity(i, medoids)
        if dissimilarity < best_dissimilarity:
            best_medoid = i
            best_dissimilarity = dissimilarity
    return best_medoid

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Find the best medoid for each cluster
medoids = []
for cluster_id in range(k):
    cluster_points = X[medoids_indices == cluster_id]
    medoid_index = find_best_medoid(cluster_id, cluster_points)
    medoids.append(medoid_index)

# Retrieve superior classes
superior_classes = X[medoids]

# Print the superior classes
for superior_class in superior_classes:
    print(superior_class[0])  # Assuming 'name' is the first attribute


ParserError: Error tokenizing data. C error: Expected 5 fields in line 6, saw 6


In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn_extra.cluster import KMedoids

# Generate random data
np.random.seed(0)
num_samples = 100
unique_names = set()
data = []

while len(unique_names) < num_samples:
    name = f'Student_{np.random.randint(1, 100)}'
    if name not in unique_names:
        unique_names.add(name)
        data.append([name])

data = pd.DataFrame(data, columns=['Nama'])

# Extract attributes from the data
X = data['Nama'].values.reshape(-1, 1)

# Perform pairwise distance calculation
distances = pairwise_distances(X, metric='euclidean')

# Perform K-Medoids clustering
k = 3  # Number of clusters
medoids_indices = KMedoids(n_clusters=k, random_state=0).fit_predict(distances)

# Retrieve all the data points for each cluster
clusters = []
for cluster_id in range(k):
    cluster_indices = np.where(medoids_indices == cluster_id)[0]
    cluster_points = X[cluster_indices]
    clusters.append(cluster_points)

# Print the names for each cluster
for cluster in clusters:
    for data_point in cluster:
        print(data_point[0])  # Assuming 'Nama' is the first attribute


KeyboardInterrupt: 