In [7]:
!pip install transformers scikit-learn --quiet

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from transformers import pipeline

In [8]:
# Load the dataset from the CSV file in the current working directory.
df = pd.read_csv("kelayakan-pendidikan-indonesia.csv")

# Sanity check: print the first rows, then the column index.
for preview in (df.head(), df.columns):
    print(preview)

                Provinsi  Sekolah    Siswa  Mengulang  Putus Sekolah  \
0   Prov. D.K.I. Jakarta     2224   756455       1202            869   
1       Prov. Jawa Barat    19553  4487262       4778           5675   
2      Prov. Jawa Tengah    18618  2595050       6909           2399   
3  Prov. D.I. Yogyakarta     1846   270610        357             75   
4       Prov. Jawa Timur    18980  2530849       4694           3327   

   Kepala Sekolah dan Guru(<S1)  Kepala Sekolah dan Guru(≥ S1)  \
0                          1089                          38305   
1                          5007                         209252   
2                          2337                         167535   
3                           326                          19771   
4                          2950                         180465   

   Tenaga Kependidikan(SM)  Tenaga Kependidikan(>SM)  Rombongan Belajar  \
0                     6967                      1569              27830   
1                   

In [9]:
# Derived indicators used as clustering features. Each one is a ratio, so a
# zero denominator would yield inf (or NaN for 0/0). inf is not touched by
# fillna() and is rejected by scikit-learn's input validation, so zero
# denominators are masked to NaN first and left for the later imputation step.
def _nonzero(series):
    """Return `series` with zeros replaced by NaN so it is safe to divide by."""
    return series.where(series != 0)


_teachers_s1 = df["Kepala Sekolah dan Guru(≥ S1)"]
_teachers_total = _teachers_s1 + df["Kepala Sekolah dan Guru(<S1)"]

# Students per school.
df["Siswa_per_Sekolah"] = df["Siswa"] / _nonzero(df["Sekolah"])

# Percentage of principals/teachers in the "≥ S1" column out of both columns.
df["Persen_KepsekdanGuru_S1"] = _teachers_s1 / _nonzero(_teachers_total) * 100

# Drop-out and grade-repetition counts as a percentage of students.
df["Persen_Putus_Sekolah"] = df["Putus Sekolah"] / _nonzero(df["Siswa"]) * 100
df["Persen_Mengulang"] = df["Mengulang"] / _nonzero(df["Siswa"]) * 100

In [10]:
# Columns fed to the clustering model.
features = [
    "Siswa_per_Sekolah",
    "Persen_KepsekdanGuru_S1",
    "Persen_Putus_Sekolah",
    "Persen_Mengulang",
]

# Fill missing values with each column's median before scaling.
feature_frame = df[features]
X = feature_frame.fillna(feature_frame.median())

# Standardize the features so KMeans sees them on a comparable scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Three clusters; random_state and n_init are fixed for repeatable results.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_ids = kmeans.fit_predict(X_scaled)
df["Cluster"] = cluster_ids

# Per-cluster feature means, printed as an aid for interpreting/labelling
# the clusters by quality (no labels are assigned here).
cluster_means = df.groupby("Cluster")[features].mean()
print(cluster_means)

         Siswa_per_Sekolah  Persen_KepsekdanGuru_S1  Persen_Putus_Sekolah  \
Cluster                                                                     
0               119.111166                92.392257              0.437270   
1               172.373096                97.139123              0.168657   
2               208.728018                81.910940              0.806911   

         Persen_Mengulang  
Cluster                    
0                1.305849  
1                0.406995  
2                3.573621  


In [11]:
# Build one "Cluster <id>: <province>, <province>, ..." line per cluster.
# groupby() iterates clusters in ascending id order (its default sort=True),
# so the lines match the order of the cluster_means table rather than the
# order in which cluster ids happen to first appear in df.
summary_input = []
for c, names in df.groupby("Cluster")["Provinsi"]:
    # Skip missing names and coerce to str so join() cannot raise TypeError.
    provs = names.dropna().astype(str).tolist()
    summary_input.append(f"Cluster {c}: {', '.join(provs)}")

# Header line followed by the per-cluster lines.
text_input = "Hasil klasifikasi pendidikan SD Indonesia:\n" + "\n".join(summary_input)

In [12]:
# Write the summary text to disk. The encoding is pinned to UTF-8 so the
# file's bytes do not depend on the platform's default locale encoding.
with open("input_granite.txt", "w", encoding="utf-8") as f:
    f.write(text_input)

print("File siap dipakai di LM Studio: input_granite.txt")

File siap dipakai di LM Studio: input_granite.txt
