<a href="https://colab.research.google.com/github/leemeem14/datahw/blob/main/untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Runs as-is on Colab / Python 3

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# 1) Load the Iris dataset: feature matrix X and class labels y
iris = load_iris()
X = iris.data
y = iris.target

# 2) Train/test split: hold out 20% for testing, stratified on y so both
#    splits keep the class proportions; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 3) Choose K and the distance metric
#    (metric='minkowski' with p=2 is the Euclidean distance)
k = 5

# 4) Build the classifier and fit it on the training split
#    (fit() hands back the fitted estimator, so the calls can be chained)
knn = KNeighborsClassifier(
    n_neighbors=k,
    metric='minkowski',
    p=2,
).fit(X_train, y_train)

# 5) Evaluate on the held-out split and report
acc = knn.score(X_test, y_test)  # mean accuracy on the test split
pred = knn.predict(X_test)       # predicted class label for each test sample
print(f"K={k}, metric=euclidean -> Accuracy: {acc:.3f}")
print("예측 라벨(일부):", pred[:10])  # first ten predicted labels


In [None]:
# Colab: runs as-is on the Python 3 runtime type
# K-means
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

# 1) Generate synthetic 2-D data: 400 points drawn around 3 blob centers
X, y_true = make_blobs(
    n_samples=400, centers=3, cluster_std=0.80, random_state=42
)

# 2) Fit k-means and assign every sample to one of k clusters
k = 3
kmeans = KMeans(n_clusters=k, n_init='auto', random_state=42)
labels = kmeans.fit_predict(X)
centers = kmeans.cluster_centers_

# 3) Visualize: samples colored by assigned cluster, centroids drawn on top
plt.figure(figsize=(6, 5))
plt.scatter(X[:, 0], X[:, 1], c=labels, s=20, cmap='viridis', alpha=0.7)
plt.scatter(centers[:, 0], centers[:, 1],
            c='red', s=120, marker='X', edgecolors='black', linewidths=1.5,
            label='centroids')
# Derive the title from k so it stays correct when k is changed above
plt.title(f'k-means clustering (k={k})')
plt.legend()
plt.tight_layout()
plt.show()

# Optional: print the cluster-cohesion measure (inertia; SSE)
print(f"Inertia: {kmeans.inertia_:.2f}")

In [None]:
# Colab: runs as-is on the Python 3 runtime
# Hierarchical clustering
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.cluster import AgglomerativeClustering

from scipy.cluster.hierarchy import dendrogram, linkage

# 1) Generate synthetic 2-D data: 300 points drawn around 3 blob centers
X, y_true = make_blobs(
    n_samples=300, centers=3, cluster_std=0.70, random_state=42
)

# Number of flat clusters to extract; defined once so the model and the
# plot title below cannot drift apart
n_clusters = 3

# 2) Hierarchical linkage for the dendrogram (SciPy linkage)
Z = linkage(X, method='ward')  # Euclidean-based, variance-minimizing merges

plt.figure(figsize=(7, 4))
# Show only the last 20 merged clusters to keep the dendrogram readable
dendrogram(Z, truncate_mode='lastp', p=20, leaf_rotation=90, leaf_font_size=10)
plt.title('Hierarchical Clustering Dendrogram (truncated)')
plt.xlabel('Sample index or (cluster size)')
plt.ylabel('Distance')
plt.tight_layout()
plt.show()

# 3) Estimate flat cluster labels with AgglomerativeClustering
#    (ward linkage works with Euclidean distances only)
model = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward')
labels = model.fit_predict(X)

# 4) Scatter plot of the samples colored by assigned cluster
plt.figure(figsize=(6, 5))
plt.scatter(X[:, 0], X[:, 1], c=labels, s=20, cmap='viridis', alpha=0.8)
plt.title(f'Agglomerative Clustering (n_clusters={n_clusters}, linkage=ward)')
plt.tight_layout()
plt.show()
