## MindSpore 实现 KNN(基于欧氏距离)

In [26]:
import mindspore.context as context
import mindspore.numpy as mnp
import mindspore.ops as ops
import pandas as pd
from mindspore import Tensor

# Configure the MindSpore runtime: graph execution mode on the CPU backend.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

# Custom CSV loader
def load_data(file_path, has_label=True):
    """Read a CSV file and return its contents as NumPy arrays.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path or file-like
            object). The first row is treated as the header.
        has_label: When true, the last column is split off as the label
            vector; when false, every column is treated as a feature.

    Returns:
        ``(X, y)`` if ``has_label`` is true, where ``X`` holds all columns but
        the last and ``y`` holds the last column; otherwise just ``X`` with
        all columns.
    """
    frame = pd.read_csv(file_path)

    # Unlabelled file: the whole table is the feature matrix.
    if not has_label:
        return frame.values

    features = frame.iloc[:, :-1].values
    labels = frame.iloc[:, -1].values
    return features, labels

# Read the training, validation and (unlabelled) test splits from disk.
X_train, y_train = load_data('data/train.csv')
X_val, y_val = load_data('data/val.csv')
X_test = load_data('data/test_data.csv', has_label=False)

# Wrap the arrays as MindSpore tensors: float32 for features, int32 for labels.
X_train, X_val, X_test = (
    Tensor(features, mnp.float32) for features in (X_train, X_val, X_test)
)
y_train, y_val = (Tensor(labels, mnp.int32) for labels in (y_train, y_val))

# KNN classifier: Euclidean distance, majority vote among the k nearest samples.
class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only memorises the training set; all work happens in ``predict``,
    which computes the distance from every query row to every training row and
    lets the ``k`` closest training samples vote on the label.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest samples.

        Args:
            k: Number of neighbours that take part in the vote.

        Raises:
            ValueError: If ``k`` is smaller than 1 (an empty vote has no
                winner, so fail here rather than deep inside ``predict``).
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Args:
            X: Training features, one sample per row (must expose ``shape``).
            y: Training labels, one per row of ``X`` (must expose ``shape``).

        Raises:
            ValueError: If ``X`` and ``y`` hold a different number of samples.
        """
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return the predicted label for every row of ``X``."""
        distances = self.compute_distances(X)
        return self.predict_labels(distances)

    def compute_distances(self, X):
        """Return the Euclidean distance from each query row to each training row.

        Args:
            X: Query features, indexed as a 2-D array with one sample per row.

        Returns:
            An array of shape ``(num_test, num_train)`` whose entry ``[i, j]``
            is the distance between query ``i`` and training sample ``j``.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        if num_test == 0:
            # Nothing to stack; keep the (0, num_train) shape callers expect.
            return mnp.zeros((0, num_train))
        # Build each row independently and stack once at the end instead of
        # writing rows into a pre-allocated tensor one index-assignment at a time.
        rows = [
            mnp.sqrt(mnp.sum((self.X_train - X[i, :]) ** 2, axis=1))
            for i in range(num_test)
        ]
        return mnp.stack(rows)

    def predict_labels(self, dists):
        """Pick, for every query, the most frequent label among its nearest neighbours.

        Args:
            dists: Distance matrix as returned by ``compute_distances``.

        Returns:
            An int32 vector with one predicted label per row of ``dists``.

        Note:
            The vote uses ``bincount(...).argmax()``, so labels are presumably
            required to be non-negative integers, and a tie presumably goes to
            the smallest label (first maximum) -- confirm against the
            MindSpore documentation. If ``k`` exceeds the number of training
            samples, the slice simply uses all of them.
        """
        num_test = dists.shape[0]
        y_pred = mnp.zeros(num_test, dtype=mnp.int32)
        # The sort operator does not depend on the row, so build it once.
        sort_ascending = ops.Sort(axis=0)
        for i in range(num_test):
            # Sort returns (values, indices); only the indices are needed.
            nearest = sort_ascending(dists[i, :])[1][:self.k]
            closest_y = self.y_train[nearest]
            y_pred[i] = mnp.bincount(closest_y).argmax()
        return y_pred

# Build the classifier and hand it the training set.
knn = KNN(k=3)
knn.fit(X_train, y_train)

# Score on the validation split: fraction of predictions that match the labels.
y_val_pred = knn.predict(X_val)
val_hits = (y_val_pred == y_val).astype(mnp.float32)
accuracy_val = mnp.mean(val_hits)
print("Validation Accuracy:", accuracy_val.asnumpy())

# Predict labels for the unlabelled test split.
y_test_pred = knn.predict(X_test)
test_labels = y_test_pred.asnumpy()
print("Test Predictions:", test_labels)

# Write the test predictions to a single-column CSV file.
df_predictions = pd.DataFrame(test_labels, columns=['label'])
df_predictions.to_csv('task3_test_prediction.csv', index=False)



Validation Accuracy: 0.93333334
Test Predictions: [2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0 2 1 0 2 2 1 0
 2]
