# 패키지를 사용하지 않고 폴드 직접 생성

In [17]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Generate synthetic regression data: y is a fixed linear combination of the
# five features plus Gaussian noise scaled by 0.1.
np.random.seed(42)
n_samples = 100
n_features = 5
X = np.random.rand(n_samples, n_features)
y = np.dot(X, np.array([3, 2, -1, 0.5, 1])) + 0.1 * np.random.randn(n_samples)

# Define the model
model = LinearRegression()

# Number of folds
K = 5
if not 2 <= K <= len(X):
    # With fewer than 2 folds there is no training data left; with more folds
    # than samples some folds would be empty.
    raise ValueError(f"K must be between 2 and {len(X)}, got {K}")

# Build the folds and run cross-validation.
# Shuffle the row indices once, then cut the shuffled array into K
# near-equal folds.  np.array_split gives the first len(X) % K folds one
# extra element, so every sample is used as test data exactly once even when
# len(X) is not divisible by K.  (A fixed slice width of len(X) // K would
# leave the trailing remainder samples out of every test fold.)
indices = np.arange(len(X))
np.random.shuffle(indices)
folds = np.array_split(indices, K)
mse_scores = []

for i, test_indices in enumerate(folds):
    # test_indices holds shuffled row ids; for 100 samples and K = 5 these are
    # positions 0-19, 20-39, ... of the *shuffled* index array.
    # Train on every fold except the held-out one.
    train_indices = np.concatenate(folds[:i] + folds[i + 1:])
    X_train, y_train = X[train_indices], y[train_indices]
    X_test, y_test = X[test_indices], y[test_indices]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)
    print(f"Custom Fold {i+1} MSE: {mse:.3f}")

# Cross-validated estimate: unweighted mean of the per-fold MSEs.
final_mse = np.mean(mse_scores)
print(f"Custom K-Fold Final MSE: {final_mse:.3f}")

# MSE without K-fold: the model is fit on all of X and scored on the same X,
# so this is an in-sample (training) error, not a held-out estimate.
model.fit(X, y)
y_pred = model.predict(X)
no_k_fold_mse = mean_squared_error(y, y_pred)
print(f"No K-Fold MSE: {no_k_fold_mse:.3f}")

print(f"\nComparison:\nCustom K-Fold MSE: {final_mse:.3f}\nNo K-Fold MSE: {no_k_fold_mse:.3f}")

Custom Fold 1 MSE: 0.008
Custom Fold 2 MSE: 0.012
Custom Fold 3 MSE: 0.014
Custom Fold 4 MSE: 0.013
Custom Fold 5 MSE: 0.020
Custom K-Fold Final MSE: 0.013
No K-Fold MSE: 0.011

Comparison:
Custom K-Fold MSE: 0.013
No K-Fold MSE: 0.011


# 패키지 사용

In [18]:
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression

# Generate synthetic regression data: a fixed coefficient vector applied to
# uniform random features, plus Gaussian noise scaled by 0.1.
np.random.seed(42)
n_samples, n_features = 100, 5
X = np.random.rand(n_samples, n_features)
true_coef = np.array([3, 2, -1, 0.5, 1])
noise = 0.1 * np.random.randn(n_samples)
y = np.dot(X, true_coef) + noise

# Define the model
model = LinearRegression()

# Number of folds
K = 5

# Build the folds with scikit-learn's KFold and run cross-validation.
kf = KFold(n_splits=K, shuffle=True, random_state=42)
mse_scores = []

# kf.split(X) yields one (train indices, test indices) pair per fold.
for i, (train_index, test_index) in enumerate(kf.split(X)):
    # Refit on the training rows, then score on the held-out rows.
    y_pred = model.fit(X[train_index], y[train_index]).predict(X[test_index])
    mse = mean_squared_error(y[test_index], y_pred)
    mse_scores.append(mse)
    print(f"Package Fold {i+1} MSE: {mse:.3f}")

# Cross-validated estimate: mean of the per-fold MSEs.
final_mse = np.asarray(mse_scores).mean()
print(f"Package K-Fold Final MSE: {final_mse:.3f}")

# MSE without K-fold: fit on all of X and score on the same X.
y_pred = model.fit(X, y).predict(X)
no_k_fold_mse = mean_squared_error(y, y_pred)
print(f"No K-Fold MSE: {no_k_fold_mse:.3f}")

print(f"\nComparison:\nPackage K-Fold MSE: {final_mse:.3f}\nNo K-Fold MSE: {no_k_fold_mse:.3f}")

Package Fold 1 MSE: 0.010
Package Fold 2 MSE: 0.013
Package Fold 3 MSE: 0.011
Package Fold 4 MSE: 0.012
Package Fold 5 MSE: 0.017
Package K-Fold Final MSE: 0.013
No K-Fold MSE: 0.011

Comparison:
Package K-Fold MSE: 0.013
No K-Fold MSE: 0.011
