<a href="https://colab.research.google.com/github/rachelinaputri/data-mini-project/blob/main/50_training_25_validation_dan_25_testing_(CPA1_3).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

# Load a semicolon-delimited CSV and split it into features and labels
def load_data(file_path):
    """Read a ';'-delimited CSV and return ``(X, y)``.

    Every column except the last is treated as a feature and coerced to a
    numeric dtype; cells that cannot be parsed become NaN. The last column
    is the label and is encoded as integer codes in order of first
    appearance.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the
            CSV file to load.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a DataFrame of numeric features
        and ``y`` is a NumPy array of integer label codes.
    """
    frame = pd.read_csv(file_path, delimiter=';')
    # errors='coerce' turns unparseable cells into NaN instead of raising.
    features = frame.iloc[:, :-1].apply(pd.to_numeric, errors='coerce')
    # factorize returns (codes, uniques); only the codes are needed here.
    label_codes, _ = pd.factorize(frame.iloc[:, -1])
    return features, label_codes

# Compute specificity (true-negative rate) for binary predictions
def specificity_score(y_true, y_pred):
    """Return the specificity, TN / (TN + FP), of binary predictions.

    The confusion matrix is built by ``confusion_matrix`` with its default
    label ordering, so the first (lowest-sorted) label is treated as the
    negative class, exactly as in the original implementation.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same length as ``y_true``.

    Returns:
        The fraction of actual negatives that were predicted negative.
        Returns 0.0 when ``y_true`` contains no negative samples, since the
        ratio is then undefined (this mirrors scikit-learn's default
        zero-division convention for recall-like metrics).

    Raises:
        ValueError: If the labels seen in ``y_true`` and ``y_pred`` do not
            form exactly two classes, because specificity as computed here
            is only defined for a 2x2 confusion matrix.
    """
    matrix = confusion_matrix(y_true, y_pred)
    # Fail with an explicit message instead of an opaque tuple-unpacking
    # error when the data is single-class or multiclass.
    if matrix.shape != (2, 2):
        raise ValueError(
            "specificity_score requires exactly two classes, got a "
            f"confusion matrix of shape {matrix.shape}"
        )

    # Row 0 holds the actual negatives: [true negatives, false positives].
    tn, fp = matrix[0]
    actual_negatives = tn + fp
    # Avoid 0/0 (nan plus a RuntimeWarning) when there are no negatives.
    if actual_negatives == 0:
        return 0.0
    return tn / actual_negatives

# Train each classifier and evaluate it on the validation and test splits
def train_and_evaluate(X_train, X_val, X_test, y_train, y_val, y_test):
    """Fit an SVM and a k-NN classifier and score both on two held-out splits.

    Features are standardized with a ``StandardScaler`` fitted on the
    training split only; the same fitted scaler is then applied to the
    validation and test splits. Both classifiers are created with their
    default hyperparameters.

    Args:
        X_train, X_val, X_test: Feature matrices of the three splits.
        y_train, y_val, y_test: Matching label vectors.

    Returns:
        A dict keyed by model name (``'SVM'``, ``'K-NN'``). Each value is a
        dict of metrics: accuracy, macro recall, macro precision, macro F1
        and specificity, first for the validation split and then for the
        test split.
    """
    scaler = StandardScaler()
    train_features = scaler.fit_transform(X_train)
    # (key prefix, scaled features, true labels) for every evaluation split,
    # in the order the metrics must appear in the result dict.
    eval_splits = (
        ('Validation', scaler.transform(X_val), y_val),
        ('Test', scaler.transform(X_test), y_test),
    )

    classifiers = {
        'SVM': SVC(),
        'K-NN': KNeighborsClassifier(),
    }

    results = {}
    for name, classifier in classifiers.items():
        classifier.fit(train_features, y_train)

        scores = {}
        for split, features, truth in eval_splits:
            predicted = classifier.predict(features)
            scores[f'{split} Accuracy'] = accuracy_score(truth, predicted)
            scores[f'{split} Recall'] = recall_score(truth, predicted, average='macro')
            scores[f'{split} Precision'] = precision_score(truth, predicted, average='macro')
            scores[f'{split} F1 Score'] = f1_score(truth, predicted, average='macro')
            scores[f'{split} Specificity'] = specificity_score(truth, predicted)
        results[name] = scores

    return results

# Main execution block: evaluate both models on every dataset using a
# 50% training / 25% validation / 25% testing split.
file_paths = ['CPA1.csv', 'CPA2.csv', 'CPA3.csv']
for file_path in file_paths:
    X, y = load_data(file_path)

    # First split: half of the rows for training, the other half held out.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.5, random_state=42
    )
    # Second split: divide the held-out half evenly into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=42
    )

    results = train_and_evaluate(X_train, X_val, X_test, y_train, y_val, y_test)
    # Header line followed by the raw metrics dict on its own line.
    print(f'Results for {file_path}:', results, sep='\n')


Results for CPA1.csv:
{'SVM': {'Validation Accuracy': 0.78, 'Validation Recall': 0.7694805194805194, 'Validation Precision': 0.7818336162988115, 'Validation F1 Score': 0.7726333195535344, 'Validation Specificity': 0.6818181818181818, 'Test Accuracy': 0.8, 'Test Recall': 0.8013136288998358, 'Test Precision': 0.7954911433172303, 'Test F1 Score': 0.797077922077922, 'Test Specificity': 0.7931034482758621}, 'K-NN': {'Validation Accuracy': 0.74, 'Validation Recall': 0.7435064935064934, 'Validation Precision': 0.74, 'Validation F1 Score': 0.7390606182256123, 'Validation Specificity': 0.7727272727272727, 'Test Accuracy': 0.78, 'Test Recall': 0.770935960591133, 'Test Precision': 0.775, 'Test F1 Score': 0.7726333195535344, 'Test Specificity': 0.8275862068965517}}
Results for CPA2.csv:
{'SVM': {'Validation Accuracy': 0.76, 'Validation Recall': 0.737012987012987, 'Validation Precision': 0.7896825396825397, 'Validation F1 Score': 0.7395833333333333, 'Validation Specificity': 0.5454545454545454, 'Te