<a href="https://colab.research.google.com/github/rachelinaputri/data-mini-project/blob/main/miniproject2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, classification_report

# Paths to the CSV files
file1_path = 'CPB1.csv'
file2_path = 'CPB2.csv'
file3_path = 'CPB3.csv'
file4_path = 'CPB4.csv'
file5_path = 'CPB5.csv'
file6_path = 'CPB6.csv'
file7_path = 'CPB7.csv'

# Load the data from CSV files
data1 = pd.read_csv(file1_path, delimiter=';')
data2 = pd.read_csv(file2_path, delimiter=';')
data3 = pd.read_csv(file3_path, delimiter=';')
data4 = pd.read_csv(file4_path, delimiter=';')
data5 = pd.read_csv(file5_path, delimiter=';')
data6 = pd.read_csv(file6_path, delimiter=';')
data7 = pd.read_csv(file7_path, delimiter=';')

# Rename the target column in each dataframe for consistency
data1.rename(columns={'kelas1': 'target'}, inplace=True)
data2.rename(columns={'kelas2': 'target'}, inplace=True)
data3.rename(columns={'kelas3': 'target'}, inplace=True)
data4.rename(columns={'kelas4': 'target'}, inplace=True)
data5.rename(columns={'kelas5': 'target'}, inplace=True)
data6.rename(columns={'kelas6': 'target'}, inplace=True)
data7.rename(columns={'kelas7': 'target'}, inplace=True)

# Combine the three datasets
combined_data = pd.concat([data1, data2, data3, data4, data5, data6, data7], ignore_index=True)

# Convert decimal commas to dots and convert to float
for column in ['powLv1', 'powLv2', 'powLv3', 'powLv4', 'powLv5']:
    combined_data[column] = combined_data[column].str.replace(',', '.').astype(float)

# Extract features and target
X = combined_data.drop('target', axis=1)
y = combined_data['target']

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the SVM and K-NN models
svm_model = SVC(kernel='rbf')
knn_model = KNeighborsClassifier(n_neighbors=5)

# Function to evaluate model
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    f1 = f1_score(y_test, predictions, average='weighted')
    cm = confusion_matrix(y_test, predictions)
    sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    specificity = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1-Score: {f1:.2f}")
    print(f"Sensitivity: {sensitivity:.2f}")
    print(f"Specificity: {specificity:.2f}")
    print(classification_report(y_test, predictions))

# Split the data for the scenarios
# 75% training, 25% testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, random_state=42)

# Evaluate SVM
print("Evaluating SVM:")
evaluate_model(svm_model, X_train, X_test, y_train, y_test)

# Evaluate K-NN
print("Evaluating K-NN:")
evaluate_model(knn_model, X_train, X_test, y_train, y_test)

# You can add the same evaluation for the 50% training, 25% validation, 25% testing scenario by following a similar procedure.

Evaluating SVM:
Accuracy: 0.61
F1-Score: 0.50
Sensitivity: 1.00
Specificity: 0.00
              precision    recall  f1-score   support

           A       0.70      0.88      0.78       192
           B       0.00      0.00      0.00        53
           C       0.50      0.85      0.63       122
           D       0.00      0.00      0.00        83

    accuracy                           0.61       450
   macro avg       0.30      0.43      0.35       450
weighted avg       0.43      0.61      0.50       450

Evaluating K-NN:
Accuracy: 0.64
F1-Score: 0.61
Sensitivity: 0.88
Specificity: 0.09
              precision    recall  f1-score   support

           A       0.69      0.86      0.76       192
           B       0.17      0.09      0.12        53
           C       0.69      0.67      0.68       122
           D       0.56      0.40      0.46        83

    accuracy                           0.64       450
   macro avg       0.53      0.51      0.51       450
weighted avg       0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
