<a href="https://colab.research.google.com/github/qaribaldi/fknn/blob/main/fknn_final_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Load the dataset from the CSV file in the working directory
df = pd.read_csv('titanic.csv')

# Preview the first 5 rows
print(df.head(n=5))

   Pclass     Sex   Age  SibSp  Parch     Fare Embarked  Survived
0       3    male  34.5      0      0   7.8292        Q         0
1       3  female  47.0      1      0   7.0000        S         1
2       2    male  62.0      0      0   9.6875        Q         0
3       3    male  27.0      0      0   8.6625        S         0
4       3  female  22.0      1      1  12.2875        S         1


In [None]:
# Encoding and normalisation
# Keep only the modelling columns and drop rows with missing values.
df = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived']].dropna().copy()

# Encode the categorical columns as integers.
# A column that is already numeric was encoded by an earlier run of this cell;
# mapping it a second time would turn every value into NaN, so it is skipped.
for column, codes in (('Sex', {'male': 0, 'female': 1}),
                      ('Embarked', {'C': 0, 'Q': 1, 'S': 2})):
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    encoded = df[column].map(codes)
    # Series.map yields NaN for values missing from the mapping; fail loudly
    # instead of letting NaN leak into the distance computations.
    if encoded.isna().any():
        unknown = df.loc[encoded.isna(), column].unique().tolist()
        raise ValueError(f"Unexpected values in column {column!r}: {unknown}")
    df[column] = encoded

# Rescale the continuous columns to the [0, 1] range.
# NOTE(review): the scaler is fitted on every row, including rows that are
# later used as test data - confirm this is acceptable for the experiment.
scaler = MinMaxScaler()
df[['Age', 'Fare']] = scaler.fit_transform(df[['Age', 'Fare']])

# Show the result
print(df.head())

   Pclass  Sex       Age  SibSp  Parch      Fare  Embarked  Survived
0       3  NaN  0.435065      0      0  0.017929       NaN         0
1       3  NaN  0.597403      1      0  0.014737       NaN         1
2       2  NaN  0.792208      0      0  0.025081       NaN         0
3       3  NaN  0.337662      0      0  0.021136       NaN         0
4       3  NaN  0.272727      1      1  0.035087       NaN         1


In [None]:
# Separate the feature matrix (X) from the target vector (y)
feature_names = df.columns.drop('Survived')
X = df[feature_names].values
y = df['Survived'].values

In [None]:
# Positional split: the first 100 rows are training data, the next 50 rows are test data
X_train, y_train = X[:100], y[:100]
X_test, y_test = X[100:150], y[100:150]

In [None]:
# Pearson correlation of every feature column with the target
correlations = dict(
    zip(feature_names, (np.corrcoef(column, y)[0, 1] for column in X.T))
)

# Threshold = mean of the correlation values.
# NOTE(review): this is the mean of the signed values, while the ordering
# below uses absolute values - confirm that is intended.
threshold = np.mean(list(correlations.values()))

# Features ordered from weakest to strongest absolute correlation
sorted_features = sorted(correlations.items(), key=lambda pair: abs(pair[1]))

In [None]:
# Report the Pearson correlation of each feature, followed by the threshold
print("Korelasi Pearson per fitur terhadap target:")
for name, value in sorted_features:
    line = f"{name}: {value:.4f}"
    print(line)
print(f"\nThreshold rata-rata korelasi: {threshold:.4f}\n")

Korelasi Pearson per fitur terhadap target:
Age: 0.0067
Embarked: 0.0253
Pclass: -0.0761
SibSp: 0.1575
Parch: 0.1820
Fare: 0.1867
Sex: 1.0000

Threshold rata-rata korelasi: 0.2117



In [None]:
# Euclidean distance helper
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    The inputs must support element-wise subtraction (e.g. NumPy arrays of
    the same shape); the result is the square root of the summed squared
    differences.
    """
    delta = x1 - x2
    squared_sum = np.sum(delta * delta)
    return np.sqrt(squared_sum)

In [None]:
def fuzzy_knn(train_X, train_y, test_X, k=5, m=2):
    """Classify each row of ``test_X`` with a distance-weighted (fuzzy) k-NN vote.

    For every test row the ``k`` training rows with the smallest Euclidean
    distance are selected. Each neighbour votes for its own class with weight
    ``1 / d ** (2 / (m - 1))``; the class with the largest total weight wins.
    Ties are resolved in favour of the smallest class label.

    Parameters
    ----------
    train_X : array-like, shape (n_train, n_features)
        Numeric training features.
    train_y : array-like, shape (n_train,)
        Class label of each training row. Any set of sortable, hashable
        labels is accepted.
    test_X : array-like, shape (n_test, n_features)
        Numeric rows to classify.
    k : int, default 5
        Number of neighbours that take part in the vote (must be >= 1).
    m : float, default 2
        Fuzzifier (must be >= 1). The exponent ``2 / (m - 1)`` is undefined
        at ``m == 1``; in that case the weight falls back to ``1 / d``.

    Returns
    -------
    list
        Predicted class label for each row of ``test_X``.

    Raises
    ------
    ValueError
        If ``k < 1``, ``m < 1``, the training set is empty, or ``train_X``
        and ``train_y`` have different lengths.
    """
    train_X = np.asarray(train_X, dtype=float)
    test_X = np.asarray(test_X, dtype=float)
    train_y = np.asarray(train_y)

    if k < 1:
        raise ValueError("k must be at least 1")
    if m < 1:
        raise ValueError("m must be at least 1")
    if len(train_X) == 0:
        raise ValueError("the training set must not be empty")
    if len(train_X) != len(train_y):
        raise ValueError("train_X and train_y must have the same length")

    # Exponent applied to the distance; m == 1 would divide by zero, so it
    # uses the plain inverse distance instead.
    exponent = 1.0 if m == 1 else 2.0 / (m - 1)

    # Sorted Python-scalar labels: fixes the tie-break order and keeps the
    # returned predictions as plain Python values.
    class_labels = np.unique(train_y).tolist()

    # Distances are clamped to this floor so that an exact match (d == 0)
    # receives a very large weight instead of causing a division by zero.
    min_distance = 1e-9

    predictions = []
    for test_row in test_X:
        # Euclidean distance from this test row to every training row
        distances = np.sqrt(np.sum((train_X - test_row) ** 2, axis=1))

        # Stable sort keeps training order among equally distant rows
        nearest = np.argsort(distances, kind="stable")[:k]

        # Accumulate the membership weight of each class
        class_weights = dict.fromkeys(class_labels, 0.0)
        for idx in nearest:
            d = max(distances[idx], min_distance)
            class_weights[train_y[idx]] += 1.0 / d ** exponent

        # Predict the class with the largest accumulated membership
        predictions.append(max(class_weights, key=class_weights.get))

    return predictions

In [None]:
# Evaluation function
def evaluate(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 for binary labels.

    Class ``1`` is treated as the positive class and class ``0`` as the
    negative class.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, same shape as ``y_true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``. Any metric whose denominator
        is zero is reported as 0.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same shape.
    """
    # Coerce to arrays so the element-wise comparisons below also work for
    # plain Python lists (a list compared with ``== 1`` is a single False).
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")

    # Confusion-matrix counts
    tp = np.sum((y_true == 1) & (y_pred == 1))
    tn = np.sum((y_true == 0) & (y_pred == 0))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))

    total = y_true.size
    accuracy = (tp + tn) / total if total > 0 else 0
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return accuracy, precision, recall, f1

# Best configuration seen so far (highest accuracy; the first one wins ties)
best_accuracy = 0
best_params = ""
best_evaluation = ""

# Hyper-parameter grid
results = []
m_values = [1, 2, 3]  # fuzzifier values to try
k_values = [1, 2, 3]  # neighbour counts to try

# Build every feature configuration once, outside the (k, m) loops:
# first all features, then one leave-one-feature-out variant per feature, in
# order of increasing absolute correlation. Columns are removed from
# X_train / X_test directly so the ablation always uses the same split as the
# full-feature run.
column_index = {name: pos for pos, name in enumerate(feature_names)}
feature_sets = [("Semua Fitur", X_train, X_test)]
for feature, _ in sorted_features:
    dropped = column_index[feature]
    feature_sets.append((
        f"Tanpa fitur {feature}",
        np.delete(X_train, dropped, axis=1),
        np.delete(X_test, dropped, axis=1),
    ))

for k in k_values:
    for m in m_values:
        evaluations = [f"Evaluasi dengan k = {k}, m = {m}"]

        for label, train_features, test_features in feature_sets:
            y_pred = fuzzy_knn(train_features, y_train, test_features, k=k, m=m)
            acc, prec, rec, f1 = evaluate(y_test, np.array(y_pred))
            evaluations.append(f"{label} - Akurasi: {acc:.2%}, Precision: {prec:.2%}, Recall: {rec:.2%}, F1-Score: {f1:.2%}")
            results.append(f"k={k}, m={m}, {label}: Akurasi={acc:.2%}, Precision={prec:.2%}, Recall={rec:.2%}, F1-Score={f1:.2%}")

            # Keep this configuration if it beats the best accuracy so far
            if acc > best_accuracy:
                best_accuracy = acc
                best_params = f"k={k}, m={m}, {label}"
                best_evaluation = evaluations[-1]

        print("\n".join(evaluations))
        print("\n")

Evaluasi dengan k = 1, m = 1
Semua Fitur - Akurasi: 88.00%, Precision: 85.71%, Recall: 75.00%, F1-Score: 80.00%
Tanpa fitur Age - Akurasi: 84.00%, Precision: 83.33%, Recall: 62.50%, F1-Score: 71.43%
Tanpa fitur Embarked - Akurasi: 94.00%, Precision: 88.24%, Recall: 93.75%, F1-Score: 90.91%
Tanpa fitur Pclass - Akurasi: 94.00%, Precision: 88.24%, Recall: 93.75%, F1-Score: 90.91%
Tanpa fitur SibSp - Akurasi: 96.00%, Precision: 88.89%, Recall: 100.00%, F1-Score: 94.12%
Tanpa fitur Parch - Akurasi: 98.00%, Precision: 100.00%, Recall: 93.75%, F1-Score: 96.77%
Tanpa fitur Fare - Akurasi: 92.00%, Precision: 87.50%, Recall: 87.50%, F1-Score: 87.50%
Tanpa fitur Sex - Akurasi: 54.00%, Precision: 26.67%, Recall: 25.00%, F1-Score: 25.81%


Evaluasi dengan k = 1, m = 2
Semua Fitur - Akurasi: 88.00%, Precision: 85.71%, Recall: 75.00%, F1-Score: 80.00%
Tanpa fitur Age - Akurasi: 84.00%, Precision: 83.33%, Recall: 62.50%, F1-Score: 71.43%
Tanpa fitur Embarked - Akurasi: 94.00%, Precision: 88.24%, Reca

In [None]:
# Report the best configuration found by the grid search
print(
    "\n=== Hasil Terbaik ===",
    f"Hasil terbaik: {best_params}",
    f"Evaluasi terbaik: {best_evaluation}",
    sep="\n",
)


=== Hasil Terbaik ===
Hasil terbaik: k=1, m=1, Tanpa fitur Parch
Evaluasi terbaik: Tanpa fitur Parch - Akurasi: 98.00%, Precision: 100.00%, Recall: 93.75%, F1-Score: 96.77%
