# IMPORT

In [None]:

import pandas as pd # 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
# mediapipe dan openCV

# from tkinter import ttk
# from tkinter import *

# Load Datasets

In [None]:
data_path = 'datasets/Datafull terakhir test.csv'
# Path to the dataset file; it is read with pandas right below.

df = pd.read_csv(data_path, sep=';')
# Read the CSV found at data_path. The separator is passed explicitly as ';'
# so that the fields are split on semicolons.

df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
# Drop every column whose name starts with "Unnamed" (columns that carry no
# real header in the CSV).

label_col = df.columns[-1]
coord_cols = df.columns[:-1]
# The last column is treated as the gesture label; every column before it is
# treated as a coordinate feature.

if df[label_col].dtype != object:
    raise ValueError("Kolom label harus bertipe karakter")
# Validation: the label column must have dtype `object` (i.e. hold strings,
# not float/int64); otherwise a ValueError is raised.
# NOTE(review): a pandas version that loads text as a dedicated string dtype
# would also fail this comparison - confirm against the pandas version in use.

expected_cols = []
for i in range(len(coord_cols) // 2):
    expected_cols.extend([f"{i}x", f"{i}y"])
# Build the expected coordinate column names in order: 0x, 0y, 1x, 1y, ...
# so that x/y columns cannot be swapped and the landmark count stays consistent.

if list(coord_cols) != expected_cols:
    raise ValueError("Nama atau urutan kolom koordinat tidak sesuai")
# Abort when the coordinate column names or their order differ from the
# expected x/y interleaving (for example two y columns in a row).

df[coord_cols] = df[coord_cols].apply(pd.to_numeric, errors='coerce')
# Convert all coordinate columns to numeric values; errors='coerce' turns
# anything that cannot be parsed (e.g. strings) into NaN instead of raising.

num_points = len(coord_cols) // 2
# Number of landmarks = number of coordinate columns divided by two (x and y).
# Per the original author's note this is expected to be 21 hand landmarks.

# Print a short validation summary of the loaded data.
print(f"Total sample    : {len(df)}")
print(f"Jumlah landmark : {num_points}")
print(f"Jumlah kelas    : {df[label_col].nunique()}")

df.head()


In [None]:
# Check the dataset dimensions before the preprocessing steps.
df.shape

In [None]:
df.nunique()  # count the distinct values in every column

In [None]:
df.info()  # print the index range and the dtype of every column

In [None]:
df.dtypes  # list the dtype of each column to spot columns with an unexpected (non-numeric) type

# Data PreProcessing

In [None]:
# A notebook cell only displays its last expression, so the missing-value
# counts are printed explicitly to make both checks visible.
print(df.isna().sum())  # number of missing values per column
df.duplicated().sum()  # number of fully duplicated rows (sum must be called to get the count)

In [None]:
df = df.drop_duplicates()  # remove duplicated rows

In [None]:
df.shape  # dataset dimensions after the duplicated rows were removed

# Exploratory Data Analysis

In [None]:
# Bar chart with the number of rows per value of the "char" label column.
plt.figure(figsize=(10, 8))
sns.countplot(data=df, x="char")
plt.title("Distribusi Label / Char")
# NOTE(review): the bars count rows per label, while the axis text says
# "Jumlah Koordinat" (number of coordinates) - confirm the intended wording.
plt.ylabel("Jumlah Koordinat")
plt.show()

In [None]:
def plot_sample(row, num_landmarks=21):
    """Plot the landmark coordinates of one hand-sign sample.

    Args:
        row: Mapping-like record (for example one DataFrame row) holding the
            keys "0x", "0y", ..., "<n-1>x", "<n-1>y" and "char".
        num_landmarks: Number of landmarks to read from ``row``. Defaults to
            21, the value that was previously hard-coded.

    The landmarks are drawn in index order as one connected line with a
    marker on every point, and the sample's label is shown in the title.
    """
    x = [row[f"{i}x"] for i in range(num_landmarks)]
    y = [row[f"{i}y"] for i in range(num_landmarks)]

    # plt.figure() creates a figure that pyplot tracks as the current one, so
    # the size applies to the plot below. plt.Figure() would only build a
    # detached Figure object and the requested size would be ignored.
    plt.figure(figsize=(10, 8))
    plt.plot(x, y, marker="o")
    # Flip the y axis so that smaller y values are drawn at the top
    # (presumably because the coordinates are image pixels - confirm).
    plt.gca().invert_yaxis()
    plt.title(f"Class: {row['char']}")
    plt.show()

In [None]:
# Draw one randomly chosen sample for every label, so the plots do not have
# to be produced one by one by hand.
for label in df['char'].unique():
    rows_for_label = df.loc[df['char'] == label]

    # A label that matches no row (e.g. a missing value) is skipped.
    if not rows_for_label.empty:
        plot_sample(rows_for_label.sample(1).iloc[0])


In [None]:
# Split the columns by axis using the trailing letter of the column name.
x_cols = [name for name in df.columns if name.endswith('x')]
y_cols = [name for name in df.columns if name.endswith('y')]

# One box plot per axis, showing the value distribution of every landmark column.
for axis_cols, axis_title in (
    (x_cols, "Distribusi Koordinat X"),
    (y_cols, "Distribusi Koordinat Y"),
):
    plt.figure(figsize=(12, 4))
    sns.boxplot(data=df[axis_cols])
    plt.title(axis_title)
    plt.show()

In [None]:
# Keep only the rows whose label is a single ASCII letter and normalise the
# label to upper case. Each boolean filter is followed by .copy() so the
# result is an independent DataFrame; assigning to a column of a filtered
# frame without it triggers pandas' SettingWithCopyWarning.
df = df[df['char'].notna()].copy()           # drop rows with a missing label
df['char'] = df['char'].astype(str)          # make sure every label is a string
df = df[df['char'].str.match(r'^[A-Za-z]$')].copy()  # exactly one letter A-Z / a-z
df['char'] = df['char'].str.upper()


In [None]:
# Encode the "char" labels as integers: the sorted distinct labels are
# numbered 0, 1, 2, ... and the code is stored in a new "char_encoded" column.
print("\n=== Encoding Kolom 'char' ===")
unique_chars = sorted(df['char'].unique())
char_to_int = dict(zip(unique_chars, range(len(unique_chars))))
df['char_encoded'] = df['char'].map(char_to_int)
print("Mapping label:", char_to_int)

In [None]:
df.head()  # preview the first rows of the DataFrame

In [None]:
class KNN:
    """Minimal k-nearest-neighbours classifier using Euclidean distance.

    The training data is stored as-is by ``fit``; every prediction scans the
    whole training set, takes the ``k`` closest samples and returns the label
    that occurs most often among them. When several labels are equally
    frequent, the smallest label wins (``np.unique`` returns sorted labels and
    ``np.argmax`` picks the first maximum).
    """

    def __init__(self, k=3):
        """Create the classifier.

        Args:
            k: Number of neighbours that vote. Must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k harus bernilai minimal 1")
        self.k = k

    def fit(self, X, y):
        """Store the training samples and labels.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like with one label per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D, is empty, or its length differs
                from the length of ``y``.
        """
        # Converting here lets callers pass plain lists; the distance and
        # label lookups below rely on NumPy broadcasting and fancy indexing.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X harus berupa array 2 dimensi")
        if len(X) != len(y):
            raise ValueError("Jumlah sampel X dan y harus sama")
        if len(X) == 0:
            raise ValueError("Data latih tidak boleh kosong")
        self.x_train = X
        self.y_train = y

    def predict(self, X):
        """Return the predicted label for every row of ``X`` as an array."""
        samples = np.asarray(X, dtype=float)
        return np.array([self._predict(sample) for sample in samples])

    def _predict(self, x):
        """Return the majority label among the k training samples nearest to ``x``."""
        distances = np.sqrt(np.sum((self.x_train - x) ** 2, axis=1))
        # A stable sort keeps equally distant samples in training order, which
        # makes the choice of neighbours deterministic.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        labels, counts = np.unique(self.y_train[k_indices], return_counts=True)
        return labels[np.argmax(counts)]

    def accuracy(self, y_true, y_pred):
        """Return the fraction of positions where ``y_true`` equals ``y_pred``.

        Raises:
            ValueError: If the two inputs do not have the same shape.
        """
        # Without the conversion, comparing two lists yields a single bool
        # instead of an element-wise comparison.
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if y_true.shape != y_pred.shape:
            raise ValueError("y_true dan y_pred harus berukuran sama")
        return np.sum(y_true == y_pred) / len(y_true)

In [None]:
def split_train_test(X, y, test_size=0.2, random_state=42):
    """Shuffle the samples and split them into a train and a test part.

    Args:
        X: Array-like of samples (first axis = samples).
        y: Array-like of labels with the same length as ``X``.
        test_size: Fraction of the samples, between 0 and 1, that goes into
            the test part. The test count is ``int(n_samples * test_size)``.
        random_state: Seed for the shuffle. The same seed yields the same
            split.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``test_size`` is
            outside the range 0..1.
    """
    # Fancy indexing below needs arrays; this also accepts plain lists.
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError("Jumlah sampel X dan y harus sama")
    if not 0 <= test_size <= 1:
        raise ValueError("test_size harus berada di antara 0 dan 1")

    # A private RandomState produces the same permutation as seeding the
    # global generator with the same seed, but leaves the global NumPy random
    # state untouched for the rest of the program.
    rng = np.random.RandomState(random_state)
    indices = rng.permutation(n_samples)

    n_test = int(n_samples * test_size)
    test_indices = indices[:n_test]
    train_indices = indices[n_test:]

    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]


def define_conture(A):
    """Classify one captured hand and return the predicted character.

    The function reads the module-level DataFrame ``df``, normalises every
    training row with ``preprocess_single_hand``, fits a 3-nearest-neighbour
    model on the training part of an 80/20 split and predicts the label of
    ``A``. The model is rebuilt on every call.

    Args:
        A: Landmark coordinates of the captured hand; passed unchanged to
            ``preprocess_single_hand`` (the original comments describe it as
            an array of shape (1, 42)).

    Returns:
        The value of the "char" column that belongs to the predicted code.
        The result is also printed.
    """
    # Training features and integer labels from the global DataFrame.
    feature_rows = df.drop(["char", "char_encoded"], axis=1).values
    labels = df["char_encoded"].values

    # Normalise each training row exactly like the camera sample; every row
    # is handed over as a single-row 2-D array and the flat result is kept.
    X = np.array(
        [preprocess_single_hand(row.reshape(1, -1))[0] for row in feature_rows]
    )

    # Only the training part of the split is used below.
    X_train, _X_test, y_train, _y_test = split_train_test(
        X, labels, test_size=0.2, random_state=42
    )

    model = KNN(k=3)
    model.fit(X_train, y_train)

    # Normalise the camera sample and predict its encoded label.
    query = preprocess_single_hand(A)
    predicted_code = int(model.predict(query)[0])

    # Translate the integer code back to its character via the DataFrame.
    matching_chars = df.loc[df["char_encoded"] == predicted_code, "char"]
    result_char = matching_chars.iloc[0]

    print("Prediksi:", result_char)
    return result_char


def preprocess_single_hand(A):
    """Normalise the landmarks of one hand for translation, scale and rotation.

    Args:
        A: Array-like holding the x/y coordinates of one hand; it is reshaped
            to rows of (x, y) pairs. Any numeric dtype and plain lists are
            accepted. The input is never modified.

    Returns:
        A float64 array of shape (1, 2 * n_points) with the normalised
        coordinates, flattened as x0, y0, x1, y1, ...
    """
    # Always work on a float64 copy: the caller's data stays untouched and
    # integer input can be divided by the scale factor without a cast error.
    coords = np.array(A, dtype=np.float64).reshape(-1, 2)

    # 1. Translation: move point 0 (the wrist) to the origin.
    coords = coords - coords[0]

    # 2. Scale: divide by the largest distance from the origin, unless all
    #    points coincide (distance 0), in which case scaling is skipped.
    max_dist = np.max(np.linalg.norm(coords, axis=1))
    if max_dist != 0:
        coords = coords / max_dist

    # 3. Rotation: rotate by the negative angle of point 9 so that this
    #    reference point ends up on the positive x axis.
    ref = coords[9]
    angle = np.arctan2(ref[1], ref[0])
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rot = np.array([[cos_a, sin_a], [-sin_a, cos_a]])  # rotation matrix for -angle

    # Points are row vectors, hence the multiplication by the transpose.
    coords = coords @ rot.T

    return coords.reshape(1, -1)

# Pengambilan gambar handsign

In [None]:
import cv2
import numpy as np
import mediapipe as mp

# ================== MEDIAPIPE SETUP ==================
# Short aliases for the MediaPipe hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Hand detector configured for a video stream (static_image_mode=False),
# at most one hand, and 0.7 as both detection and tracking confidence.
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# Open capture device 0 (presumably the default webcam - confirm on the target machine).
cap = cv2.VideoCapture(0)

# Tell the user which keys the capture loop reacts to.
print("Tekan C untuk capture, Q untuk keluar")

# ================== GET HAND LANDMARK ==================
def get_hand_points_mediapipe(frame):
    """Detect one hand in a BGR frame and return its landmarks in pixels.

    Args:
        frame: Image array with three dimensions (height, width, channels)
            in BGR channel order.

    Returns:
        ``(points, hand_landmarks)`` where ``points`` is a float32 array with
        one (x, y) row per landmark, scaled by the frame width and height,
        and ``hand_landmarks`` is the MediaPipe landmark object of the first
        detected hand. Returns ``(None, None)`` when no hand is found.
    """
    height, width, _ = frame.shape

    # The detector is fed an RGB image, so the BGR frame is converted first.
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not detection.multi_hand_landmarks:
        return None, None

    first_hand = detection.multi_hand_landmarks[0]

    # Landmark x/y are multiplied by the frame size to obtain pixel positions.
    pixel_points = [[lm.x * width, lm.y * height] for lm in first_hand.landmark]

    return np.array(pixel_points, dtype=np.float32), first_hand


# ================== MAIN LOOP ==================
# Show the camera stream with the detected hand drawn on top. Pressing C
# classifies the current hand and stops; pressing Q stops without classifying.
# The try/finally guarantees that the camera, the window and the MediaPipe
# detector are released even when something inside the loop raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (camera missing, busy or disconnected).
            print("Gagal membaca frame dari kamera")
            break

        # Mirror the image horizontally.
        frame = cv2.flip(frame, 1)

        points, hand_landmarks = get_hand_points_mediapipe(frame)

        if points is not None:
            # Draw the hand skeleton using MediaPipe's own drawing helper.
            mp_draw.draw_landmarks(
                frame,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
            )

            # Draw a dot and the landmark index on every point (debug aid).
            for i, (x, y) in enumerate(points):
                cv2.circle(frame, (int(x), int(y)), 4, (0, 0, 255), -1)
                cv2.putText(
                    frame,
                    str(i),
                    (int(x) + 4, int(y) - 4),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.4,
                    (255, 255, 255),
                    1,
                )

        cv2.imshow("Hand Sign - MediaPipe", frame)
        key = cv2.waitKey(1) & 0xFF

        # ================== CAPTURE ==================
        # Both lower and upper case are accepted, so the keys also work with
        # Caps Lock or Shift active.
        if key in (ord("c"), ord("C")) and points is not None:
            A = points.flatten().reshape(1, -1)  # all landmarks as one row
            define_conture(A)
            break

        if key in (ord("q"), ord("Q")):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()


# Pengecekan Akurasi 

In [None]:
# # Persiapan data
# X = df.drop(['char', 'char_encoded'], axis=1).values
# y = df['char_encoded'].values

# # csv_conture =  pd.read_csv('samples/data.csv')

# # A = np.array([csv_conture])
# # A = A.reshape(1, -1)  # Reshape A to match the input shape expected by the KNN model

# # print(X[0])

# # print(A)

# # print("Dimensi X:", X.shape)
# # print("Dimensi y:", y.shape)

# X_train, X_test, y_train, y_test = split_train_test(X, y, test_size=0.2, random_state=42)

In [None]:
# classify = KNN(k=3)
# classify.fit(X_train, y_train) 

# # predictions = classify.predict(A)

# predictions= classify.predict(A)

# arr_alfa = df['char_encoded'].unique()
# alfa = df['char'].unique()

# acc = classify.accuracy(X_train, y_train)
# print(acc)

# # checking_data = zip(arr_alfa, alfa)
# # for x,y in checking_data:
# #     if x == predictions:
# #         print('benar', y)
# #     else:
# #         print('salah', y)