In [None]:
from google.colab import drive
import pandas as pd

# Mount Google Drive so files under /content/drive/MyDrive become readable.
drive.mount('/content/drive')

# NOTE(review): this cell reads the CSV from 'dataset_pcd/', while the later
# cells in this notebook read '/content/drive/MyDrive/PCD/images.csv' —
# confirm which location is the intended one.
path = '/content/drive/MyDrive/dataset_pcd/images.csv'
df = pd.read_csv(path)
# Last expression of the cell: shows the first rows of the table.
df.head()

# **Huffman**

In [None]:
import cv2, heapq, json, os
import numpy as np
from matplotlib import pyplot as plt

class Node:
    """One vertex of a Huffman tree.

    Attributes are set directly from the constructor arguments:
    ``frequency`` (weight used for ordering), ``symbol`` (payload),
    ``left`` / ``right`` (child nodes, ``None`` when absent) and
    ``huffman_direction`` (edge label from the parent, initially ``''``).
    """

    def __init__(self, frequency, symbol, left=None, right=None):
        # Weight and payload of this vertex.
        self.frequency, self.symbol = frequency, symbol
        # Children; both are None for a leaf.
        self.left, self.right = left, right
        # Filled in later by the tree builder ("0" or "1"); empty until then.
        self.huffman_direction = ''

    def __lt__(self, nxt):
        """Order nodes by frequency only, so they can live in a heapq heap."""
        own_weight = self.frequency
        other_weight = nxt.frequency
        return own_weight < other_weight

def calculate_huffman_codes(node, code='', huffman_codes=None):
    """Walk a Huffman tree and collect the bit code of every leaf.

    Args:
        node: Current tree node. It must expose ``left``, ``right``,
            ``symbol`` and ``huffman_direction`` attributes.
        code: Bit string accumulated on the path from the root to the
            parent of ``node``.
        huffman_codes: Dictionary being filled in; created on the first
            call when left as ``None``.

    Returns:
        dict mapping each leaf symbol to its code string.
    """
    if huffman_codes is None:
        huffman_codes = {}
    # Extend the path with the edge label leading to this node.
    code += node.huffman_direction
    if node.left:
        calculate_huffman_codes(node.left, code, huffman_codes)
    if node.right:
        calculate_huffman_codes(node.right, code, huffman_codes)
    if not node.left and not node.right:
        # A tree with a single symbol has a root that is also a leaf, so the
        # accumulated path is empty. An empty code would encode the data to
        # zero bits and make it undecodable, so fall back to a 1-bit code.
        huffman_codes[node.symbol] = code or '0'
    return huffman_codes

def get_merged_huffman_tree(byte_to_frequency):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Every symbol becomes a leaf ``Node`` on a min-heap keyed by frequency.
    The two lightest nodes are repeatedly popped, labelled "0" and "1",
    and replaced by a parent whose frequency and symbol are the sums of
    its children's, until one node remains.

    Returns:
        The root ``Node``. An empty mapping raises ``IndexError``.
    """
    heap = []
    for symbol, weight in byte_to_frequency.items():
        leaf = Node(weight, symbol)
        heapq.heappush(heap, leaf)

    # Each round removes two nodes and adds one, so the heap shrinks by one.
    while len(heap) > 1:
        first = heapq.heappop(heap)
        second = heapq.heappop(heap)
        first.huffman_direction, second.huffman_direction = "0", "1"
        parent = Node(
            first.frequency + second.frequency,
            first.symbol + second.symbol,
            first,
            second,
        )
        heapq.heappush(heap, parent)

    return heap[0]

def get_frequency(image_bit_string):
    """Count how often each 8-character chunk occurs in a bit string.

    The string is cut at offsets 0, 8, 16, ...; a trailing chunk shorter
    than 8 characters is counted as its own key.

    Returns:
        dict mapping chunk -> number of occurrences, in first-seen order.
    """
    counts = {}
    total_length = len(image_bit_string)
    offset = 0
    while offset < total_length:
        chunk = image_bit_string[offset:offset + 8]
        if chunk in counts:
            counts[chunk] += 1
        else:
            counts[chunk] = 1
        offset += 8
    return counts

def get_compressed_image(image_bit_string, huffman_codes):
    """Replace every 8-character chunk of the bit string by its Huffman code.

    Returns:
        The concatenation of the codes, in input order. A chunk missing
        from ``huffman_codes`` raises ``KeyError``.
    """
    encoded_parts = []
    for offset in range(0, len(image_bit_string), 8):
        chunk = image_bit_string[offset:offset + 8]
        encoded_parts.append(huffman_codes[chunk])
    return ''.join(encoded_parts)

def compress_channel(channel):
    """Huffman-encode one image plane.

    Every pixel is written as an 8-character binary string. The resulting
    bit string is used to build the frequency table, the tree and the code
    table, and is then encoded with that table.

    Args:
        channel: Iterable of rows, each an iterable of integer pixel values.

    Returns:
        ``(compressed_bits, codes)``: the encoded bit string and the
        symbol -> code dictionary needed to decode it.
    """
    pixel_bits = [format(value, '08b') for line in channel for value in line]
    bit_string = ''.join(pixel_bits)

    frequencies = get_frequency(bit_string)
    root = get_merged_huffman_tree(frequencies)
    codes = calculate_huffman_codes(root)

    return get_compressed_image(bit_string, codes), codes

def decompress_channel(compressed_bits, codes, shape):
    """Decode a Huffman bit string back into a 2-D uint8 image plane.

    Args:
        compressed_bits: String of '0'/'1' characters to decode.
        codes: Dictionary mapping each 8-character binary symbol to its
            Huffman code (the table that was used for encoding).
        shape: ``(height, width)`` of the plane to rebuild.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the given shape. Symbols
        decoded beyond ``height * width`` are ignored.

    Raises:
        ValueError: If the stream decodes to fewer than ``height * width``
            symbols.
    """
    h, w = shape
    n_pixels = h * w
    code_to_byte = {v: k for k, v in codes.items()}

    if len(code_to_byte) == 1 and '' in code_to_byte:
        # Degenerate table: the only symbol has an empty code, which is what
        # a one-node tree produces for a uniform plane. The stream carries no
        # information, so every pixel is that symbol.
        values = [int(code_to_byte[''], 2)] * n_pixels
    else:
        values = []
        current = ''
        for bit in compressed_bits:
            current += bit
            symbol = code_to_byte.get(current)
            if symbol is not None:
                # Convert immediately instead of building one huge
                # intermediate bit string and re-slicing it afterwards.
                values.append(int(symbol, 2))
                current = ''
        if len(values) < n_pixels:
            raise ValueError(
                f"compressed stream decodes to {len(values)} symbols, "
                f"but shape {shape} needs {n_pixels}"
            )

    return np.array(values[:n_pixels], dtype=np.uint8).reshape(h, w)

# ========== MAIN ==========
# Huffman-encode every image in image_folder channel by channel, decode it
# again, save the reconstruction and report the compression ratio.
image_folder = '/content/drive/MyDrive/PCD/images'
output_folder = '/content/drive/MyDrive/PCD/huffman_result_rgb'
os.makedirs(output_folder, exist_ok=True)

# Sorted so the processing/printing order is reproducible between runs
# (os.listdir returns entries in arbitrary order).
image_files = sorted(
    f for f in os.listdir(image_folder) if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)

compression_ratios = []

for image_file in image_files:
    image_path = os.path.join(image_folder, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; skip it rather than crash on `.shape`.
        print(f"{image_file} | Gagal dibaca, dilewati.")
        continue
    h, w, _ = image.shape

    # Quantise every value down to a multiple of 8 so each channel has
    # fewer distinct symbols, then split into planes.
    image = (image // 8) * 8
    # OpenCV stores colour images in B, G, R plane order.
    B, G, R = cv2.split(image)

    compressed_B, codes_B = compress_channel(B)
    compressed_G, codes_G = compress_channel(G)
    compressed_R, codes_R = compress_channel(R)

    # Only the encoded payload is counted; the code tables are not included.
    total_bits = len(compressed_B) + len(compressed_G) + len(compressed_R)
    original_bits = h * w * 3 * 8

    # Decode each plane and merge them back in the same B, G, R order.
    B_dec = decompress_channel(compressed_B, codes_B, (h, w))
    G_dec = decompress_channel(compressed_G, codes_G, (h, w))
    R_dec = decompress_channel(compressed_R, codes_R, (h, w))

    reconstructed = cv2.merge([B_dec, G_dec, R_dec])
    # NOTE(review): the file keeps its original name/extension, so .jpg
    # inputs are re-encoded as JPEG on save — confirm that is intended.
    save_path = os.path.join(output_folder, f'{image_file}')
    cv2.imwrite(save_path, reconstructed)

    ratio = original_bits / total_bits
    compression_ratios.append(ratio)

    print(f"{image_file} | Original: {original_bits} bits | Encoded: {total_bits} bits | Rasio: {ratio:.2f}")

# Average compression ratio over all processed images
if compression_ratios:
    avg_ratio = sum(compression_ratios) / len(compression_ratios)
    print(f"\nRata-rata rasio kompresi: {avg_ratio:.2f}")
else:
    print("\nTidak ada gambar yang diproses.")

# **RLE**

In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd
from google.colab import drive

# Paths: input CSV and image folder, plus the two output folders
# (RLE text files and decoded images).
csv_path = '/content/drive/MyDrive/PCD/images.csv'
image_folder = '/content/drive/MyDrive/PCD/images'
rle_output_folder = '/content/drive/MyDrive/PCD/images_rle'
decoded_image_folder = '/content/drive/MyDrive/PCD/images_decoded'

# Create the output folders if they do not exist yet
os.makedirs(rle_output_folder, exist_ok=True)
os.makedirs(decoded_image_folder, exist_ok=True)

# Read the table that lists the image file names
df = pd.read_csv(csv_path)

# Run-length encoding
def run_length_encoding(data):
    """Collapse consecutive equal values into ``(value, run_length)`` pairs.

    Args:
        data: Indexable, sliceable sequence of values (list or 1-D array).

    Returns:
        list of ``(value, count)`` tuples in input order; an empty list for
        empty input.
    """
    # `len(...) == 0` rather than `not data`: truth-testing a multi-element
    # NumPy array raises, and indexing data[0] on empty input would fail.
    if len(data) == 0:
        return []

    encoding = []
    prev_pixel = data[0]
    count = 1
    for pixel in data[1:]:
        if pixel == prev_pixel:
            count += 1
        else:
            # The run ended: record it and start a new one.
            encoding.append((prev_pixel, count))
            prev_pixel = pixel
            count = 1
    # Flush the final run, which the loop never records.
    encoding.append((prev_pixel, count))
    return encoding

# Run-length decoding
def run_length_decoding(rle_data, total_pixels):
    """Expand a line of whitespace-separated ``value:count`` tokens.

    Args:
        rle_data: Text such as ``"12:3 200:1"``.
        total_pixels: Maximum number of values to keep.

    Returns:
        1-D ``numpy.ndarray`` with each value repeated ``count`` times,
        cut off after ``total_pixels`` entries if the data is longer.
    """
    expanded = []
    for token in rle_data.strip().split():
        value_text, run_text = token.split(':')
        value, run = int(value_text), int(run_text)
        expanded += [value] * run
    # Keep at most total_pixels values (drops any surplus).
    kept = expanded[:total_pixels]
    return np.array(kept)

# Image file lookup
def find_image_file(base_name, folder):
    """Return the path of ``base_name`` in ``folder`` with a known extension.

    The extensions '.jpg', '.jpeg' and '.png' are tried in that order and
    the first path that exists is returned; ``None`` if none exists.
    """
    candidates = (
        os.path.join(folder, base_name + suffix)
        for suffix in ('.jpg', '.jpeg', '.png')
    )
    return next((candidate for candidate in candidates if os.path.exists(candidate)), None)

# Largest run length that fits in the 16-bit count field assumed by the
# size estimate below; longer runs need several (pixel, count) records.
MAX_RUN_LENGTH = 0xFFFF

# Process every image listed in the CSV: RLE-encode each RGB channel, write
# the runs to a text file, estimate the compressed size, then decode the
# text file again and save the reconstructed image.
for base_filename in df['image']:  # make sure the 'image' column name matches the CSV
    input_path = find_image_file(base_filename, image_folder)
    if input_path is None:
        print(f'File not found: {base_filename}')
        continue

    try:
        # Open the image. The context manager is bound to the object returned
        # by Image.open so the underlying file handle is closed;
        # `.convert()` returns a separate in-memory copy.
        with Image.open(input_path) as src_img:
            img_array = np.array(src_img.convert('RGB'))
        shape = img_array.shape  # (H, W, 3)
        h, w, c = shape

        # Original size in bits
        original_bits = h * w * c * 8  # 8 bits per channel

        # Encode each channel separately
        rle_encoded = []
        for ch in range(3):
            flat_channel = img_array[:, :, ch].flatten()
            encoded = run_length_encoding(flat_channel)
            rle_encoded.append(encoded)

        # Write the RLE file: a "h,w,c" header, then one line per channel
        txt_filename = base_filename + '.txt'
        rle_path = os.path.join(rle_output_folder, txt_filename)
        with open(rle_path, 'w') as f:
            f.write(f"{h},{w},{c}\n")
            for channel_encoded in rle_encoded:
                f.write(" ".join(f"{p}:{cnt}" for p, cnt in channel_encoded) + '\n')

        # Estimated RLE size in bits, simulating a binary layout of
        # 8 bits for the pixel value + 16 bits for the count.
        compressed_bits = 0
        for channel_encoded in rle_encoded:
            for _, count in channel_encoded:
                # A run longer than 65535 cannot be stored in one 16-bit
                # count, so charge one record per 65535-long piece (ceil).
                records = -(-count // MAX_RUN_LENGTH)
                compressed_bits += records * (8 + 16)

        compression_ratio = original_bits / compressed_bits if compressed_bits > 0 else 0
        print(f'Compressed: {base_filename}')
        print(f'  Original size: {original_bits} bits')
        print(f'  Compressed size: {compressed_bits} bits')
        print(f'  Compression ratio: {compression_ratio:.2f}')

        # Decode: read the text file back and rebuild the three channels
        with open(rle_path, 'r') as f:
            lines = f.readlines()
        shape = tuple(map(int, lines[0].strip().split(',')))
        h, w, c = shape
        decoded_channels = []
        for line in lines[1:4]:
            decoded_flat = run_length_decoding(line, h * w)
            decoded_channels.append(decoded_flat.reshape((h, w)))

        decoded_array = np.stack(decoded_channels, axis=2)
        img_decoded = Image.fromarray(decoded_array.astype(np.uint8), mode='RGB')

        # NOTE(review): saving as .jpg re-encodes the decoded pixels as
        # JPEG — confirm a lossy output format is intended here.
        decoded_path = os.path.join(decoded_image_folder, base_filename + '.jpg')
        img_decoded.save(decoded_path)
        print(f'Decoded & saved: {base_filename}.jpg\n')

    except Exception as e:
        # Best effort: report the failure and continue with the next image.
        print(f'Failed: {base_filename} - {e}')


# **MODEL**

In [None]:

import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.feature import hog
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, learning_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score

# === Load Dataset ===
# Table of image names/labels, plus the folders holding the original images
# and the Huffman-reconstructed images.
df = pd.read_csv("/content/drive/MyDrive/PCD/images.csv")
folder_ori = "/content/drive/MyDrive/PCD/images"
folder_huff = "/content/drive/MyDrive/PCD/huffman_result_rgb"

# === Load images and extract HOG features ===
def load_and_extract(folder, df, extensions=('.jpg', '.jpeg', '.png')):
    """Load every image listed in ``df`` from ``folder`` and compute HOG features.

    Args:
        folder: Directory containing the image files.
        df: DataFrame with an ``'image'`` column (file name without
            extension) and a ``'name'`` column (class label).
        extensions: File extensions to try, in order. The first existing
            file is used. ``'.jpg'`` comes first, so rows that resolve to a
            .jpg file behave as before.

    Returns:
        ``(features, labels)`` as NumPy arrays: one HOG vector and one label
        per image that could be loaded. Rows whose image is missing or
        unreadable are skipped.
    """
    features, labels = [], []
    skipped = 0
    for image_name, label in zip(df['image'], df['name']):
        image = None
        for ext in extensions:
            path = os.path.join(folder, image_name + ext)
            if os.path.exists(path):
                image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                break
        if image is None:
            # No file with a known extension, or OpenCV could not decode it.
            skipped += 1
            continue
        # Fixed 128x128 input so every HOG vector has the same length.
        image = cv2.resize(image, (128, 128))
        hog_feature = hog(image, orientations=9, pixels_per_cell=(8, 8),
                          cells_per_block=(2, 2), block_norm='L2-Hys', feature_vector=True)
        features.append(hog_feature)
        labels.append(label)
    if skipped:
        # Make dropped samples visible instead of silently shrinking the set.
        print(f"load_and_extract: skipped {skipped} of {len(df)} rows in {folder}")
    return np.array(features), np.array(labels)

# Combine the original and the Huffman-reconstructed samples into one dataset
X_ori, y_ori = load_and_extract(folder_ori, df)
X_huff, y_huff = load_and_extract(folder_huff, df)
X = np.concatenate([X_ori, X_huff], axis=0)
y = np.concatenate([y_ori, y_huff], axis=0)
# NOTE(review): both folders are loaded from the same df, so an image can
# appear twice (original + Huffman copy). The random split and CV folds below
# do not keep the two copies together — this looks like it allows
# near-duplicates on both the train and the test side; confirm whether a
# group-aware split is wanted.

# Encode the string labels as integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Check the class distribution
print("Distribusi label:")
print(pd.Series(y).value_counts())

# Train-test split (80/20, stratified by class, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)

# Random Forest model
clf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42)
clf.fit(X_train, y_train)

# Prediction & basic evaluation on the held-out test split
y_pred = clf.predict(X_test)
print("=== Classification Report ===")
print(classification_report(y_test, y_pred, target_names=le.classes_))
print(f"Akurasi: {accuracy_score(y_test, y_pred):.4f}")

# Confusion matrix, drawn as a heatmap with class names on both axes
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=le.classes_, yticklabels=le.classes_)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# === Cross-Validation Accuracy ===
# 5-fold stratified CV over the full dataset (X, y_encoded), not only the
# training split.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(clf, X, y_encoded, cv=skf, scoring='accuracy')
print(f"Cross-Validation Accuracy: {cv_scores.mean():.4f} ± {cv_scores.std():.4f}")

# === ROC AUC (Multiclass OVR) ===
# More than two classes: pass the full probability matrix with
# multi_class='ovr'; otherwise use the probability of the second class.
if len(le.classes_) > 2:
    y_prob = clf.predict_proba(X_test)
    auc_score = roc_auc_score(y_test, y_prob, multi_class='ovr')
    print(f"Multiclass ROC AUC: {auc_score:.4f}")
else:
    y_prob = clf.predict_proba(X_test)[:, 1]
    auc_score = roc_auc_score(y_test, y_prob)
    print(f"ROC AUC: {auc_score:.4f}")

# === Learning Curve ===
# Accuracy at 10 training-set fractions from 10% to 100%, 5-fold CV each,
# using all available cores (n_jobs=-1).
train_sizes, train_scores, val_scores = learning_curve(
    clf, X, y_encoded, cv=5, scoring='accuracy',
    train_sizes=np.linspace(0.1, 1.0, 10), n_jobs=-1
)

# Average the per-fold scores for each training size
train_mean = train_scores.mean(axis=1)
val_mean = val_scores.mean(axis=1)

plt.figure(figsize=(8, 5))
plt.plot(train_sizes, train_mean, label="Train Accuracy")
plt.plot(train_sizes, val_mean, label="Validation Accuracy")
plt.xlabel("Train Size")
plt.ylabel("Accuracy")
plt.title("Learning Curve")
plt.legend()
plt.grid(True)
plt.show()
