In [None]:
import pandas as pd
import re

In [None]:
print("1. Memulai proses pemuatan data...")

# Input files. Naming them once guarantees that the messages below always
# mention the file that was actually requested.
LAPTOP_CSV_PATH = 'laptop_data_scrapped.csv'
CPU_SCORE_CSV_PATH = 'skor_cpu.csv'

# Load the main laptop dataset. Nothing downstream can run without it.
try:
    df = pd.read_csv(LAPTOP_CSV_PATH)
    print(f"   -> Berhasil memuat {len(df)} data laptop.")
except FileNotFoundError:
    print(f"   [ERROR] File '{LAPTOP_CSV_PATH}' tidak ditemukan!")
    # Re-raise rather than calling exit(): the failure stays visible as a
    # traceback and execution stops here, instead of ending the session with
    # a "successful" exit status.
    raise

# Load the CPU benchmark reference and turn it into a dict for fast lookups.
# The dict stays empty when the file is missing, in which case
# get_cpu_score() relies on its keyword-based fallback scores.
cpu_dictionary = {}
try:
    df_ref = pd.read_csv(CPU_SCORE_CSV_PATH)
    # Normalise names (lowercase, trimmed) so they can be substring-matched
    # against lowercased laptop processor text.
    cpu_names = df_ref['CPU Name'].str.lower().str.strip()
    # Rows with a missing name would produce a NaN key that breaks the
    # substring test later; rows with a missing score carry no information.
    usable = cpu_names.notna() & df_ref['CPU Mark'].notna()
    cpu_dictionary = dict(zip(cpu_names[usable], df_ref.loc[usable, 'CPU Mark']))
    print(f"   -> Berhasil memuat {len(cpu_dictionary)} referensi skor CPU.")
except FileNotFoundError:
    print(f"   [WARNING] File '{CPU_SCORE_CSV_PATH}' tidak ditemukan. Menggunakan skor tebakan (dummy).")

In [None]:
# --- A. FUNGSI CLEANING DASAR ---
def clean_ram(text):
    """Return the first run of digits found in ``text`` as an int.

    The value is stringified first, so any input type is accepted.
    Example: "8GB RAM" -> 8. Returns 0 when the text contains no digits.
    """
    first_number = re.search(r'\d+', str(text))
    if first_number is None:
        return 0
    return int(first_number.group())

def clean_price(price_raw, idr_per_usd=15500):
    """Convert a raw price into whole Indonesian Rupiah.

    Assumes the raw value is expressed in US cents (per the original
    author's note): it is divided by 100 to get dollars, multiplied by the
    exchange rate, and truncated to an int.

    Args:
        price_raw: Number or numeric string holding the price.
        idr_per_usd: Rupiah per US dollar. Defaults to the rough rate of
            15,500 used by this notebook.

    Returns:
        The price in Rupiah as an int, or 0 when ``price_raw`` cannot be
        converted (None, non-numeric text, NaN, or infinity).
    """
    try:
        usd = float(price_raw) / 100
        return int(usd * idr_per_usd)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures map to 0. A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and make the run unstoppable.
        return 0

def clean_storage_type(text):
    """Label a storage description as 'SSD' or 'HDD'.

    Returns 'SSD' when the upper-cased text contains "SSD"; every other
    value (including missing ones) is labelled 'HDD'.
    """
    description = str(text).upper()
    return 'SSD' if 'SSD' in description else 'HDD'

def classify_gpu_type(gpu_text):
    """Classify a GPU description as 'Dedicated' or 'Integrated'.

    A GPU counts as dedicated when its upper-cased name contains one of the
    markers "RTX", "GTX" or "RADEON RX"; anything else is 'Integrated'.
    """
    dedicated_markers = ('RTX', 'GTX', 'RADEON RX')
    normalized = str(gpu_text).upper()
    if any(marker in normalized for marker in dedicated_markers):
        return 'Dedicated'
    return 'Integrated'

# --- B. FUNGSI SKOR CPU (HYBRID) ---
def get_cpu_score(nama_laptop_cpu, cpu_scores=None):
    """Return a benchmark-style score for a laptop's processor description.

    Lookup is hybrid:
      1. Reference table: every key that occurs as a substring of the
         lowercased description is a candidate, and the longest key wins
         (ties keep the first one encountered).
      2. Fallback: if no candidate has a positive score, a rough score is
         derived from family keywords ("ultra 7", "i5", "ryzen 9", ...).

    Args:
        nama_laptop_cpu: Processor text from the laptop dataset; any value is
            accepted and stringified.
        cpu_scores: Optional mapping of lowercase CPU name -> score. When
            omitted, the notebook-level ``cpu_dictionary`` is used.

    Returns:
        The matched reference score, or the fallback estimate (3000 when no
        keyword matches either).
    """
    target = str(nama_laptop_cpu).lower()
    # Resolved at call time so the function follows whatever table the
    # loading cell produced, while still allowing an explicit table.
    scores = cpu_dictionary if cpu_scores is None else cpu_scores

    # 1. Reference table: the longest matching key is the most specific one.
    best_match_score = 0
    longest_match_len = 0
    for cpu_key, score in scores.items():
        # Blank names in the reference CSV show up as NaN keys; ``in`` on a
        # non-string would raise TypeError, so such keys are ignored.
        if not isinstance(cpu_key, str):
            continue
        if len(cpu_key) > longest_match_len and cpu_key in target:
            longest_match_len = len(cpu_key)
            best_match_score = score

    if best_match_score > 0:
        return best_match_score

    # 2. Fallback estimates, checked in order; the first matching row wins.
    fallback_rules = (
        (('ultra 9',), 28000),
        (('ultra 7',), 24000),
        (('ultra 5',), 18000),
        (('i9', 'ryzen 9'), 22000),
        (('i7', 'ryzen 7'), 15000),
        (('i5', 'ryzen 5'), 10000),
        (('i3', 'ryzen 3'), 6000),
        (('celeron', 'n4'), 2000),
    )
    for keywords, estimate in fallback_rules:
        if any(keyword in target for keyword in keywords):
            return estimate
    return 3000

# --- C. FUNGSI SKOR GPU (VERSI 2025) ---
def get_gpu_score_2025(gpu_raw):
    """Return a hand-assigned performance score for a GPU description.

    The description is lowercased and compared against an ordered rule table.
    Each rule holds one or more substrings; the first rule with a substring
    present in the name decides the score. Model prefixes such as "rtx 409"
    are substring-matched, so they also cover longer names like "rtx 4090".
    Names that match no rule score 1500.
    """
    name = str(gpu_raw).lower()

    # Order matters: specific models must be tested before generic family
    # names such as "radeon" or "intel arc".
    score_rules = (
        # Tier 1: top of the range
        (('rtx 509',), 35000),
        (('rtx 508',), 30000),
        (('rtx 409',), 30000),
        (('rtx 408',), 26000),
        (('rx 7900',), 25000),
        # Tier 2: high end
        (('rtx 507',), 24000),
        (('rtx 407',), 22000),
        (('rtx 308',), 20000),
        (('rx 7800',), 20000),
        (('rx 6800',), 18000),
        # Tier 3: mid range
        (('rtx 506',), 19000),
        (('rtx 406',), 18000),
        (('rtx 307',), 16000),
        (('rx 7700', 'rx 7600'), 15000),
        (('arc a7',), 13000),
        # Tier 4: entry-level gaming
        (('rtx 505',), 15000),
        (('rtx 405',), 14000),
        (('rtx 306',), 13000),
        (('rtx 305',), 9500),
        (('rtx 205',), 7500),
        (('gtx 166',), 8000),
        (('gtx 165',), 7000),
        (('arc a5',), 8000),
        (('rx 6500', 'rx 6400'), 6500),
        # Tier 5: newer integrated graphics
        (('radeon 890m',), 6000),
        (('radeon 880m',), 5500),
        (('radeon 780m',), 5000),
        (('radeon 680m',), 4500),
        (('arc graphics', 'intel arc'), 4500),
        # Tier 6: standard integrated graphics
        (('iris', 'xe graphics'), 2500),
        (('radeon',), 2000),
        (('uhd',), 1000),
    )

    for patterns, score in score_rules:
        for pattern in patterns:
            if pattern in name:
                return score

    # Nothing recognised.
    return 1500

# --- D. FUNGSI SKOR LAYAR ---
def get_screen_quality(display_text):
    """Score a display description by panel type and resolution keywords.

    Each rule adds its points once if any of its keywords occurs in the
    lowercased text:
      * "ips" / "oled"  -> +50 (good panel)
      * "1920" / "fhd"  -> +30 (standard resolution)
      * "3840" / "4k"   -> +50 (high resolution)
      * "1366"          -> -20 (low resolution)

    Returns the sum of the matching rules (0 when nothing matches).
    """
    spec = str(display_text).lower()
    rules = (
        (('ips', 'oled'), 50),
        (('1920', 'fhd'), 30),
        (('3840', '4k'), 50),
        (('1366',), -20),
    )
    return sum(
        points
        for keywords, points in rules
        if any(keyword in spec for keyword in keywords)
    )

In [None]:
# =============================================================================
# STAGE 3: EXECUTION (APPLY THE FUNCTIONS TO THE DATA)
# =============================================================================
print("2. Menerapkan logika pembersihan dan penilaian...")

# Each entry is (new column, source column, function applied to every value).
# Basic cleaning columns come first, followed by the scoring columns.
column_plan = [
    ('Price_IDR', 'Harga_USD', clean_price),
    ('RAM_Clean', 'RAM', clean_ram),
    ('Storage_Type', 'Storage', clean_storage_type),
    ('GPU_Class', 'GPU', classify_gpu_type),     # label: Dedicated / Integrated
    ('CPU_Score', 'Processor', get_cpu_score),
    ('GPU_Score', 'GPU', get_gpu_score_2025),
    ('Screen_Score', 'Display', get_screen_quality),
]

for new_col, src_col, transform in column_plan:
    df[new_col] = df[src_col].apply(transform)

In [None]:
# =============================================================================
# STAGE 4: SAVE THE RESULT
# =============================================================================
print("3. Menyimpan hasil...")

# Columns shown in the preview, and the destination of the final dataset.
cols_to_show = [
    'Nama_Laptop',
    'Price_IDR',
    'RAM_Clean',
    'CPU_Score',
    'GPU_Score',
    'Screen_Score',
]
output_filename = 'dataset_final_super_lengkap.csv'

# Show the first five rows of the key columns before writing to disk.
print("\n--- PREVIEW DATASET FINAL ---")
print(df.loc[:, cols_to_show].head())

# Write the full frame (all columns) to a new CSV without the index column.
df.to_csv(path_or_buf=output_filename, index=False)
print(f"\n[SUKSES] Data berhasil disimpan ke '{output_filename}'")