# DAA Experiment Harness (Template)
**Tujuan**: kerangka eksperimen untuk membandingkan *Algoritma A* vs *Algoritma B* pada instance unik per kelompok.
1. Isi `algo_A` dan `algo_B`.
2. Sesuaikan `generate_instances` atau loader data.
3. Atur `Ns`, `repeats`, `seed`.


In [41]:
import os, time, random, statistics
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

Path('results').mkdir(exist_ok=True)

random.seed(42)
np.random.seed(42)

## Data Aquisition

In [42]:
##LOAD DATA

import pandas as pd

df = pd.read_csv(
    r"E:\DAA_Kelompok4_KelasA\data\normalized_fix_2.csv"
)

df.head(10)

Unnamed: 0,Hari,Mata Kuliah,Semester,SKS,Dosen,Ruang,Kelas,Kapasitas Kelas,start,finish,duration,jumlah_mahasiswa,profit_density
0,Senin,Kalkulus I,1,3,SUPRIYADI WIBOWO,R001,A,25,09:20:00,10:10:00,50.0,16,0.06
1,Senin,Kalkulus I,1,3,SUPRIYADI WIBOWO,R001,A,25,10:15:00,11:05:00,50.0,13,0.06
2,Senin,Kalkulus I,1,3,SUPRIYADI WIBOWO,R001,A,25,11:10:00,12:00:00,50.0,22,0.06
3,Senin,Pengolahan Citra Digital,5,3,HERI PRASETYO,R001,B,25,13:00:00,13:50:00,50.0,24,0.06
4,Senin,Pengolahan Citra Digital,5,3,HERI PRASETYO,R001,B,25,13:55:00,14:45:00,50.0,20,0.06
5,Senin,Pengolahan Citra Digital,5,3,HERI PRASETYO,R001,B,25,15:30:00,16:20:00,50.0,17,0.06
6,Senin,Data Mining,5,3,WIRANTO,R001,E,25,16:25:00,17:15:00,50.0,22,0.06
7,Senin,Data Mining,5,3,WIRANTO,R001,E,25,18:10:00,19:00:00,50.0,14,0.06
8,Senin,Data Mining,5,3,WIRANTO,R001,E,25,22:15:00,23:05:00,50.0,16,0.06
9,Senin,Bahasa Indonesia,1,2,KUNDHARU SADDHONO,R002,C,40,13:00:00,13:50:00,50.0,34,0.04


In [43]:
df.columns

Index(['Hari', 'Mata Kuliah', 'Semester', 'SKS', 'Dosen', 'Ruang', 'Kelas',
       'Kapasitas Kelas', 'start', 'finish', 'duration', 'jumlah_mahasiswa',
       'profit_density'],
      dtype='object')

In [44]:
def load_dataset(path):
    df = pd.read_csv(path)

    # FORMAT JAM:MENIT:DETIK
    df['start'] = pd.to_datetime(df['start'], format='%H:%M:%S').dt.time
    df['finish'] = pd.to_datetime(df['finish'], format='%H:%M:%S').dt.time

    return df

## Implementasi Algoritma A (Greedy EFT)

In [45]:
def greedy_earliest_finish(intervals):
    sorted_df = intervals.sort_values('finish')

    selected = []
    last_finish = None

    for idx, row in sorted_df.iterrows():
        if last_finish is None or row['start'] >= last_finish:
            selected.append(idx)
            last_finish = row['finish']

    return selected

In [46]:
##KASUS KECIL
import pandas as pd

data = {
    'start': ['08:00', '08:30', '09:00', '10:30', '09:30'],
    'finish': ['09:00', '10:00', '10:30', '11:30', '11:00']
}

df_test = pd.DataFrame(data)
df_test['start'] = pd.to_datetime(df_test['start']).dt.time
df_test['finish'] = pd.to_datetime(df_test['finish']).dt.time

selected = greedy_earliest_finish(df_test)
print("Index terpilih:", selected)
print(df_test.loc[selected])


Index terpilih: [0, 2, 3]
      start    finish
0  08:00:00  09:00:00
2  09:00:00  10:30:00
3  10:30:00  11:30:00


  df_test['start'] = pd.to_datetime(df_test['start']).dt.time
  df_test['finish'] = pd.to_datetime(df_test['finish']).dt.time


## Implementasi Algoritma B (Greedy Profit Density)

In [47]:
def greedy_profit_density(intervals):
    sorted_df = intervals.sort_values(
        'profit_density', ascending=False
    )

    selected = []
    last_finish = None

    for idx, row in sorted_df.iterrows():
        if last_finish is None or row['start'] >= last_finish:
            selected.append(idx)
            last_finish = row['finish']

    return selected


In [48]:
## KASUS KECIL
import pandas as pd

data = {
    'start': ['08:00', '08:30', '09:00', '10:30'],
    'finish': ['09:00', '10:00', '10:30', '11:30'],
    'duration': [1.0, 1.5, 1.5, 1.0],
    'jumlah_mahasiswa': [30, 75, 20, 60],
    'profit_density': [30, 50, 13, 60]
}

df_test = pd.DataFrame(data)
df_test['start'] = pd.to_datetime(df_test['start']).dt.time
df_test['finish'] = pd.to_datetime(df_test['finish']).dt.time

selected = greedy_profit_density(df_test)

print("Index terpilih:", selected)
print(df_test.loc[selected])


Index terpilih: [3]
      start    finish  duration  jumlah_mahasiswa  profit_density
3  10:30:00  11:30:00       1.0                60              60


  df_test['start'] = pd.to_datetime(df_test['start']).dt.time
  df_test['finish'] = pd.to_datetime(df_test['finish']).dt.time


## Pembangkit/Loader Instance (SESUAIKAN)

In [49]:
def generate_instances(df):
    instances = []

    for (hari, ruang), group in df.groupby(['Hari', 'Ruang']):
        if len(group) < 2:
            continue

        group = group.reset_index(drop=True)

        instances.append({
            'hari': hari,
            'ruang': ruang,
            'data': group
        })

    return instances

In [50]:
df = load_dataset("E:/DAA_Kelompok4_KelasA/data/normalized_fix_2.csv")
instances = generate_instances(df)

print(len(instances))
print(instances[0]['data'].head())

50
    Hari             Mata Kuliah  Semester  SKS           Dosen Ruang Kelas  \
0  Jumat  Pendidikan Agama Islam         1    2  RELLY PRIHATIN  R001     A   
1  Jumat  Pendidikan Agama Islam         1    2  RELLY PRIHATIN  R001     A   

   Kapasitas Kelas     start    finish  duration  jumlah_mahasiswa  \
0               25  16:25:00  17:15:00      50.0                15   
1               25  18:10:00  19:00:00      50.0                22   

   profit_density  
0            0.04  
1            0.04  


In [51]:
##CEK ALGORITMA SATU INSTANCES
inst = instances[0]['data']

print("EFT:", greedy_earliest_finish(inst))
print("PD :", greedy_profit_density(inst))


EFT: [0, 1]
PD : [0, 1]


## Evaluator & Timing

In [52]:
# ============================================================
# EVALUATOR
# ============================================================

def evaluate_solution(intervals, selected_idx):
    """
    Menghitung metrik evaluasi dari solusi algoritma
    """

    # Edge case: tidak ada interval terpilih
    if selected_idx is None or len(selected_idx) == 0:
        return {
            'n_selected': 0,
            'total_duration': 0,
            'total_students': 0
        }

    # PAKAI iloc → aman dari index error
    selected = intervals.iloc[selected_idx]

    return {
        'n_selected': len(selected),
        'total_duration': selected['duration'].sum(),
        'total_students': selected['jumlah_mahasiswa'].sum()
    }


In [53]:
import time
import tracemalloc

# ============================================================
# TIMING + MEMORY
# ============================================================

def run_with_time_and_memory(algo_func, instance_df):
    """
    Menjalankan algoritma + mengukur waktu & memori
    """

    tracemalloc.start()
    start = time.perf_counter()

    selected_idx = algo_func(instance_df)

    end = time.perf_counter()
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    exec_time_ms = (end - start) * 1000
    peak_memory_kb = peak / 1024

    return selected_idx, exec_time_ms, peak_memory_kb


In [54]:
# ============================================================
# EXPERIMENT RUNNER
# ============================================================

def run_experiment(instances, algo_func, algo_name):
    results = []

    for inst in instances:
        df_inst = inst['data']

        # Jalankan algoritma + ukur waktu & memori
        selected_idx, exec_time, peak_mem = run_with_time_and_memory(
            algo_func, df_inst
        )

        # Evaluasi solusi
        metrics = evaluate_solution(df_inst, selected_idx)

        results.append({
            'algo': algo_name,
            'hari': inst['hari'],
            'ruang': inst['ruang'],
            'n_interval': len(df_inst),
            'n_selected': metrics['n_selected'],
            'total_duration': metrics['total_duration'],
            'total_students': metrics['total_students'],
            'exec_time_ms': exec_time,
            'peak_memory_kb': peak_mem
        })

    return pd.DataFrame(results)


In [55]:
inst = instances[0]['data']
print(inst.head())

    Hari             Mata Kuliah  Semester  SKS           Dosen Ruang Kelas  \
0  Jumat  Pendidikan Agama Islam         1    2  RELLY PRIHATIN  R001     A   
1  Jumat  Pendidikan Agama Islam         1    2  RELLY PRIHATIN  R001     A   

   Kapasitas Kelas     start    finish  duration  jumlah_mahasiswa  \
0               25  16:25:00  17:15:00      50.0                15   
1               25  18:10:00  19:00:00      50.0                22   

   profit_density  
0            0.04  
1            0.04  


In [56]:
sel = greedy_earliest_finish(inst)
print("Selected:", sel)

metrics = evaluate_solution(inst, sel)
print(metrics)


Selected: [0, 1]
{'n_selected': 2, 'total_duration': np.float64(100.0), 'total_students': np.int64(37)}


In [57]:
results_A = run_experiment(
    instances,
    greedy_earliest_finish,
    algo_name="EFT"
)

results_B = run_experiment(
    instances,
    greedy_profit_density,
    algo_name="ProfitDensity"
)

results_all = pd.concat([results_A, results_B], ignore_index=True)

print(results_all.head())


  algo   hari ruang  n_interval  n_selected  total_duration  total_students  \
0  EFT  Jumat  R001           2           2           100.0              37   
1  EFT  Jumat  R002           7           7           355.0             193   
2  EFT  Jumat  R003           7           7           350.0             142   
3  EFT  Jumat  R004           6           6           300.0             122   
4  EFT  Jumat  R005           7           7           350.0             117   

   exec_time_ms  peak_memory_kb  
0        8.5424        7.984375  
1       13.5702       10.937500  
2        9.4305       10.726562  
3        7.5738       10.414062  
4       10.2153       10.773438  


## Eksekusi Eksperimen (atur parameter)

In [None]:
import pandas as pd
import numpy as np
import time

# =========================================================
# 1. LOAD & PREPROCESS DATA
# =========================================================

df = pd.read_csv("E:\DAA_Kelompok4_KelasA\data\normalized_fix_2.csv")
df["profit_density"] = df["profit_density"].fillna(0)

day_map = {
    'Senin': 0,
    'Selasa': 1,
    'Rabu': 2,
    'Kamis': 3,
    'Jumat': 4,
    'Sabtu': 5,
    'Minggu': 6
}

def get_minutes(day, time_str):
    if day not in day_map:
        return -1
    h, m, s = map(int, time_str.split(':'))
    return day_map[day] * 24 * 60 + h * 60 + m

df["start_abs"] = df.apply(lambda x: get_minutes(x["Hari"], x["start"]), axis=1)
df["finish_abs"] = df.apply(lambda x: get_minutes(x["Hari"], x["finish"]), axis=1)

# Add 'id' column based on the DataFrame's index
df['id'] = df.index

# =========================================================
# 2. GREEDY ALGORITHMS
# =========================================================

def run_algo_eft(data):
    start_time = time.perf_counter()

    sorted_data = data.sort_values(
    by=["finish_abs", "start_abs", "id"], kind="mergesort")
    schedule = []
    last_finish = -1

    for _, row in sorted_data.iterrows():
        if row["start_abs"] >= last_finish:
            schedule.append(row)
            last_finish = row["finish_abs"]

    runtime_ms = (time.perf_counter() - start_time) * 1000
    return schedule, runtime_ms


def run_algo_density(data):
    start_time = time.perf_counter()

    sorted_data = data.sort_values(
    by=["finish_abs", "start_abs", "id"],  # Disamakan dengan logika EFT
    ascending=[True, True, True],          # Disamakan (Ascending semua)
    kind="mergesort")
    schedule = []
    occupied = []

    for _, row in sorted_data.iterrows():
        s, f = row["start_abs"], row["finish_abs"]
        conflict = False

        for os, of in occupied:
            if s < of and f > os:
                conflict = True
                break

        if not conflict:
            schedule.append(row)
            occupied.append((s, f))

    runtime_ms = (time.perf_counter() - start_time) * 1000
    return schedule, runtime_ms


# =========================================================
# 3. EXPERIMENT SETUP (FORMAT MIRIP TSP)
# =========================================================

results = []
experiment_id = "Greedy_Scheduling_EFT_vs_Density"

sizes_to_test = [10, 20, 30, 40]
seeds_to_test = [0, 1]
r = 2

# =========================================================
# 4. RUN EXPERIMENTS
# =========================================================

for sz in sizes_to_test:
    for seed in seeds_to_test:
        print("\n=====================================")
        print(f"=== EKSPERIMEN GREEDY | n = {sz} | seed = {seed} ===")
        print("=====================================")

        subset = df.sample(n=sz, random_state=seed).reset_index(drop=True)

        # ---------------------- GREEDY EFT ----------------------
        print("\n[Greedy EFT] Running...")
        eft_schedule, eft_time = run_algo_eft(subset)

        count_eft = len(eft_schedule)
        sks_eft = sum(row["SKS"] for row in eft_schedule)

        print("[Greedy EFT] Hasil:")
        print(f"  - n : {sz}")
        print(f"  - seed    : {seed}")
        print(f"  - repeat    : {r}")
        print(f"  - algorithm: EFT")
        print(f"  - Runtime (ms) : {eft_time:.4f}")
        print(f"  - Interval (Kelas) : {count_eft}")
        print(f"  - Profit (SKS)   : {sks_eft}")
        print(f"  - Selected IDs   : {[row['id'] for row in eft_schedule]}")



        results.append({
            "experiment_id": experiment_id,
            "n": sz,
            "seed": seed,
            "algorithm": "EFT",
            "scheduled_classes": count_eft,
            "total_sks": sks_eft,
            "runtime_ms": eft_time
        })

        # ---------------------- GREEDY DENSITY ----------------------
        print("\n[Greedy Density] Running...")
        den_schedule, den_time = run_algo_density(subset)

        count_den = len(den_schedule)
        sks_den = sum(row["SKS"] for row in den_schedule)

        print("[Greedy Density] Hasil:")
        print(f"  - n : {sz}")
        print(f"  - seed    : {seed}")
        print(f"  - repeat    : {r}")
        print(f"  - algorithm: Density")
        print(f"  - Runtime (ms) : {den_time:.4f}")
        print(f"  - Interval (Kelas) : {count_den}")
        print(f"  - Profit (SKS)   : {sks_den}")
        print(f"  - Selected IDs   : {[row['id'] for row in den_schedule]}")


        results.append({
            "experiment_id": experiment_id,
            "n": sz,
            "seed": seed,
            "algorithm": "Density",
            "scheduled_classes": count_den,
            "total_sks": sks_den,
            "runtime_ms": den_time
        })

# =========================================================
# 5. SUMMARY & EXPORT
# =========================================================

results_df = pd.DataFrame(results)

summary = results_df.pivot_table(
    index=["n", "seed"],
    columns="algorithm",
    values=["scheduled_classes", "total_sks", "runtime_ms"]
)

print("\n========== RINGKASAN HASIL ==========")
print(summary)

FileNotFoundError: [Errno 2] No such file or directory: 'normalized_fix_2.csv'

## Hardware Specification

In [20]:
import platform
import psutil
import cpuinfo
import shutil
import subprocess

def get_local_hw_spec():
    # OS & Python
    os_name = platform.system()
    os_version = platform.platform()
    python_version = platform.python_version()

    # CPU
    cpu_brand = cpuinfo.get_cpu_info().get("brand_raw", "Unknown")
    cpu_cores_physical = psutil.cpu_count(logical=False)
    cpu_cores_logical = psutil.cpu_count(logical=True)

    # RAM
    ram_total_gb = psutil.virtual_memory().total / (1024 ** 3)

    # GPU (optional, Windows & Linux)
    gpu_name = "Not detected"
    try:
        if os_name == "Windows":
            out = subprocess.check_output(
                "wmic path win32_VideoController get name",
                shell=True
            ).decode()
            lines = [l.strip() for l in out.splitlines() if l.strip() and "Name" not in l]
            if lines:
                gpu_name = ", ".join(lines)

        elif os_name == "Linux":
            out = subprocess.check_output(
                ["bash", "-lc", "lspci | grep -i vga"],
                stderr=subprocess.DEVNULL
            ).decode().strip()
            if out:
                gpu_name = out

    except:
        pass

    return {
        "OS": os_version,
        "Python": python_version,
        "CPU": cpu_brand,
        "CPU Cores (Physical)": cpu_cores_physical,
        "CPU Cores (Logical)": cpu_cores_logical,
        "RAM Total (GB)": round(ram_total_gb, 2),
        "GPU": gpu_name
    }

# Jalankan
spec = get_local_hw_spec()

print("=== HARDWARE SPEC (Local VS Code Runtime) ===")
for k, v in spec.items():
    print(f"{k:>22}: {v}")


=== HARDWARE SPEC (Local VS Code Runtime) ===
                    OS: Windows-11-10.0.26200-SP0
                Python: 3.13.1
                   CPU: AMD Ryzen 7 7435HS
  CPU Cores (Physical): 8
   CPU Cores (Logical): 16
        RAM Total (GB): 15.82
                   GPU: NVIDIA GeForce RTX 2050


## Measure Time and Memory Used

In [58]:
import time
import tracemalloc
import pandas as pd

def run_with_time_and_memory(algo_func, instance_df):
    """
    Mengukur waktu eksekusi dan penggunaan memori algoritma
    """

    tracemalloc.start()
    start_time = time.perf_counter()

    selected_idx = algo_func(instance_df)

    end_time = time.perf_counter()
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()

    exec_time_ms = (end_time - start_time) * 1000
    peak_memory_kb = peak / 1024

    return selected_idx, exec_time_ms, peak_memory_kb


In [59]:
def run_experiment(instances, algo_func, algo_name):
    results = []

    for inst in instances:
        df_inst = inst['data']

        # Measure time & memory
        selected_idx, exec_time, peak_mem = run_with_time_and_memory(
            algo_func, df_inst
        )

        # Evaluasi hasil
        metrics = evaluate_solution(df_inst, selected_idx)

        results.append({
            'algorithm': algo_name,
            'hari': inst['hari'],
            'ruang': inst['ruang'],
            'n_interval': len(df_inst),
            'n_selected': metrics['n_selected'],
            'total_duration': metrics['total_duration'],
            'total_students': metrics['total_students'],
            'exec_time_ms': exec_time,
            'peak_memory_kb': peak_mem
        })

    return pd.DataFrame(results)


In [60]:
# Algoritma A — Earliest Finish Time
results_A = run_experiment(
    instances,
    greedy_earliest_finish,
    algo_name="EFT"
)

# Algoritma B — Profit Density
results_B = run_experiment(
    instances,
    greedy_profit_density,
    algo_name="ProfitDensity"
)

# Gabungkan hasil
results_all = pd.concat([results_A, results_B], ignore_index=True)


In [61]:
print(results_all.head())


  algorithm   hari ruang  n_interval  n_selected  total_duration  \
0       EFT  Jumat  R001           2           2           100.0   
1       EFT  Jumat  R002           7           7           355.0   
2       EFT  Jumat  R003           7           7           350.0   
3       EFT  Jumat  R004           6           6           300.0   
4       EFT  Jumat  R005           7           7           350.0   

   total_students  exec_time_ms  peak_memory_kb  
0              37        8.2603        7.968750  
1             193       13.3139       10.125000  
2             142       11.2824       10.070312  
3             122        8.1118        9.804688  
4             117        9.0413        9.296875  


In [62]:
summary = (
    results_all
    .groupby('algorithm')[['exec_time_ms', 'peak_memory_kb']]
    .mean()
)

print(summary)


               exec_time_ms  peak_memory_kb
algorithm                                  
EFT               10.689706        9.797969
ProfitDensity     11.690480        9.670156


## Plot & Tabel

## Kompleksitas Algoritma

## (Opsional) Uji Statistik (paired t-test)

## Ekspor tabel LaTeX (opsional)