Load Data

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# --- Paths -----------------------------------------------------------------
# The project root is resolved from the parent of the current working
# directory, so this cell depends on where the kernel was started.
PROJECT_ROOT = Path("..").resolve()

# Everything this notebook reads and writes lives under <root>/outputs.
OUT_DIR = PROJECT_ROOT / "outputs"
IN_CSV = OUT_DIR / "extracted_features.csv"
RANK_DIR = OUT_DIR / "rankings"
DATASET_DIR = OUT_DIR / "datasets"

# Create the output folders up front; exist_ok keeps re-runs idempotent.
for folder in (RANK_DIR, DATASET_DIR):
    folder.mkdir(parents=True, exist_ok=True)

# --- Data ------------------------------------------------------------------
df = pd.read_csv(IN_CSV)

# Candidate feature columns; this order is also the column order of X.
feature_cols = [
    "CNT", "CRL", "SKEN", "KTS", "VAR", "STD", "ENT",
    "EG", "MN", "HGN", "RMS", "SM", "IDM",
]

# Feature matrix and integer-cast target taken from the "Output" column.
X = df.loc[:, feature_cols].values
y = df["Output"].astype(int).values


Feature Scaling (for LASSO)

In [2]:
# Standardize the feature columns before fitting the L1 model: the penalty
# acts on coefficient magnitudes, so features must share a common scale for
# the resulting scores to be comparable.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


Fit L1 Logistic Regression (LASSO)

In [3]:
# L1-penalized (LASSO-style) logistic regression used purely as a feature
# scorer: the absolute value of each fitted coefficient is the feature's score.
lasso = LogisticRegression(
    penalty="l1",
    solver="liblinear",   # liblinear is one of the solvers that supports the L1 penalty
    C=1.0,
    max_iter=5000,
    random_state=42
)

lasso.fit(X_scaled, y)

# NOTE(review): ravel() lines up with feature_cols only when coef_ has a single
# row, i.e. the target is binary — confirm "Output" has exactly two classes.
coef = np.abs(lasso.coef_.ravel())

# Rank features by score, highest first. The L1 penalty can drive several
# coefficients to exactly 0.0; a stable sort (mergesort) keeps such ties in
# feature_cols order, so the ranking and the top-K picks derived from it are
# deterministic instead of depending on an unstable sort's tie order.
lasso_rank = (
    pd.DataFrame({"feature": feature_cols, "score": coef})
      .sort_values("score", ascending=False, kind="mergesort")
)

# Persist the ranking (without the positional index) for later steps.
lasso_rank_path = RANK_DIR / "lasso_rank.csv"
lasso_rank.to_csv(lasso_rank_path, index=False)

# Last expression of the cell: display the ranking table.
lasso_rank




Unnamed: 0,feature,score
0,CNT,3.296814
6,ENT,1.679808
9,HGN,1.601518
12,IDM,1.385339
4,VAR,0.636574
7,EG,0.512021
1,CRL,0.427422
3,KTS,0.422862
8,MN,0.326265
2,SKEN,0.313158


Export Top-K LASSO Feature Datasets to CSV (K = 5, 7, 9, 10)

In [4]:
# Sizes of the reduced feature sets to export (one CSV per value).
TOP_K_LIST = [5, 7, 9, 10]

for k in TOP_K_LIST:
    # lasso_rank is sorted best-first, so its first k rows are the top-k features.
    top_feats = list(lasso_rank["feature"].iloc[:k])

    # Keep the selected features plus the target column; copy so the exported
    # frame is independent of df.
    out_df = df.loc[:, top_feats + ["Output"]].copy()

    out_path = DATASET_DIR / f"data{k}L.csv"   # L = LASSO
    out_df.to_csv(out_path, index=False)
    print("Saved:", out_path, "| cols:", out_df.shape[1])


Saved: E:\Kuliah\Pengenalan Pola\final-project\outputs\datasets\data5L.csv | cols: 6
Saved: E:\Kuliah\Pengenalan Pola\final-project\outputs\datasets\data7L.csv | cols: 8
Saved: E:\Kuliah\Pengenalan Pola\final-project\outputs\datasets\data9L.csv | cols: 10
Saved: E:\Kuliah\Pengenalan Pola\final-project\outputs\datasets\data10L.csv | cols: 11
