# Bytecode Static Feature

## Set up

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os
import pandas as pd
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

from utils.comparing import report_for_multiple_model

PATH = Path.cwd().parents[1]
DATA_PATH = os.path.join(PATH, 'data/labeled')

## Load Data

In [3]:
feature_df = pd.read_csv(os.path.join(DATA_PATH, 'bytecode_feature.csv')).set_index('Address')
groundtruth_df = pd.read_csv(os.path.join(DATA_PATH, 'groundtruth.csv')).set_index('Address')
feature_cols = [col for col in feature_df.columns]
label_cols = [col for col in groundtruth_df.columns]
merged_df = pd.merge(groundtruth_df, feature_df, left_index=True, right_index=True, how='inner')


## Separate features and labels

In [4]:
X = merged_df[feature_cols]
y = merged_df[label_cols]

## Scale/Normalize Features

In [5]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


## Classification report

In [6]:
report_df, X_train, X_test, y_train, y_test = report_for_multiple_model(X_scaled, y)

[LightGBM] [Info] Number of positive: 14, number of negative: 41
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001592 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12987
[LightGBM] [Info] Number of data points in the train set: 55, number of used features: 1067
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.254545 -> initscore=-1.074515
[LightGBM] [Info] Start training from score -1.074515
[LightGBM] [Info] Number of positive: 6, number of negative: 49
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001556 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12987
[LightGBM] [Info] Number of data points in the train set: 55, number of used features: 1067
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.109091 -> initscore=-2.100061
[LightGBM] [Info] Start training from score -2.100061
[LightGBM] [Info] Number 

In [7]:
report_df.sort_values(by='macro avg f1', ascending=False)

Unnamed: 0,micro avg f1,macro avg f1,Mint f1,Leak f1,Limit f1
OneVsRest(MLP),0.625,0.605664,0.444444,0.666667,0.705882
MultiOutput(MLP),0.625,0.605664,0.444444,0.666667,0.705882
MultiOutput(LogisticRegression),0.580645,0.578704,0.444444,0.666667,0.625
OneVsRest(LogisticRegression),0.580645,0.578704,0.444444,0.666667,0.625
MultiOutput(SGD),0.604651,0.570948,0.571429,0.363636,0.777778
OneVsRest(SGD),0.604651,0.570948,0.571429,0.363636,0.777778
OneVsRest(RandomForest),0.571429,0.566667,0.666667,0.5,0.533333
MultiOutput(RandomForest),0.571429,0.566667,0.666667,0.5,0.533333
OneVsRest(XGBoost),0.580645,0.562745,0.6,0.5,0.588235
MultiOutput(XGBoost),0.580645,0.562745,0.6,0.5,0.588235


## Tuning

In [8]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.multiclass import OneVsRestClassifier

base_model = MLPClassifier(max_iter=500, random_state=42)

ovr_mlp = OneVsRestClassifier(base_model)

param_grid = {
    'estimator__hidden_layer_sizes': [(100,), (100, 50), (200,)],
    'estimator__activation': ['relu', 'tanh'],
    'estimator__alpha': [1e-5, 1e-4, 1e-3],
    'estimator__learning_rate_init': [0.001, 0.01],
    'estimator__solver': ['adam'],
    'estimator__early_stopping': [True],
}

grid = GridSearchCV(
    estimator=ovr_mlp,
    param_grid=param_grid,
    scoring='f1_macro',  # or f1_samples, f1_weighted
    cv=5,
    verbose=2,
    n_jobs=-1
)

grid.fit(X_train, y_train)

print("Best Params:", grid.best_params_)
print("Best Score:", grid.best_score_)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END estimator__activation=relu, estimator__alpha=1e-05, estimator__early_stopping=True, estimator__hidden_layer_sizes=(100,), estimator__learning_rate_init=0.001, estimator__solver=adam; total time=   0.4s
[CV] END estimator__activation=relu, estimator__alpha=1e-05, estimator__early_stopping=True, estimator__hidden_layer_sizes=(100,), estimator__learning_rate_init=0.001, estimator__solver=adam; total time=   0.4s
[CV] END estimator__activation=relu, estimator__alpha=1e-05, estimator__early_stopping=True, estimator__hidden_layer_sizes=(100,), estimator__learning_rate_init=0.001, estimator__solver=adam; total time=   0.4s
[CV] END estimator__activation=relu, estimator__alpha=1e-05, estimator__early_stopping=True, estimator__hidden_layer_sizes=(100,), estimator__learning_rate_init=0.01, estimator__solver=adam; total time=   0.4s
[CV] END estimator__activation=relu, estimator__alpha=1e-05, estimator__early_stopping=True, es

In [9]:
best_params_clean = {
    k.replace("estimator__", ""): v for k, v in grid.best_params_.items()
}

base_model = MLPClassifier(**best_params_clean, max_iter=500, random_state=42)

final_model = OneVsRestClassifier(base_model)
final_model.fit(X_train, y_train)

In [10]:
y_pred = final_model.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         6
           1       0.38      1.00      0.55         3
           2       0.78      0.78      0.78         9

   micro avg       0.57      0.72      0.63        18
   macro avg       0.55      0.76      0.61        18
weighted avg       0.62      0.72      0.65        18
 samples avg       0.45      0.55      0.47        18

