In [5]:
import pandas as pd
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.metrics import (
    f1_score, 
    precision_score, 
    recall_score, 
    accuracy_score, 
    classification_report
)
from sklearn.model_selection import StratifiedKFold
import numpy as np
import pickle

In [6]:
data = pd.read_pickle("/Users/mac/Desktop/Code_Smell_Detection/dataset/feature_envy/codeT5_embeddings/processed_embeddings/codeT5_base_embed_line_by_line_avg.pkl")
data.head()

Unnamed: 0,emb_1,emb_2,emb_3,emb_4,emb_5,emb_6,emb_7,emb_8,emb_9,emb_10,...,emb_762,emb_763,emb_764,emb_765,emb_766,emb_767,emb_768,label,sample_id,severity
0,-0.11939,-0.123219,-0.165197,0.104631,0.112225,0.185794,0.041907,0.204066,-0.113431,-0.045804,...,0.0617,0.179728,-0.219134,0.135207,-0.091973,0.392297,-0.211298,1,12150,critical
1,-0.123372,-0.334265,-0.142434,0.24927,0.077883,0.297202,-0.027254,0.134641,0.030748,-0.151118,...,0.131367,0.066678,-0.121032,0.097655,-0.00078,0.286284,-0.099197,1,10168,critical
2,-0.091274,-0.123337,-0.150979,0.107054,0.036551,0.203183,0.055389,0.084583,0.015683,-0.099683,...,-0.024028,0.242384,-0.12092,0.124597,0.049541,0.288674,-0.161974,1,12783,critical
3,-0.067974,-0.20028,-0.24763,0.119339,0.03948,0.227915,0.03201,0.148257,0.005719,-0.083442,...,-0.068718,0.244488,-0.099503,0.152847,-0.018549,0.272525,-0.134554,1,12754,critical
4,-0.103793,-0.120461,-0.1832,0.155821,0.048811,0.150119,0.067279,0.097346,-0.014182,-0.106619,...,-0.01151,0.259759,-0.139378,0.181213,0.01928,0.346813,-0.155173,1,4277,critical


In [7]:
X = data[[col for col in data.columns if col.startswith("emb_")]].values  # Select embedding columns
y = data['label'].values 

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define classifiers
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
bagging = BaggingClassifier(estimator=RandomForestClassifier(), n_estimators=100, random_state=42)  
xgboost = xgb.XGBClassifier(n_estimators=100, random_state=42)

# Add the classifiers to a dictionary
classifiers = {
    "Random Forest": random_forest,
    "Bagging": bagging,
    "XGBoost": xgboost
}

In [8]:
# Initialize StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Dictionary to store cross-validation results
results = {}

# Perform cross-validation for each classifier
for clf_name, clf in classifiers.items():
    metrics_summary = {
        "f1_micro": [],
        "f1_macro": [],
        "accuracy": [],
        "precision_macro": [],
        "recall_macro": []
    }
    
    for train_idx, val_idx in skf.split(X_train, y_train):
        # Split the training data into training and validation folds
        X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
        y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]
        
        # Train the classifier
        clf.fit(X_train_fold, y_train_fold)
        
        # Make predictions on the validation set
        y_pred = clf.predict(X_val_fold)
        
        # Calculate metrics
        metrics_summary["f1_micro"].append(f1_score(y_val_fold, y_pred, average='micro'))
        metrics_summary["f1_macro"].append(f1_score(y_val_fold, y_pred, average='macro'))
        metrics_summary["accuracy"].append(accuracy_score(y_val_fold, y_pred))
        metrics_summary["precision_macro"].append(precision_score(y_val_fold, y_pred, average='macro'))
        metrics_summary["recall_macro"].append(recall_score(y_val_fold, y_pred, average='macro'))
    
    # Store mean metrics for the classifier
    results[clf_name] = {metric: np.mean(scores) for metric, scores in metrics_summary.items()}

# Print cross-validation results
print("\nCross-Validation Results:")
for clf_name, metrics in results.items():
    print(f"\n{clf_name} Metrics:")
    for metric, value in metrics.items():
        print(f"{metric.capitalize()}: {value:.4f}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Cross-Validation Results:

Random Forest Metrics:
F1_micro: 0.8645
F1_macro: 0.4671
Accuracy: 0.8645
Precision_macro: 0.5322
Recall_macro: 0.5018

Bagging Metrics:
F1_micro: 0.8640
F1_macro: 0.4635
Accuracy: 0.8640
Precision_macro: 0.4320
Recall_macro: 0.5000

XGBoost Metrics:
F1_micro: 0.8598
F1_macro: 0.5048
Accuracy: 0.8598
Precision_macro: 0.6282
Recall_macro: 0.5180


In [9]:
# Final evaluation on the test set
final_results = {}

for clf_name, clf in classifiers.items():
    # Train the classifier on the entire training set
    clf.fit(X_train, y_train)
    
    # Make predictions on the test set
    y_pred_test = clf.predict(X_test)
    
    # Calculate test metrics
    f1_micro_test = f1_score(y_test, y_pred_test, average='micro')
    f1_macro_test = f1_score(y_test, y_pred_test, average='macro')
    precision_test = precision_score(y_test, y_pred_test, average='macro')
    recall_test = recall_score(y_test, y_pred_test, average='macro')
    accuracy_test = accuracy_score(y_test, y_pred_test)
    report_test = classification_report(y_test, y_pred_test)
    
    # Save final metrics for the classifier
    final_results[clf_name] = {
        "f1_micro": f1_micro_test,
        "f1_macro": f1_macro_test,
        "precision_macro": precision_test,
        "recall_macro": recall_test,
        "accuracy": accuracy_test,
        "classification_report": report_test
    }

# Print final test results
print("\nFinal Test Results:")
for clf_name, metrics in final_results.items():
    print(f"\n{clf_name} Metrics:")
    for metric, value in metrics.items():
        if metric != "classification_report":
            print(f"{metric.capitalize()}: {value:.4f}")
    print(f"Classification Report:\n{metrics['classification_report']}")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Final Test Results:

Random Forest Metrics:
F1_micro: 0.8647
F1_macro: 0.4637
Precision_macro: 0.4323
Recall_macro: 0.5000
Accuracy: 0.8647
Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.93       786
           1       0.00      0.00      0.00       123

    accuracy                           0.86       909
   macro avg       0.43      0.50      0.46       909
weighted avg       0.75      0.86      0.80       909


Bagging Metrics:
F1_micro: 0.8647
F1_macro: 0.4637
Precision_macro: 0.4323
Recall_macro: 0.5000
Accuracy: 0.8647
Classification Report:
              precision    recall  f1-score   support

           0       0.86      1.00      0.93       786
           1       0.00      0.00      0.00       123

    accuracy                           0.86       909
   macro avg       0.43      0.50      0.46       909
weighted avg       0.75      0.86      0.80       909


XGBoost Metrics:
F1_micro: 0.8592
F1_macro: 

In [11]:
# Save results to a pickle file
output_path = "/Users/mac/Desktop/Code_Smell_Detection/feature_envy/results/codeT5/base/result_codeT5_base_avg.pkl"
with open(output_path, "wb") as f:
    pickle.dump({"cross_val_results": results, "final_results": final_results}, f)
print(f"\nResults saved to {output_path}")


Results saved to /Users/mac/Desktop/Code_Smell_Detection/feature_envy/results/codeT5/base/result_codeT5_base_avg.pkl
