# Selecting best model for the PROMISE dataset part-1


### using this dataset "https://github.com/ssea-lab/PROMISE.git"

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("30.csv")
df.head()

Unnamed: 0,name,wmc,dit,noc,cbo,rfc,lcom,ca,ce,npm,...,dam,moa,mfa,cam,ic,cbm,amc,max_cc,avg_cc,bug
0,org.apache.xalan.xsltc.compiler.FilterExpr,7,3,0,19,35,0,0,19,6,...,1.0,1,0.922078,0.306122,2,9,31.0,3,1.5714,1
1,org.apache.xpath.operations.Operation,12,2,14,22,23,0,17,7,12,...,1.0,2,0.731707,0.21875,1,7,11.25,5,1.5833,1
2,org.apache.xalan.xsltc.runtime.BasisLibrary,69,1,0,36,198,2330,20,18,64,...,0.30303,0,0.0,0.102076,0,0,36.217391,49,4.2464,3
3,org.apache.xpath.objects.XString,49,3,2,42,100,1176,33,12,46,...,0.0,1,0.606838,0.122222,3,21,14.081633,16,1.8571,3
4,org.apache.xalan.xsltc.StripFilter,1,1,0,11,1,0,11,1,1,...,0.0,0,0.0,1.0,0,0,0.0,1,1.0,0


In [None]:
print(df.info())

name      0
wmc       0
dit       0
noc       0
cbo       0
rfc       0
lcom      0
ca        0
ce        0
npm       0
lcom3     0
loc       0
dam       0
moa       0
mfa       0
cam       0
ic        0
cbm       0
amc       0
max_cc    0
avg_cc    0
bug       0
dtype: int64
<class 'pandas.DataFrame'>
RangeIndex: 885 entries, 0 to 884
Data columns (total 22 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    885 non-null    str    
 1   wmc     885 non-null    int64  
 2   dit     885 non-null    int64  
 3   noc     885 non-null    int64  
 4   cbo     885 non-null    int64  
 5   rfc     885 non-null    int64  
 6   lcom    885 non-null    int64  
 7   ca      885 non-null    int64  
 8   ce      885 non-null    int64  
 9   npm     885 non-null    int64  
 10  lcom3   885 non-null    float64
 11  loc     885 non-null    int64  
 12  dam     885 non-null    float64
 13  moa     885 non-null    int64  
 14  mfa     885 non-null    float64


### cleaning the dataset

In [5]:
df.head(5)

Unnamed: 0,name,wmc,dit,noc,cbo,rfc,lcom,ca,ce,npm,...,dam,moa,mfa,cam,ic,cbm,amc,max_cc,avg_cc,bug
0,org.apache.xalan.xsltc.compiler.FilterExpr,7,3,0,19,35,0,0,19,6,...,1.0,1,0.922078,0.306122,2,9,31.0,3,1.5714,1
1,org.apache.xpath.operations.Operation,12,2,14,22,23,0,17,7,12,...,1.0,2,0.731707,0.21875,1,7,11.25,5,1.5833,1
2,org.apache.xalan.xsltc.runtime.BasisLibrary,69,1,0,36,198,2330,20,18,64,...,0.30303,0,0.0,0.102076,0,0,36.217391,49,4.2464,3
3,org.apache.xpath.objects.XString,49,3,2,42,100,1176,33,12,46,...,0.0,1,0.606838,0.122222,3,21,14.081633,16,1.8571,3
4,org.apache.xalan.xsltc.StripFilter,1,1,0,11,1,0,11,1,1,...,0.0,0,0.0,1.0,0,0,0.0,1,1.0,0


In [7]:
# 1. Drop non-useful column
df = df.drop("name", axis=1)

# 2. Convert bug count → binary classification
df['bug'] = df['bug'].apply(lambda x: 1 if x > 0 else 0)

# 3. Check class distribution
print(df['bug'].value_counts())


bug
0    474
1    411
Name: count, dtype: int64


### initialising x and y 

In [9]:
x = df.drop('bug',axis=1)
y = df['bug'] #taget column

### train-test split

In [12]:
from sklearn.model_selection import train_test_split

x_train , x_test, y_train, y_test = train_test_split(
    x,y,test_size=0.2, random_state=42
)

# feature scaling

In [13]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

## decision tree model

In [14]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create model
dt_model = DecisionTreeClassifier(random_state=42)

# Train
dt_model.fit(x_train, y_train)

# Predict
y_pred_dt = dt_model.predict(x_test)

# Accuracy
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))

# Detailed report
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt))

# Confusion Matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_dt))


Decision Tree Accuracy: 0.7231638418079096

Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.76      0.75        99
           1       0.69      0.68      0.68        78

    accuracy                           0.72       177
   macro avg       0.72      0.72      0.72       177
weighted avg       0.72      0.72      0.72       177


Confusion Matrix:
 [[75 24]
 [25 53]]


## hyperparameter tuning of decision-tree



In [15]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

model = DecisionTreeClassifier(random_state=42)

params = {
    "criterion": ["gini", "entropy", "log_loss"],
    "splitter": ["best", "random"],

    # Tree size control (MOST IMPORTANT)
    "max_depth": [None, 3, 5, 7, 9, 12, 15, 20],

    # Minimum samples to split a node
    "min_samples_split": [2, 5, 10, 20, 50],

    # Minimum samples at leaf node
    "min_samples_leaf": [1, 2, 5, 10, 20],

    # Number of features considered at each split
    "max_features": [None, "sqrt", "log2"],

    # Pruning parameter (very powerful)
    "ccp_alpha": [0.0, 0.0001, 0.001, 0.01, 0.1]
}


grid = GridSearchCV(
    estimator=model,
    param_grid=params,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)

grid.fit(x_train, y_train)

print("Best Params:", grid.best_params_)
print("Best Score:", grid.best_score_)




Fitting 5 folds for each of 18000 candidates, totalling 90000 fits
Best Params: {'ccp_alpha': 0.01, 'criterion': 'entropy', 'max_depth': 5, 'max_features': None, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
Best Score: 0.7584856657676555


In [16]:
best_rf = grid.best_estimator_
best_rf

0,1,2
,"criterion  criterion: {""gini"", ""entropy"", ""log_loss""}, default=""gini"" The function to measure the quality of a split. Supported criteria are ""gini"" for the Gini impurity and ""log_loss"" and ""entropy"" both for the Shannon information gain, see :ref:`tree_mathematical_formulation`.",'entropy'
,"splitter  splitter: {""best"", ""random""}, default=""best"" The strategy used to choose the split at each node. Supported strategies are ""best"" to choose the best split and ""random"" to choose the best random split.",'best'
,"max_depth  max_depth: int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",5
,"min_samples_split  min_samples_split: int or float, default=2 The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and  `ceil(min_samples_split * n_samples)` are the minimum  number of samples for each split. .. versionchanged:: 0.18  Added float values for fractions.",5
,"min_samples_leaf  min_samples_leaf: int or float, default=1 The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and  `ceil(min_samples_leaf * n_samples)` are the minimum  number of samples for each node. .. versionchanged:: 0.18  Added float values for fractions.",1
,"min_weight_fraction_leaf  min_weight_fraction_leaf: float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.",0.0
,"max_features  max_features: int, float or {""sqrt"", ""log2""}, default=None The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and  `max(1, int(max_features * n_features_in_))` features are considered at  each split. - If ""sqrt"", then `max_features=sqrt(n_features)`. - If ""log2"", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. .. note::  The search for a split does not stop until at least one  valid partition of the node samples is found, even if it requires to  effectively inspect more than ``max_features`` features.",
,"random_state  random_state: int, RandomState instance or None, default=None Controls the randomness of the estimator. The features are always randomly permuted at each split, even if ``splitter`` is set to ``""best""``. When ``max_features < n_features``, the algorithm will select ``max_features`` at random at each split before finding the best split among them. But the best found split may vary across different runs, even if ``max_features=n_features``. That is the case, if the improvement of the criterion is identical for several splits and one split has to be selected at random. To obtain a deterministic behaviour during fitting, ``random_state`` has to be fixed to an integer. See :term:`Glossary ` for details.",42
,"max_leaf_nodes  max_leaf_nodes: int, default=None Grow a tree with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.",
,"min_impurity_decrease  min_impurity_decrease: float, default=0.0 A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following::  N_t / N * (impurity - N_t_R / N_t * right_impurity  - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19",0.0


In [17]:
y_pred_dt_tuned = best_rf.predict(x_test)


In [18]:

# Accuracy
print("Tuned Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt_tuned))

# Detailed report
print("\nClassification Report:\n", classification_report(y_test, y_pred_dt_tuned))

# Confusion Matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_dt_tuned))


Tuned Decision Tree Accuracy: 0.7005649717514124

Classification Report:
               precision    recall  f1-score   support

           0       0.71      0.78      0.74        99
           1       0.68      0.60      0.64        78

    accuracy                           0.70       177
   macro avg       0.70      0.69      0.69       177
weighted avg       0.70      0.70      0.70       177


Confusion Matrix:
 [[77 22]
 [31 47]]


In [19]:

print("Normal DT accuracy = ", accuracy_score(y_test,y_pred_dt))
print("Tuned DT accuracy ", accuracy_score(y_test,y_pred_dt_tuned))

Normal DT accuracy =  0.7231638418079096
Tuned DT accuracy  0.7005649717514124


#### so in the case of DT normal DT performed more then tuned DT

# random forest model

In [20]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=300,
    max_depth=12,
    min_samples_split=10,
    min_samples_leaf=4,
    max_features='sqrt',
    bootstrap=True,
    oob_score=True,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)

rf_model.fit(x_train, y_train)

print("OOB Score:", rf_model.oob_score_)


# Train
rf_model.fit(x_train, y_train)

# Predict
y_pred_rf = rf_model.predict(x_test)

# Accuracy
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

# Detailed report
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf))

# Confusion Matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_rf))


OOB Score: 0.7768361581920904
Random Forest Accuracy: 0.807909604519774

Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.87      0.83        99
           1       0.81      0.73      0.77        78

    accuracy                           0.81       177
   macro avg       0.81      0.80      0.80       177
weighted avg       0.81      0.81      0.81       177


Confusion Matrix:
 [[86 13]
 [21 57]]


# HyperParameter tuning of the RandomForest

In [21]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

rf = RandomForestClassifier(random_state=42)

param_dist = {
    "n_estimators": np.arange(200, 800, 100),
    "max_depth": [None, 8, 10, 12, 15, 20],
    "min_samples_split": [2, 5, 10, 20, 50],
    "min_samples_leaf": [1, 2, 4, 8, 16],
    "max_features": ["sqrt", "log2", None],
    "bootstrap": [True, False]
}

random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=40,           # number of combinations to try
    cv=5,
    scoring="accuracy",
    verbose=2,
    n_jobs=-1,
    random_state=42
)

random_search.fit(x_train, y_train)

print("Best Parameters:", random_search.best_params_)
print("Best CV Score:", random_search.best_score_)


Fitting 5 folds for each of 40 candidates, totalling 200 fits
Best Parameters: {'n_estimators': np.int64(600), 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'max_depth': None, 'bootstrap': False}
Best CV Score: 0.7797223054639897


In [22]:
best_rf = random_search.best_estimator_
y_pred_rf_tuned = best_rf.predict(x_test)

In [23]:
# Accuracy
print("Tuned Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf_tuned))

# Detailed report
print("\nClassification Report:\n", classification_report(y_test, y_pred_rf_tuned))

# Confusion Matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_rf_tuned))


Tuned Random Forest Accuracy: 0.7796610169491526

Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.82      0.81        99
           1       0.76      0.73      0.75        78

    accuracy                           0.78       177
   macro avg       0.78      0.77      0.78       177
weighted avg       0.78      0.78      0.78       177


Confusion Matrix:
 [[81 18]
 [21 57]]


In [24]:
print("Normal Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Tuned Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf_tuned))



Normal Random Forest Accuracy: 0.807909604519774
Tuned Random Forest Accuracy: 0.7796610169491526


#### normal random forest accuracy is more then tuned one

In [25]:
X_train = x_train
X_test = x_test

## KNN MODEL


In [26]:
from sklearn.neighbors import KNeighborsClassifier

# Create model
knn_model = KNeighborsClassifier(n_neighbors=5)

# Train
knn_model.fit(X_train, y_train)

# Predict
y_pred_knn = knn_model.predict(X_test)

# Accuracy
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))

# Detailed report
print("\nClassification Report:\n", classification_report(y_test, y_pred_knn))

# Confusion Matrix
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_knn))


KNN Accuracy: 0.7344632768361582

Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.82      0.78        99
           1       0.73      0.63      0.68        78

    accuracy                           0.73       177
   macro avg       0.73      0.72      0.73       177
weighted avg       0.73      0.73      0.73       177


Confusion Matrix:
 [[81 18]
 [29 49]]


# hyperparameter tunining of KNN

In [27]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()

param_grid = {
    "n_neighbors": range(1, 31),
    "weights": ["uniform", "distance"],
    "metric": ["minkowski", "euclidean", "manhattan"],
    "p": [1, 2]
}

grid_knn = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1
)

grid_knn.fit(X_train, y_train)

print("Best Params:", grid_knn.best_params_)
print("Best CV Score:", grid_knn.best_score_)


Best Params: {'metric': 'minkowski', 'n_neighbors': 17, 'p': 1, 'weights': 'uniform'}
Best CV Score: 0.7528718409749277


In [28]:
best_knn = grid_knn.best_estimator_

y_pred_knn_tuned = best_knn.predict(X_test)

print("Tuned KNN Accuracy:", accuracy_score(y_test, y_pred_knn_tuned))
print("\nClassification Report:\n", classification_report(y_test, y_pred_knn_tuned))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_knn_tuned))


Tuned KNN Accuracy: 0.7853107344632768

Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.93      0.83        99
           1       0.87      0.60      0.71        78

    accuracy                           0.79       177
   macro avg       0.81      0.77      0.77       177
weighted avg       0.80      0.79      0.78       177


Confusion Matrix:
 [[92  7]
 [31 47]]


In [29]:
print("Normal KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Tuned KNN Accuracy:", accuracy_score(y_test, y_pred_knn_tuned))


Normal KNN Accuracy: 0.7344632768361582
Tuned KNN Accuracy: 0.7853107344632768


#### tuned KNN accuracy is more then the normal KNN

## ANN MODEL

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Build ANN model
ann_model = Sequential()

ann_model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
ann_model.add(Dense(32, activation='relu'))
ann_model.add(Dense(1, activation='sigmoid'))  # binary output

# Compile
ann_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train
history = ann_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
loss, acc = ann_model.evaluate(X_test, y_test)

print("ANN Accuracy:", acc)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.6396 - loss: 0.6493 - val_accuracy: 0.7254 - val_loss: 0.6157
Epoch 2/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7297 - loss: 0.5692 - val_accuracy: 0.7254 - val_loss: 0.5694
Epoch 3/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7544 - loss: 0.5325 - val_accuracy: 0.7324 - val_loss: 0.5495
Epoch 4/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7597 - loss: 0.5101 - val_accuracy: 0.7254 - val_loss: 0.5391
Epoch 5/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7615 - loss: 0.4961 - val_accuracy: 0.7324 - val_loss: 0.5341
Epoch 6/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7668 - loss: 0.4868 - val_accuracy: 0.7394 - val_loss: 0.5326
Epoch 7/100
[1m18/18[0m [32m━━━━━━━━━━━━━━

In [31]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Convert probabilities → 0/1
y_pred_ann = (ann_model.predict(X_test) > 0.5).astype("int32")

print("\nClassification Report:\n", classification_report(y_test, y_pred_ann))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_ann))


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.88      0.80        99
           1       0.80      0.60      0.69        78

    accuracy                           0.76       177
   macro avg       0.77      0.74      0.74       177
weighted avg       0.76      0.76      0.75       177


Confusion Matrix:
 [[87 12]
 [31 47]]


# tuning ANN model

In [32]:
from tensorflow.keras.layers import Dropout

ann_model = Sequential()

ann_model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
ann_model.add(Dropout(0.3))

ann_model.add(Dense(64, activation='relu'))
ann_model.add(Dropout(0.3))

ann_model.add(Dense(1, activation='sigmoid'))


# Compile
ann_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

# Train
history = ann_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
loss, acc = ann_model.evaluate(X_test, y_test)

print("Trained ANN Accuracy:", acc)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6590 - loss: 0.6488 - val_accuracy: 0.7042 - val_loss: 0.5698
Epoch 2/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7208 - loss: 0.5699 - val_accuracy: 0.7606 - val_loss: 0.5322
Epoch 3/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7597 - loss: 0.5254 - val_accuracy: 0.7535 - val_loss: 0.5167
Epoch 4/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7350 - loss: 0.5344 - val_accuracy: 0.7394 - val_loss: 0.5074
Epoch 5/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7208 - loss: 0.5311 - val_accuracy: 0.7535 - val_loss: 0.5039
Epoch 6/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7226 - loss: 0.5098 - val_accuracy: 0.7676 - val_loss: 0.5001
Epoch 7/100
[1m18/18[0m [32m━━━━━━━━━━━━━━

In [33]:
# Convert probabilities → 0/1
y_pred_ann_tuned = (ann_model.predict(X_test) > 0.5).astype("int32")

print("\nClassification Report:\n", classification_report(y_test, y_pred_ann_tuned))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_ann_tuned))

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.86      0.81        99
           1       0.79      0.67      0.72        78

    accuracy                           0.77       177
   macro avg       0.78      0.76      0.77       177
weighted avg       0.78      0.77      0.77       177


Confusion Matrix:
 [[85 14]
 [26 52]]


In [34]:
print("Normal ANN Classification Report:\n", classification_report(y_test, y_pred_ann))
print("\nTuned ANN Classification Report:\n", classification_report(y_test, y_pred_ann_tuned))


Normal ANN Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.88      0.80        99
           1       0.80      0.60      0.69        78

    accuracy                           0.76       177
   macro avg       0.77      0.74      0.74       177
weighted avg       0.76      0.76      0.75       177


Tuned ANN Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.86      0.81        99
           1       0.79      0.67      0.72        78

    accuracy                           0.77       177
   macro avg       0.78      0.76      0.77       177
weighted avg       0.78      0.77      0.77       177



#### tuned ANN has better accuracy then the normal one

## DNN model

In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Build DNN model
dnn_model = Sequential()

dnn_model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
dnn_model.add(Dropout(0.3))

dnn_model.add(Dense(64, activation='relu'))
dnn_model.add(Dropout(0.3))

dnn_model.add(Dense(32, activation='relu'))
dnn_model.add(Dense(1, activation='sigmoid'))

# Compile
dnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# Train
history = dnn_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=80,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
loss, acc = dnn_model.evaluate(X_test, y_test)
print("DNN Accuracy:", acc)


Epoch 1/80


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.6219 - loss: 0.6519 - val_accuracy: 0.6831 - val_loss: 0.6157
Epoch 2/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6714 - loss: 0.6102 - val_accuracy: 0.7254 - val_loss: 0.5720
Epoch 3/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7067 - loss: 0.5628 - val_accuracy: 0.7183 - val_loss: 0.5395
Epoch 4/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7350 - loss: 0.5326 - val_accuracy: 0.7183 - val_loss: 0.5253
Epoch 5/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7562 - loss: 0.5241 - val_accuracy: 0.7535 - val_loss: 0.5161
Epoch 6/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7473 - loss: 0.5144 - val_accuracy: 0.7606 - val_loss: 0.5084
Epoch 7/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━

In [36]:
y_pred_dnn = (dnn_model.predict(X_test) > 0.5).astype("int32")

print("\nClassification Report:\n", classification_report(y_test, y_pred_dnn))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_dnn))


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step

Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.91      0.83        99
           1       0.85      0.65      0.74        78

    accuracy                           0.80       177
   macro avg       0.81      0.78      0.79       177
weighted avg       0.80      0.80      0.79       177


Confusion Matrix:
 [[90  9]
 [27 51]]


# DNN hyperparameter tuning

In [37]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

dnn_model = Sequential()

dnn_model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
dnn_model.add(BatchNormalization())
dnn_model.add(Dropout(0.3))

dnn_model.add(Dense(64, activation='relu'))
dnn_model.add(BatchNormalization())
dnn_model.add(Dropout(0.3))

dnn_model.add(Dense(32, activation='relu'))
dnn_model.add(Dropout(0.2))

dnn_model.add(Dense(1, activation='sigmoid'))

dnn_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

history = dnn_model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=80,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1
)

loss, acc = dnn_model.evaluate(X_test, y_test)
print("tuned DNN Accuracy:", acc)


Epoch 1/80


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5406 - loss: 1.0418 - val_accuracy: 0.6127 - val_loss: 0.6409
Epoch 2/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6413 - loss: 0.7073 - val_accuracy: 0.7042 - val_loss: 0.6135
Epoch 3/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6784 - loss: 0.6426 - val_accuracy: 0.6620 - val_loss: 0.6085
Epoch 4/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6749 - loss: 0.6261 - val_accuracy: 0.6761 - val_loss: 0.6026
Epoch 5/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7120 - loss: 0.5633 - val_accuracy: 0.7113 - val_loss: 0.5910
Epoch 6/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7226 - loss: 0.5744 - val_accuracy: 0.7042 - val_loss: 0.5751
Epoch 7/80
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━

In [38]:
y_pred_dnn_tuned = (dnn_model.predict(X_test) > 0.5).astype("int32")

print("\nClassification Report:\n", classification_report(y_test, y_pred_dnn_tuned))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_dnn_tuned))

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.86      0.79        99
           1       0.77      0.60      0.68        78

    accuracy                           0.75       177
   macro avg       0.75      0.73      0.73       177
weighted avg       0.75      0.75      0.74       177


Confusion Matrix:
 [[85 14]
 [31 47]]


In [39]:
print("\nNormal DNN Classification Report:\n", classification_report(y_test, y_pred_dnn))
print("\nTuned DNN Classification Report:\n", classification_report(y_test, y_pred_dnn_tuned))



Normal DNN Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.91      0.83        99
           1       0.85      0.65      0.74        78

    accuracy                           0.80       177
   macro avg       0.81      0.78      0.79       177
weighted avg       0.80      0.80      0.79       177


Tuned DNN Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.86      0.79        99
           1       0.77      0.60      0.68        78

    accuracy                           0.75       177
   macro avg       0.75      0.73      0.73       177
weighted avg       0.75      0.75      0.74       177



#### tuned one has better accuracy then the normal one

# FINAL COMPARISION OF ALL MODELS


In [40]:
from sklearn.metrics import precision_score, recall_score, f1_score

results = {}

def evaluate_model(name, y_true, y_pred):
    results[name] = {
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "F1 Score": f1_score(y_true, y_pred)
    }

evaluate_model("Decision Tree", y_test, y_pred_dt)
evaluate_model("Random Forest", y_test, y_pred_rf)
evaluate_model("KNN", y_test, y_pred_knn_tuned) # tuned one had better accuracy
evaluate_model("ANN", y_test, y_pred_ann_tuned) # tuned one had better accuracy
evaluate_model("DNN", y_test, y_pred_dnn_tuned) #tuned one has better accuracy


In [41]:
results_df = pd.DataFrame(results).T 
print(results_df)


               Accuracy  Precision    Recall  F1 Score
Decision Tree  0.723164   0.688312  0.679487  0.683871
Random Forest  0.807910   0.814286  0.730769  0.770270
KNN            0.785311   0.870370  0.602564  0.712121
ANN            0.774011   0.787879  0.666667  0.722222
DNN            0.745763   0.770492  0.602564  0.676259


## Conclusion :- Random Forest performed best for the PROMISE dataset with an accuracy of 80%.

# Selecting best model for the PROMISE dataset part:- 2

### using this dataset :- "https://github.com/alrz1999/PROMISE-dataset-csv.git"

### imports and setup

In [42]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


### loading dataset

In [43]:
df = pd.read_csv("ant-1.3_ground-truth-files_dataset.csv")

print("Shape:", df.shape)
df.head()


Shape: (116, 3)


Unnamed: 0,File,Bug,SRC
0,org/apache/tools/ant/taskdefs/ExecuteOn.java,False,"/*\n * The Apache Software License, Version 1...."
1,org/apache/tools/ant/DefaultLogger.java,True,"/*\n * The Apache Software License, Version 1...."
2,org/apache/tools/ant/taskdefs/TaskOutputStream...,False,"/*\n * The Apache Software License, Version 1...."
3,org/apache/tools/ant/taskdefs/Cvs.java,False,"/*\n * The Apache Software License, Version 1...."
4,org/apache/tools/ant/taskdefs/Copyfile.java,False,"/*\n * The Apache Software License, Version 1...."


In [44]:
df.isnull().sum()

File    0
Bug     0
SRC     0
dtype: int64

### convert the label to numberic

In [46]:
df['Bug']

0      False
1       True
2      False
3      False
4      False
       ...  
111    False
112    False
113     True
114    False
115    False
Name: Bug, Length: 116, dtype: bool

In [47]:
df['Bug'] = df['Bug'].astype(int)
df['Bug'].head()


0    0
1    1
2    0
3    0
4    0
Name: Bug, dtype: int64

## Cleaning the source code

In [48]:
def clean_code(code):
    code = code.lower()
    
    # remove single-line and multi-line comments
    code = re.sub(r'//.*?\n|/\*.*?\*/', ' ', code, flags=re.S)
    
    # keep only words (remove symbols, numbers, brackets)
    code = re.sub(r'[^a-zA-Z_]', ' ', code)
    
    # remove extra spaces
    code = re.sub(r'\s+', ' ', code)
    
    return code.strip()

df['clean_code'] = df['SRC'].apply(clean_code)

df[['SRC','clean_code']].head(3)


Unnamed: 0,SRC,clean_code
0,"/*\n * The Apache Software License, Version 1....",package org apache tools ant taskdefs import o...
1,"/*\n * The Apache Software License, Version 1....",package org apache tools ant import java io pu...
2,"/*\n * The Apache Software License, Version 1....",package org apache tools ant taskdefs import o...


### converting code to numbers using TF_IDF

In [49]:
vectorizer = TfidfVectorizer(
    max_features=5000,     # vocabulary size
    ngram_range=(1,2)      # single words + word pairs (very important for code)
)

X = vectorizer.fit_transform(df['clean_code'])
y = df['Bug']

print("Vectorization Complete")
print("Feature Matrix Shape:", X.shape)


Vectorization Complete
Feature Matrix Shape: (116, 5000)


### Train-Test split

In [50]:
x_train, x_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,      
    random_state=42
)

# Training Random Forest model

In [51]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(
    n_estimators=300,
    max_depth=None,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)

rf_model.fit(x_train, y_train)


0,1,2
,"n_estimators  n_estimators: int, default=100 The number of trees in the forest. .. versionchanged:: 0.22  The default value of ``n_estimators`` changed from 10 to 100  in 0.22.",300
,"criterion  criterion: {""gini"", ""entropy"", ""log_loss""}, default=""gini"" The function to measure the quality of a split. Supported criteria are ""gini"" for the Gini impurity and ""log_loss"" and ""entropy"" both for the Shannon information gain, see :ref:`tree_mathematical_formulation`. Note: This parameter is tree-specific.",'gini'
,"max_depth  max_depth: int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.",
,"min_samples_split  min_samples_split: int or float, default=2 The minimum number of samples required to split an internal node: - If int, then consider `min_samples_split` as the minimum number. - If float, then `min_samples_split` is a fraction and  `ceil(min_samples_split * n_samples)` are the minimum  number of samples for each split. .. versionchanged:: 0.18  Added float values for fractions.",2
,"min_samples_leaf  min_samples_leaf: int or float, default=1 The minimum number of samples required to be at a leaf node. A split point at any depth will only be considered if it leaves at least ``min_samples_leaf`` training samples in each of the left and right branches. This may have the effect of smoothing the model, especially in regression. - If int, then consider `min_samples_leaf` as the minimum number. - If float, then `min_samples_leaf` is a fraction and  `ceil(min_samples_leaf * n_samples)` are the minimum  number of samples for each node. .. versionchanged:: 0.18  Added float values for fractions.",1
,"min_weight_fraction_leaf  min_weight_fraction_leaf: float, default=0.0 The minimum weighted fraction of the sum total of weights (of all the input samples) required to be at a leaf node. Samples have equal weight when sample_weight is not provided.",0.0
,"max_features  max_features: {""sqrt"", ""log2"", None}, int or float, default=""sqrt"" The number of features to consider when looking for the best split: - If int, then consider `max_features` features at each split. - If float, then `max_features` is a fraction and  `max(1, int(max_features * n_features_in_))` features are considered at each  split. - If ""sqrt"", then `max_features=sqrt(n_features)`. - If ""log2"", then `max_features=log2(n_features)`. - If None, then `max_features=n_features`. .. versionchanged:: 1.1  The default of `max_features` changed from `""auto""` to `""sqrt""`. Note: the search for a split does not stop until at least one valid partition of the node samples is found, even if it requires to effectively inspect more than ``max_features`` features.",'sqrt'
,"max_leaf_nodes  max_leaf_nodes: int, default=None Grow trees with ``max_leaf_nodes`` in best-first fashion. Best nodes are defined as relative reduction in impurity. If None then unlimited number of leaf nodes.",
,"min_impurity_decrease  min_impurity_decrease: float, default=0.0 A node will be split if this split induces a decrease of the impurity greater than or equal to this value. The weighted impurity decrease equation is the following::  N_t / N * (impurity - N_t_R / N_t * right_impurity  - N_t_L / N_t * left_impurity) where ``N`` is the total number of samples, ``N_t`` is the number of samples at the current node, ``N_t_L`` is the number of samples in the left child, and ``N_t_R`` is the number of samples in the right child. ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, if ``sample_weight`` is passed. .. versionadded:: 0.19",0.0
,"bootstrap  bootstrap: bool, default=True Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree.",True


In [52]:
y_pred_rf = rf_model.predict(x_test)

from sklearn.metrics import classification_report
normal_random_forest_report = classification_report(y_test,y_pred_rf)
print(normal_random_forest_report)


              precision    recall  f1-score   support

           0       0.83      1.00      0.91        20
           1       0.00      0.00      0.00         4

    accuracy                           0.83        24
   macro avg       0.42      0.50      0.45        24
weighted avg       0.69      0.83      0.76        24



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


### hyperprameter tuning of Random Forest


In [53]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
import numpy as np


param_dist = {
    "n_estimators": [100, 200, 300, 500, 700],
    "max_depth": [None, 5, 10, 20, 40],
    "min_samples_split": [2, 5, 10, 20],
    "min_samples_leaf": [1, 2, 4, 8],
    "max_features": ["sqrt", "log2", 0.3, 0.5, 0.7],
    "class_weight": ["balanced", "balanced_subsample"]
}

rf = RandomForestClassifier(random_state=42, n_jobs=-1)

random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=25,            
    scoring='recall',       
    cv=4,
    verbose=2,
    random_state=42,
    n_jobs=-1
)

random_search.fit(x_train, y_train)

print("Best Parameters:\n", random_search.best_params_)


Fitting 4 folds for each of 25 candidates, totalling 100 fits
Best Parameters:
 {'n_estimators': 100, 'min_samples_split': 20, 'min_samples_leaf': 4, 'max_features': 0.5, 'max_depth': 10, 'class_weight': 'balanced'}


In [54]:
best_rf = random_search.best_estimator_

y_pred_tuned = best_rf.predict(x_test)

tuned_random_forest_report = classification_report(y_test, y_pred_tuned)
print(tuned_random_forest_report)


              precision    recall  f1-score   support

           0       0.89      0.85      0.87        20
           1       0.40      0.50      0.44         4

    accuracy                           0.79        24
   macro avg       0.65      0.68      0.66        24
weighted avg       0.81      0.79      0.80        24



## Training SVM model

In [55]:
from sklearn.svm import LinearSVC

svm_model = LinearSVC(class_weight='balanced')

svm_model.fit(x_train, y_train)


0,1,2
,"penalty  penalty: {'l1', 'l2'}, default='l2' Specifies the norm used in the penalization. The 'l2' penalty is the standard used in SVC. The 'l1' leads to ``coef_`` vectors that are sparse.",'l2'
,"loss  loss: {'hinge', 'squared_hinge'}, default='squared_hinge' Specifies the loss function. 'hinge' is the standard SVM loss (used e.g. by the SVC class) while 'squared_hinge' is the square of the hinge loss. The combination of ``penalty='l1'`` and ``loss='hinge'`` is not supported.",'squared_hinge'
,"dual  dual: ""auto"" or bool, default=""auto"" Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features. `dual=""auto""` will choose the value of the parameter automatically, based on the values of `n_samples`, `n_features`, `loss`, `multi_class` and `penalty`. If `n_samples` < `n_features` and optimizer supports chosen `loss`, `multi_class` and `penalty`, then dual will be set to True, otherwise it will be set to False. .. versionchanged:: 1.3  The `""auto""` option is added in version 1.3 and will be the default  in version 1.5.",'auto'
,"tol  tol: float, default=1e-4 Tolerance for stopping criteria.",0.0001
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",1.0
,"multi_class  multi_class: {'ovr', 'crammer_singer'}, default='ovr' Determines the multi-class strategy if `y` contains more than two classes. ``""ovr""`` trains n_classes one-vs-rest classifiers, while ``""crammer_singer""`` optimizes a joint objective over all classes. While `crammer_singer` is interesting from a theoretical perspective as it is consistent, it is seldom used in practice as it rarely leads to better accuracy and is more expensive to compute. If ``""crammer_singer""`` is chosen, the options loss, penalty and dual will be ignored.",'ovr'
,"fit_intercept  fit_intercept: bool, default=True Whether or not to fit an intercept. If set to True, the feature vector is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where 1 corresponds to the intercept. If set to False, no intercept will be used in calculations (i.e. data is expected to be already centered).",True
,"intercept_scaling  intercept_scaling: float, default=1.0 When `fit_intercept` is True, the instance vector x becomes ``[x_1, ..., x_n, intercept_scaling]``, i.e. a ""synthetic"" feature with a constant value equal to `intercept_scaling` is appended to the instance vector. The intercept becomes intercept_scaling * synthetic feature weight. Note that liblinear internally penalizes the intercept, treating it like any other term in the feature vector. To reduce the impact of the regularization on the intercept, the `intercept_scaling` parameter can be set to a value greater than 1; the higher the value of `intercept_scaling`, the lower the impact of regularization on it. Then, the weights become `[w_x_1, ..., w_x_n, w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent the feature weights and the intercept weight is scaled by `intercept_scaling`. This scaling allows the intercept term to have a different regularization behavior compared to the other features.",1
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to ``class_weight[i]*C`` for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",'balanced'
,"verbose  verbose: int, default=0 Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in liblinear that, if enabled, may not work properly in a multithreaded context.",0


In [56]:

# prediction and evalution
y_pred = svm_model.predict(x_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n")

normal_svm_report = classification_report(y_test,y_pred)
print(normal_svm_report)


Accuracy: 0.875

Classification Report:

              precision    recall  f1-score   support

           0       0.87      1.00      0.93        20
           1       1.00      0.25      0.40         4

    accuracy                           0.88        24
   macro avg       0.93      0.62      0.67        24
weighted avg       0.89      0.88      0.84        24



## Preparing the data for DEEP LEARNING

### Tokenization

In [59]:


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_words = 10000
max_len = 400

tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")
tokenizer.fit_on_texts(df['clean_code'])

sequences = tokenizer.texts_to_sequences(df['clean_code'])
X_seq = pad_sequences(sequences, maxlen=max_len)

y_seq = df['Bug'].values

print("Tokenization complete")
print("Shape:", X_seq.shape)


Tokenization complete
Shape: (116, 400)


In [60]:
x_train, x_test, y_train, y_test = train_test_split(
    X_seq, y_seq,
    test_size=0.2,
    stratify=y_seq,
    random_state=42
)


## Training BiLSTM MODEL

In [62]:
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
model = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_shape=(400,)),

    Bidirectional(LSTM(64)),

    Dropout(0.5),

    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)



  super().__init__(**kwargs)


In [63]:
history = model.fit(
    x_train, y_train,
    epochs=8,
    batch_size=8,
    validation_split=0.2,
    verbose=1
)


Epoch 1/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 166ms/step - accuracy: 0.8356 - loss: 0.6140 - val_accuracy: 0.8947 - val_loss: 0.4747
Epoch 2/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 134ms/step - accuracy: 0.8493 - loss: 0.3356 - val_accuracy: 0.8947 - val_loss: 0.3763
Epoch 3/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 124ms/step - accuracy: 0.8767 - loss: 0.2908 - val_accuracy: 0.8947 - val_loss: 0.3922
Epoch 4/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 128ms/step - accuracy: 0.8767 - loss: 0.3782 - val_accuracy: 0.8421 - val_loss: 0.3530
Epoch 5/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 139ms/step - accuracy: 0.9726 - loss: 0.2021 - val_accuracy: 0.8947 - val_loss: 0.3636
Epoch 6/8
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 130ms/step - accuracy: 0.9726 - loss: 0.1715 - val_accuracy: 0.8421 - val_loss: 0.3484
Epoch 7/8
[1m10/10[0m [32m━━━━━

#### Evaluting BiLSTM model

In [64]:

import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# Predict probabilities
y_prob = model.predict(x_test)

# Convert to class (default threshold 0.5)
y_pred = (y_prob >= 0.5).astype(int)

normal_BiLSTM_report = classification_report(y_test, y_pred)
print(normal_BiLSTM_report)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289ms/step
              precision    recall  f1-score   support

           0       0.90      0.95      0.93        20
           1       0.67      0.50      0.57         4

    accuracy                           0.88        24
   macro avg       0.79      0.72      0.75        24
weighted avg       0.87      0.88      0.87        24



## conclusion :- BiLSTM performed best among others with an accuracy of 88%.