In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Load the feature table; every column except 'label' is treated as a model input.
final_features = pd.read_csv('/content/drive/MyDrive/Minor/concatenated_features.csv')

# Separate features and target variable
X = final_features.drop(columns=['label'])  # Drop the label column
y = final_features['label']  # Target variable

# Step 1: Train-Test Split
# The split happens BEFORE scaling so that the held-out rows never contribute
# to the scaler's mean/variance (fitting on all rows leaks test information).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 2: Normalize the Features (statistics come from the training rows only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full matrix scaled with the train-fitted scaler; kept so that any code still
# reading `X_normalized` keeps working.
X_normalized = scaler.transform(X)

# Step 3: Model Training
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Step 4: Model Evaluation
y_pred = clf.predict(X_test)

# Print the classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Print the confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       151
           1       0.98      0.99      0.99       146

    accuracy                           0.99       297
   macro avg       0.99      0.99      0.99       297
weighted avg       0.99      0.99      0.99       297

Confusion Matrix:
[[148   3]
 [  1 145]]


In [None]:
# Display the (rows, columns) dimensions of the loaded feature table.
final_features.shape

(1482, 2817)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Random-forest run on the raw (unscaled) feature columns.
final_features = pd.read_csv('/content/drive/MyDrive/Minor/concatenated_features.csv')

# The 'label' column is the target; every other column is a feature.
y = final_features['label']
X = final_features.drop(columns=['label'])

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion.
clf = RandomForestClassifier(random_state=42, n_estimators=100)
clf.fit(X_train, y_train)

# Predict the held-out rows, then print both evaluation summaries.
y_pred = clf.predict(X_test)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       151
           1       0.98      0.99      0.99       146

    accuracy                           0.99       297
   macro avg       0.99      0.99      0.99       297
weighted avg       0.99      0.99      0.99       297

Confusion Matrix:
[[148   3]
 [  1 145]]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.regularizers import l2

# Load the feature table; every column except 'label' is treated as a model input.
final_features = pd.read_csv('/content/drive/MyDrive/Minor/concatenated_features_bert_aug.csv')

# Separate features and target variable
X = final_features.drop(columns=['label'])  # Drop the label column
y = final_features['label']  # Target variable

# Step 1: Split the data into training, validation, and test sets (60/20/20).
# Both splits are stratified so each subset keeps the overall class balance.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, random_state=42, stratify=y
)  # 60% training, 40% temp
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)  # 20% validation, 20% test

# Step 2: Standardize the features for the neural network.
# The scaler is fitted on the training rows only, so validation/test rows
# never influence the statistics used to transform them.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
# Step 3: Define the ANN Model
def create_model(input_shape, l2_strength=0.01, dropout_rate=0.5):
    """Build and compile a fully connected binary classifier.

    The network is three ReLU Dense layers (128, 64, 32 units), each with L2
    kernel regularization and followed by Dropout, then a single sigmoid
    output unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss, and accuracy as the reported metric.

    Args:
        input_shape: Number of input features per sample (an integer).
        l2_strength: L2 penalty applied to every hidden layer's kernel.
        dropout_rate: Fraction of units dropped after every hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the cell's import list only brings in Dense and Dropout.
    from keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape=`
    # to the first Dense layer of a Sequential model makes Keras emit a
    # UserWarning on every model construction.
    model.add(Input(shape=(input_shape,)))
    for units in (128, 64, 32):
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2_strength)))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))  # Output layer
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build the network, sized to the number of feature columns.
model = create_model(X_train.shape[1])

# Stop once val_loss has gone 5 epochs without improving, and restore the
# best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Fit for at most 100 epochs, printing per-epoch progress (verbose=1).
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping],
)

# Score the held-out test split and report its accuracy.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'\nTest Accuracy: {accuracy:.4f}')

# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 threshold.
y_pred = model.predict(X_test)
y_pred_class = (y_pred > 0.5).astype(int)

# Per-class metrics followed by the confusion matrix.
print("Classification Report:", classification_report(y_test, y_pred_class), sep="\n")
print("Confusion Matrix:", confusion_matrix(y_test, y_pred_class), sep="\n")



Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 101ms/step - accuracy: 0.5234 - loss: 4.3185 - val_accuracy: 0.5000 - val_loss: 3.1907
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.5481 - loss: 3.0809 - val_accuracy: 0.9358 - val_loss: 2.4026
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6895 - loss: 2.2660 - val_accuracy: 0.9764 - val_loss: 1.7665
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8029 - loss: 1.7295 - val_accuracy: 0.9865 - val_loss: 1.2816
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8887 - loss: 1.3541 - val_accuracy: 0.9696 - val_loss: 1.0414
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9258 - loss: 1.0927 - val_accuracy: 0.9932 - val_loss: 0.8476
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report, confusion_matrix
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.regularizers import l2

# Load the feature table; every column except 'label' is treated as a model input.
final_features = pd.read_csv('/content/drive/MyDrive/Minor/concatenated_features.csv')

# Separate features and target variable (as NumPy arrays, so the fold indices
# produced later can be used for positional indexing)
X = final_features.drop(columns=['label']).values
y = final_features['label'].values

# Split the data into training, validation, and test sets (60/20/20).
# Both splits are stratified so each subset keeps the overall class balance.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)  # 80% train + validation, 20% test
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp
)  # 60% train, 20% validation

# Step 2: Define the ANN Model
def create_model(input_shape, l2_strength=0.01, dropout_rate=0.5):
    """Build and compile a fully connected binary classifier.

    The network is three ReLU Dense layers (128, 64, 32 units), each with L2
    kernel regularization and followed by Dropout, then a single sigmoid
    output unit. It is compiled with the Adam optimizer, binary
    cross-entropy loss, and accuracy as the reported metric.

    Args:
        input_shape: Number of input features per sample (an integer).
        l2_strength: L2 penalty applied to every hidden layer's kernel.
        dropout_rate: Fraction of units dropped after every hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the cell's import list only brings in Dense and Dropout.
    from keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape=`
    # to the first Dense layer of a Sequential model makes Keras emit a
    # UserWarning on every model construction.
    model.add(Input(shape=(input_shape,)))
    for units in (128, 64, 32):
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(l2_strength)))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))  # Output layer
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# 5-fold cross-validation over the training split to estimate how stable the
# architecture's accuracy is.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracy_scores = []

for train_index, val_index in kf.split(X_train):
    X_kf_train, X_kf_val = X_train[train_index], X_train[val_index]
    y_kf_train, y_kf_val = y_train[train_index], y_train[val_index]

    # A fresh model per fold so no weights carry over between folds
    model = create_model(X_kf_train.shape[1])

    # Use EarlyStopping to avoid overfitting
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model and capture history
    history = model.fit(X_kf_train, y_kf_train, epochs=100, batch_size=32,
                        validation_data=(X_kf_val, y_kf_val),
                        callbacks=[early_stopping], verbose=1)

    # NOTE: the fold score is measured on the same rows that drove early
    # stopping, so it is a slightly optimistic estimate.
    loss, accuracy = model.evaluate(X_kf_val, y_kf_val, verbose=0)
    accuracy_scores.append(accuracy)

# Print the average accuracy across all folds
print(f'Average Accuracy: {np.mean(accuracy_scores):.4f} ± {np.std(accuracy_scores):.4f}')

# Final model: previously the test score came from whichever model the last
# fold happened to train (fitted on only 4/5 of the training rows), and the
# dedicated validation split was never used. Refit on the whole training split
# and early-stop on the held-out validation split instead.
model = create_model(X_train.shape[1])
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=100, batch_size=32,
                    validation_data=(X_val, y_val),
                    callbacks=[early_stopping], verbose=1)

# Evaluate the final model on the untouched test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print test accuracy
print(f'\nTest Accuracy: {accuracy:.4f}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - accuracy: 0.5514 - loss: 4.3777 - val_accuracy: 0.4775 - val_loss: 3.5274
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5607 - loss: 3.3372 - val_accuracy: 0.7303 - val_loss: 2.7100
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.7039 - loss: 2.5312 - val_accuracy: 0.9775 - val_loss: 1.9624
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.8131 - loss: 1.9725 - val_accuracy: 0.9831 - val_loss: 1.5309
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.9090 - loss: 1.5447 - val_accuracy: 0.9775 - val_loss: 1.1955
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9139 - loss: 1.2698 - val_accuracy: 0.9775 - val_loss: 1.0291
Epoch 7/100
[1m23/23[0m [32m━━━━━━━━━

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from keras.callbacks import EarlyStopping

# Load the feature table; every column except 'label' is treated as a model input.
final_features = pd.read_csv('/content/drive/MyDrive/Minor/concatenated_features.csv')

# Separate features and target variable as NumPy arrays
X = final_features.drop(columns=['label']).values
y = final_features['label'].values

# Add a trailing axis of length 1 so each sample has the 3-D layout Conv1D
# expects: (samples, steps, channels)
X_reshaped = X.reshape(X.shape[0], X.shape[1], 1)

# Step 1: Split the data into training and temporary sets (80% train, 20% temp).
# Stratified so each subset keeps the overall class balance.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_reshaped, y, test_size=0.2, random_state=42, stratify=y
)

# Step 2: Split the temporary set evenly into validation and test sets
# (10% of the data each), again stratified.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Step 3: Define the 1D CNN Model
def create_1d_cnn_model(input_shape, dropout_rate=0.5):
    """Build and compile a small 1D convolutional binary classifier.

    Two Conv1D/MaxPooling1D stages (64 then 128 filters, kernel size 3, pool
    size 2) feed a flattened 64-unit ReLU Dense layer with Dropout and a
    single sigmoid output unit. The model is compiled with the Adam
    optimizer, binary cross-entropy loss, and accuracy as the reported metric.

    Args:
        input_shape: Per-sample input shape as a tuple, e.g. ``(steps, 1)``.
        dropout_rate: Fraction of units dropped before the output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Local import: the cell's import list does not bring in Input.
    from keras.layers import Input

    model = Sequential()
    # Declare the input with an explicit Input layer. Passing `input_shape=`
    # to the first layer of a Sequential model makes Keras emit a UserWarning
    # on every model construction.
    model.add(Input(shape=input_shape))
    model.add(Conv1D(64, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))  # Output layer
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build the CNN for inputs of shape (number of steps, 1 channel).
model = create_1d_cnn_model((X_train.shape[1], 1))

# Stop once val_loss has gone 5 epochs without improving, and restore the
# best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Fit for at most 10 epochs, printing per-epoch progress.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping],
)

# Score the held-out test split and report its accuracy.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f'\nTest Accuracy: {accuracy:.4f}')

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 410ms/step - accuracy: 0.6395 - loss: 0.7880 - val_accuracy: 0.9527 - val_loss: 0.1337
Epoch 2/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 472ms/step - accuracy: 0.9644 - loss: 0.1180 - val_accuracy: 1.0000 - val_loss: 0.0127
Epoch 3/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 389ms/step - accuracy: 0.9833 - loss: 0.0524 - val_accuracy: 1.0000 - val_loss: 0.0081
Epoch 4/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 401ms/step - accuracy: 0.9908 - loss: 0.0307 - val_accuracy: 1.0000 - val_loss: 0.0113
Epoch 5/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 395ms/step - accuracy: 0.9912 - loss: 0.0337 - val_accuracy: 1.0000 - val_loss: 0.0031
Epoch 6/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 377ms/step - accuracy: 0.9977 - loss: 0.0105 - val_accuracy: 1.0000 - val_loss: 0.0013
Epoch 7/10
[1m38/38[0m [32m━━━