In [None]:

# Import required libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the two CSV files used by this cell: the training split and the
# single held-out test split.
train_data = pd.read_csv("/combined_part1_part3.csv")
test_data_1 = pd.read_csv("/part2.csv")

# Split each frame into its label column ("Class") and the remaining
# columns, which are used as model features.
y_train = train_data["Class"]
X_train = train_data.drop(columns="Class")

y_test_1 = test_data_1["Class"]
X_test_1 = test_data_1.drop(columns="Class")

# Build and train the logistic-regression baseline in one step; `fit`
# returns the estimator itself. The liblinear solver was chosen by the
# author as suitable for small-to-medium data sizes.
log_model = LogisticRegression(
    solver="liblinear",
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)

# Score the fitted model on Test Dataset 1 and print each metric in turn.
# NOTE: in the recorded run the positive class is rare (153 of 94,936 rows),
# so precision/recall for class 1 are more informative than accuracy.
y_pred_1 = log_model.predict(X_test_1)
print(" Results on Test Dataset 1")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric_fn(y_test_1, y_pred_1))




 Results on Test Dataset 1
Accuracy: 0.9991573270413753
Confusion Matrix:
 [[94778     5]
 [   75    78]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     94783
           1       0.94      0.51      0.66       153

    accuracy                           1.00     94936
   macro avg       0.97      0.75      0.83     94936
weighted avg       1.00      1.00      1.00     94936



In [10]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the two CSV files used by this cell: the training split and the
# single held-out test split.
train_data = pd.read_csv("/combined_part1_part3.csv")
test_data_1 = pd.read_csv("/part2.csv")

# Split each frame into its label column ("Class") and the remaining
# columns, which are used as model features.
y_train = train_data["Class"]
X_train = train_data.drop(columns="Class")

y_test_1 = test_data_1["Class"]
X_test_1 = test_data_1.drop(columns="Class")

# Build and train a 100-tree random forest in one step; `fit` returns the
# estimator itself. random_state is pinned so reruns build the same forest,
# and n_jobs=-1 asks scikit-learn to use all available cores.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
).fit(X_train, y_train)

# Score the fitted model on Test Dataset 1 and print each metric in turn.
# NOTE: in the recorded run the positive class is rare (153 of 94,936 rows),
# so precision/recall for class 1 are more informative than accuracy.
y_pred_1 = rf_model.predict(X_test_1)
print(" Results on Test Dataset 1")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric_fn(y_test_1, y_pred_1))

 Results on Test Dataset 1
Accuracy: 0.9995049296368079
Confusion Matrix:
 [[94766    17]
 [   30   123]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     94783
           1       0.88      0.80      0.84       153

    accuracy                           1.00     94936
   macro avg       0.94      0.90      0.92     94936
weighted avg       1.00      1.00      1.00     94936



In [13]:
# Import required libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the two CSV files used by this cell: the training split and the
# single held-out test split.
train_data = pd.read_csv("/combined_part1_part3.csv")
test_data_1 = pd.read_csv("/part2.csv")

# Split each frame into its label column ("Class") and the remaining
# columns, which are used as model features.
y_train = train_data["Class"]
X_train = train_data.drop(columns="Class")

y_test_1 = test_data_1["Class"]
X_test_1 = test_data_1.drop(columns="Class")

# Base learners for the stack: a 100-tree random forest and a liblinear
# logistic regression, both with a pinned random_state.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
lr_model = LogisticRegression(solver="liblinear", max_iter=1000, random_state=42)

# (name, estimator) pairs in the form StackingClassifier expects.
estimators = [("rf", rf_model), ("lr", lr_model)]

# Build and train the stacking ensemble in one step; `fit` returns the
# estimator itself. A second liblinear logistic regression acts as the
# meta-model that combines the base learners' outputs.
stack_model = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(
        solver="liblinear",
        max_iter=1000,
        random_state=42,
    ),
    n_jobs=-1,
).fit(X_train, y_train)

# Score the fitted ensemble on Test Dataset 1 and print each metric in turn.
# NOTE(review): in the recorded run, class-1 recall is 0.21 here versus 0.80
# for the standalone random forest cell -- worth investigating before relying
# on the stacked model.
y_pred_1 = stack_model.predict(X_test_1)
print("Results on Test Dataset 1:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:\n", confusion_matrix),
    ("Classification Report:\n", classification_report),
):
    print(label, metric_fn(y_test_1, y_pred_1))



Results on Test Dataset 1:
Accuracy: 0.9987149237380972
Confusion Matrix:
 [[94782     1]
 [  121    32]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     94783
           1       0.97      0.21      0.34       153

    accuracy                           1.00     94936
   macro avg       0.98      0.60      0.67     94936
weighted avg       1.00      1.00      1.00     94936

