In [1]:
import os

import numpy as np
import pandas as pd
from google.colab import drive
from keras.utils import to_categorical
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Function to read data from CSV files
def read_data_from_csv(folder_path):
    """Load every ``.csv`` file in ``folder_path`` into a single DataFrame.

    Files are read in sorted filename order. ``os.listdir`` alone returns
    entries in arbitrary order, which would make the row order of the result
    (and any seeded split performed on it) vary between runs or machines.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive). Only entries whose name ends in
        ".csv" are read; everything else is ignored.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all CSV files, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``folder_path`` contains no ``.csv`` files.
    """
    csv_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".csv")
    )
    if not csv_names:
        # pd.concat([]) would also raise ValueError, but with a message that
        # does not mention the folder; fail early with something actionable.
        raise ValueError(f"No .csv files found in {folder_path!r}")

    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]
    return pd.concat(frames, ignore_index=True)

In [3]:
# Load every CSV in the training folder into one DataFrame.
# Only the train directory is read in this cell.
train_folder = '/content/drive/My Drive/datasets/train'
combined_data = read_data_from_csv(train_folder)


In [4]:
# Show how many rows were loaded in total
n_rows = len(combined_data)
print(n_rows)

8952959


In [5]:
# Replace the 'label' column with integer class codes.
# The fitted encoder keeps the code -> original label mapping in `encoder.classes_`.
# NOTE(review): this overwrites the column in place, so re-running the cell
# refits the encoder on the already-encoded values.
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(combined_data['label'])
combined_data['label'] = encoded_labels


In [6]:
# Hold out 20% of the rows for evaluation. The split is stratified on the
# label so both parts keep the same class proportions, and seeded so it is
# repeatable.
labels = combined_data['label']
X_train_combined, X_test_combined, y_train_combined, y_test_combined = train_test_split(
    combined_data.drop('label', axis=1),  # every column except the target
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.2,
)

In [7]:
# Define base learners
# KNN (distance based) and SVC (kernel/margin based) are both sensitive to
# feature scale, so each is wrapped in a pipeline that standardizes the
# features first. Keeping the scaler inside the pipeline means it is re-fit
# together with its estimator on every internal cross-validation fold of the
# stacking classifier, so no statistics leak from held-out folds.
base_learners = [
    ('knn', make_pipeline(StandardScaler(), KNeighborsClassifier())),
    ('svm', make_pipeline(StandardScaler(), SVC())),
]
# NOTE(review): kernel SVC training cost grows at least quadratically with the
# number of samples; with the multi-million-row frame loaded in this notebook
# the fit may not finish in practical time -- confirm, or consider subsampling
# or a linear SVM.

# Define meta-learner
# Combines the base learners' outputs into the final prediction.
meta_learner = LogisticRegression()


In [8]:
# Assemble the stacked ensemble: base learners feed the meta-learner
stacking_classifier = StackingClassifier(
    final_estimator=meta_learner,
    estimators=base_learners,
)


In [None]:
# Fit the stacked ensemble on the training split
stacking_classifier.fit(
    X=X_train_combined,
    y=y_train_combined,
)

In [None]:
# Predict class codes for the held-out test split
stacking_predictions = stacking_classifier.predict(
    X=X_test_combined,
)


In [None]:
# Share of test samples whose predicted label equals the true label
stacking_accuracy = accuracy_score(
    y_true=y_test_combined,
    y_pred=stacking_predictions,
)

# Report the score
print("Stacking Accuracy:", stacking_accuracy)

In [None]:
# Per-class precision / recall / F1 for the stacked model on the test split.
# Classes appear as the integer codes produced by the label encoder.
print("Classification Report for Stacking:")
stacking_report = classification_report(
    y_true=y_test_combined,
    y_pred=stacking_predictions,
)
print(stacking_report)