In [None]:
#stacking
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Read the CSV file
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Define the fixed random state first so that the split AND every stochastic
# estimator share it. Seeding only the split is not enough: the decision tree,
# the probability-calibrated SVC and the random forest all draw random
# numbers, so re-running the cell would otherwise print different metrics.
fixed_random_state = 87

# Define base models with scaling (the tree does not need scaled inputs)
base_models = [
    ('logistic', make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))),
    ('tree', DecisionTreeClassifier(random_state=fixed_random_state)),
    ('svm', make_pipeline(StandardScaler(), SVC(probability=True, random_state=fixed_random_state)))
]

# Define meta-model trained on the base models' cross-validated predictions
meta_model = RandomForestClassifier(n_estimators=100, random_state=fixed_random_state)

# Create stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Split dataset with fixed random state (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=fixed_random_state)

# Train stacking model
stacking_model.fit(X_train, y_train)

# Make predictions
y_pred = stacking_model.predict(X_test)

# Calculate evaluation metrics (support-weighted averages across classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Accuracy: 0.975
Precision: 0.9758333333333333
Recall: 0.975
F1-score: 0.9720868644067796


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Read the CSV file
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Define the fixed random state before the models so it can seed them as well
# as the split. The tree, the SVC probability estimates and the random forest
# are all randomised; leaving them unseeded makes the printed metrics change
# from run to run even with an identical split.
fixed_random_state = 87  # You can set this to any specific integer value

# Define base models with scaling (the tree does not need scaled inputs)
base_models = [
    ('logistic', make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))),
    ('tree', DecisionTreeClassifier(random_state=fixed_random_state)),
    ('svm', make_pipeline(StandardScaler(), SVC(probability=True, random_state=fixed_random_state)))
]

# Define meta-model trained on the base models' cross-validated predictions
meta_model = RandomForestClassifier(n_estimators=100, random_state=fixed_random_state)

# Create stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Split dataset with the fixed random state (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=fixed_random_state)

# Train stacking model
stacking_model.fit(X_train, y_train)

# Make predictions
y_pred = stacking_model.predict(X_test)

# Calculate evaluation metrics (support-weighted averages across classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print evaluation metrics
print("Fixed Random State:", fixed_random_state)
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)


Fixed Random State: 87
Accuracy: 0.9875
Precision: 0.9877118644067796
Recall: 0.9875
F1-score: 0.9868589743589743


In [None]:
# Stacking with data augmentation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Read the CSV file
# NOTE(review): the file name suggests an augmented dataset. If augmentation
# was applied before this train/test split, variants of test rows may also be
# in the training set and inflate the scores -- confirm how it was produced.
file_path = "/content/augmented2.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# One seed for the split and for every randomised estimator, so that the cell
# prints the same metrics each time it is re-run.
fixed_random_state = 51

# Define base models with scaling (the tree does not need scaled inputs)
base_models = [
    ('logistic', make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))),
    ('tree', DecisionTreeClassifier(random_state=fixed_random_state)),
    ('svm', make_pipeline(StandardScaler(), SVC(probability=True, random_state=fixed_random_state)))
]

# Define meta-model trained on the base models' cross-validated predictions
meta_model = RandomForestClassifier(n_estimators=100, random_state=fixed_random_state)

# Create stacking ensemble
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)

# Split dataset (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=fixed_random_state)

# Train stacking model
stacking_model.fit(X_train, y_train)

# Make predictions
y_pred = stacking_model.predict(X_test)

# Calculate evaluation metrics (support-weighted averages across classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print evaluation metrics
print(f'Stacking Model Accuracy: {accuracy:.4f}')
print(f'Stacking Model Precision: {precision:.4f}')
print(f'Stacking Model Recall: {recall:.4f}')
print(f'Stacking Model F1-score: {f1:.4f}')



Stacking Model Accuracy: 0.9862
Stacking Model Precision: 0.9866
Stacking Model Recall: 0.9862
Stacking Model F1-score: 0.9862


In [None]:
#bagging
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Load the dataset from disk
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Features are every column except the last one; the label is the 'Type' column
X, y = data.iloc[:, :-1], data['Type']

# String labels are replaced by their integer codes
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Hold out 20% of the rows for testing, using a seeded shuffle
random_state = 103
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)

# Base learner: standardise the features, then fit a logistic regression
base_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))

# Bag 50 copies of the base learner, fitted in parallel on all cores
bagging_model = BaggingClassifier(
    base_model,
    n_estimators=50,
    random_state=random_state,
    n_jobs=-1,
)

# Fit on the training rows, then predict the held-out rows
y_pred = bagging_model.fit(X_train, y_train).predict(X_test)

# Score the predictions; per-class metrics are averaged weighted by support
weighted = {'average': 'weighted'}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

# Report every metric with four decimals
for label, score in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1-score', f1),
):
    print(f'Bagging Model {label}: {score:.4f}')


Bagging Model Accuracy: 0.9875
Bagging Model Precision: 0.9877
Bagging Model Recall: 0.9875
Bagging Model F1-score: 0.9870


In [None]:
# Bagging with data augmentation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Load the dataset from disk
# NOTE(review): the file name suggests augmented data; whether augmentation
# happened before this split cannot be told from this cell -- confirm.
file_path = "/content/augmented2.csv"
data = pd.read_csv(file_path)

# Features are every column except the last one; the label is the 'Type' column
X, y = data.iloc[:, :-1], data['Type']

# String labels are replaced by their integer codes
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Hold out 20% of the rows for testing, using a seeded shuffle
random_state = 181
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_state
)

# Base learner: standardise the features, then fit a logistic regression
base_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=2000))

# Bag 50 copies of the base learner, fitted in parallel on all cores
bagging_model = BaggingClassifier(
    base_model,
    n_estimators=50,
    random_state=random_state,
    n_jobs=-1,
)

# Fit on the training rows, then predict the held-out rows
y_pred = bagging_model.fit(X_train, y_train).predict(X_test)

# Score the predictions; per-class metrics are averaged weighted by support
weighted = {'average': 'weighted'}
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, **weighted)
recall = recall_score(y_test, y_pred, **weighted)
f1 = f1_score(y_test, y_pred, **weighted)

# Report every metric with four decimals
for label, score in (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1-score', f1),
):
    print(f'Bagging Model {label}: {score:.4f}')



Bagging Model Accuracy: 0.9862
Bagging Model Precision: 0.9869
Bagging Model Recall: 0.9862
Bagging Model F1-score: 0.9860


In [None]:
# Ensemble of Deep Learning Models
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# StandardScaler is not among this cell's imports, so bring it in here.
from sklearn.preprocessing import StandardScaler

# Read the CSV file
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Convert y to one-hot encoding if needed (one indicator column per class)
if len(y.shape) == 1 or y.shape[1] == 1:
    y = pd.get_dummies(y)

# Seed numpy and TensorFlow BEFORE the models are created so that weight
# initialisation and batch shuffling are repeatable between runs (as far as
# the backend allows). The same value also seeds the split below.
fixed_random_state = 25
np.random.seed(fixed_random_state)
tf.random.set_seed(fixed_random_state)

# Define multiple deep learning models (same depth, increasing width)
models = []

# Model 1
model1 = Sequential([
    Dense(64, activation='relu', input_shape=(X.shape[1],)),
    Dense(32, activation='relu'),
    Dense(y.shape[1], activation='softmax')
])
model1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
models.append(model1)

# Model 2
model2 = Sequential([
    Dense(128, activation='relu', input_shape=(X.shape[1],)),
    Dense(64, activation='relu'),
    Dense(y.shape[1], activation='softmax')
])
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
models.append(model2)

# Model 3
model3 = Sequential([
    Dense(256, activation='relu', input_shape=(X.shape[1],)),
    Dense(128, activation='relu'),
    Dense(y.shape[1], activation='softmax')
])
model3.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
models.append(model3)

# Split dataset with a fixed random state (70% train / 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=fixed_random_state)

# Standardise the features. The networks were previously fed raw columns,
# unlike the scaled pipelines used for the classical models. The scaler is
# fitted on the training rows only so no test statistics leak into training,
# and the result is wrapped back into DataFrames to keep the input type the
# networks received before.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# Train each model
for model in models:
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# Make predictions with each model (class-probability matrices)
predictions = [model.predict(X_test) for model in models]

# Ensemble predictions by averaging the probabilities, then take the arg-max
ensemble_predictions = np.mean(predictions, axis=0)
y_pred = np.argmax(ensemble_predictions, axis=1)

# Evaluate ensemble model (one-hot test labels are converted back to indices)
accuracy = accuracy_score(np.argmax(y_test.to_numpy(), axis=1), y_pred)
print(f'Ensemble Model Accuracy: {accuracy:.4f}')


Ensemble Model Accuracy: 0.7167


In [None]:
# Ensemble of Deep Learning Models with data augmentation
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the augmented dataset from disk
file_path = "/content/augmented2.csv"
data = pd.read_csv(file_path)

# Features are every column except the last one; the label is the 'Type' column
X, y = data.iloc[:, :-1], data['Type']

# A one-dimensional (or single-column) label is expanded into one indicator
# column per class, matching the softmax / categorical cross-entropy setup
needs_one_hot = y.ndim == 1 or y.shape[1] == 1
if needs_one_hot:
    y = pd.get_dummies(y)

# Define multiple deep learning models
def create_models(input_shape, output_shape, hidden_units=((64, 32), (128, 64), (256, 128))):
    """Build and compile one dense softmax classifier per entry of ``hidden_units``.

    Args:
        input_shape: Number of input features (an int; it is wrapped into the
            one-element tuple passed to the first ``Dense`` layer).
        output_shape: Number of units of the final softmax layer, i.e. the
            number of one-hot label columns.
        hidden_units: Iterable of width sequences, one per model. Each
            sequence lists the ReLU hidden-layer widths of that model in
            order. The default reproduces the three original architectures
            (64-32, 128-64 and 256-128).

    Returns:
        A list of compiled ``Sequential`` models in the order of
        ``hidden_units``, each using the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If any entry of ``hidden_units`` is empty.
    """
    models = []

    for widths in hidden_units:
        widths = tuple(widths)
        if not widths:
            raise ValueError("each entry of hidden_units needs at least one layer width")

        # Only the first layer declares the input shape; the rest infer it.
        layers = [Dense(widths[0], activation='relu', input_shape=(input_shape,))]
        layers.extend(Dense(width, activation='relu') for width in widths[1:])
        layers.append(Dense(output_shape, activation='softmax'))

        model = Sequential(layers)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        models.append(model)

    return models

# StandardScaler is not among this cell's imports, so bring it in here.
from sklearn.preprocessing import StandardScaler

# Define a fixed random state and use it for numpy and TensorFlow as well as
# for the split. Seeding happens BEFORE the models are created so that weight
# initialisation and batch shuffling are repeatable between runs (as far as
# the backend allows).
fixed_random_state = 165
np.random.seed(fixed_random_state)
tf.random.set_seed(fixed_random_state)

# Split dataset (80% train / 20% test)
# NOTE(review): the data comes from an augmented file; if augmentation was
# applied before this split, variants of test rows may also be in the
# training set -- confirm how the file was produced.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=fixed_random_state)

# Standardise the features using training-set statistics only, so no test
# information leaks into training. The arrays are wrapped back into
# DataFrames to keep the input type the networks received before.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# Create models
models = create_models(X.shape[1], y.shape[1])

# Train each model
for model in models:
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

# Make predictions with each model (class-probability matrices)
predictions = [model.predict(X_test) for model in models]

# Ensemble predictions by averaging the probabilities, then take the arg-max
ensemble_predictions = np.mean(predictions, axis=0)
y_pred = np.argmax(ensemble_predictions, axis=1)

# Convert the one-hot test labels back to class indices once and reuse them
y_true = np.argmax(y_test.to_numpy(), axis=1)

# Evaluate ensemble model (support-weighted averages across classes)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')

# Print evaluation metrics
print(f'Ensemble Model Accuracy: {accuracy:.4f}')
print(f'Ensemble Model Precision: {precision:.4f}')
print(f'Ensemble Model Recall: {recall:.4f}')
print(f'Ensemble Model F1-score: {f1:.4f}')


Ensemble Model Accuracy: 0.9793
Ensemble Model Precision: 0.9807
Ensemble Model Recall: 0.9793
Ensemble Model F1-score: 0.9792


In [None]:
#voting
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the CSV file
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# One seed for the split and for the randomised estimators, so that the cell
# prints the same metrics each time it is re-run.
random_state = 185

# Split dataset (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

# Define individual classifiers
# NOTE(review): the logistic regression and the SVC receive unscaled features
# here -- confirm that this is intended.
clf1 = LogisticRegression(max_iter=2000)
clf2 = RandomForestClassifier(n_estimators=100, random_state=random_state)
clf3 = SVC(probability=True, random_state=random_state)

# Create a Voting Classifier ('soft' = average the predicted probabilities)
voting_clf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft')

# Train the Voting Classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate the Voting Classifier (support-weighted averages across classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Voting Classifier Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')


Voting Classifier Accuracy: 0.9625
Precision: 0.9692
Recall: 0.9625
F1 Score: 0.9616


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the CSV file
file_path = "/content/data.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# Seed the randomised estimators so that the only thing varying between
# iterations of the loop below is the train/test split itself.
model_random_state = 0

# Define individual classifiers
clf1 = LogisticRegression(max_iter=2000)
clf2 = RandomForestClassifier(n_estimators=100, random_state=model_random_state)
clf3 = SVC(probability=True, random_state=model_random_state)

# Create a Voting Classifier ('soft' = average the predicted probabilities)
voting_clf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft')

# Initialize variables to store the best results
best_accuracy = 0
best_precision = 0
best_recall = 0
best_f1 = 0
best_random_state = None

# Accuracy of every split, kept so the spread can be reported honestly
accuracies = []

# Loop over different random states
for random_state in range(1000):
    # Split dataset with the current random state
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

    # Train the Voting Classifier (fit() refits from scratch on each call)
    voting_clf.fit(X_train, y_train)

    # Make predictions
    y_pred = voting_clf.predict(X_test)

    # Calculate evaluation metrics. zero_division=0 yields the same values as
    # the default but without emitting an undefined-metric warning for every
    # split in which a class receives no predictions.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    accuracies.append(accuracy)

    # Check if this is the best model and accuracy is less than 100%
    # (perfect scores are deliberately skipped by the original selection rule)
    if accuracy < 1.0 and accuracy > best_accuracy:
        best_accuracy = accuracy
        best_precision = precision
        best_recall = recall
        best_f1 = f1
        best_random_state = random_state

# Report the distribution over all splits first. Picking the split seed by its
# test accuracy is an optimistic selection, so the mean and standard deviation
# are the figures to quote; the best seed below is kept for reference only.
print(f"Mean Accuracy over {len(accuracies)} splits: {np.mean(accuracies):.4f} (std {np.std(accuracies):.4f})")

# Print the best results if found
if best_random_state is not None:
    print(f"Best Random State: {best_random_state}")
    print(f"Accuracy: {best_accuracy:.4f}")
    print(f"Precision: {best_precision:.4f}")
    print(f"Recall: {best_recall:.4f}")
    print(f"F1-score: {best_f1:.4f}")
else:
    print("No valid results found with accuracy less than 100%.")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Best Random State: 265
Accuracy: 0.9875
Precision: 0.9877
Recall: 0.9875
F1-score: 0.9867


In [None]:
# Voting with data augmentation
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Read the CSV file
# NOTE(review): the file name suggests an augmented dataset. If augmentation
# was applied before this train/test split, variants of test rows may also be
# in the training set and inflate the scores -- confirm how it was produced.
file_path = "/content/augmented2.csv"
data = pd.read_csv(file_path)

# Define features (X) and target (y)
# NOTE(review): this assumes 'Type' is the last column of the CSV; if it is
# not, the target would stay inside X -- confirm against the file layout.
X = data.iloc[:, :-1]
y = data['Type']

# Check if the target variable needs encoding (string labels -> integer codes)
if y.dtype == 'object':
    y = pd.factorize(y)[0]

# One seed for the split and for the randomised estimators, so that the cell
# prints the same metrics each time it is re-run.
random_state = 176

# Split dataset (80% train / 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

# Define individual classifiers
# NOTE(review): the logistic regression and the SVC receive unscaled features
# here -- confirm that this is intended.
clf1 = LogisticRegression(max_iter=2000)
clf2 = RandomForestClassifier(n_estimators=100, random_state=random_state)
clf3 = SVC(probability=True, random_state=random_state)

# Create a Voting Classifier ('soft' = average the predicted probabilities)
voting_clf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('svc', clf3)], voting='soft')

# Train the Voting Classifier
voting_clf.fit(X_train, y_train)

# Make predictions
y_pred = voting_clf.predict(X_test)

# Evaluate the Voting Classifier (support-weighted averages across classes)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f'Voting Classifier Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')


Voting Classifier Accuracy: 0.9931
Precision: 0.9933
Recall: 0.9931
F1 Score: 0.9930
