In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pickle

In [None]:


# Load your dataset (replace 'your_dataset.csv' with the actual file path)
df = pd.read_csv("your_dataset.csv")

# --- Preprocessing -----------------------------------------------------------
# Order matters here: the train/test split happens BEFORE any statistic-based
# transformer (imputer, scaler) is fitted. Fitting those on the full dataset
# would leak information from the test rows (their mean / variance) into
# training and make the test accuracy optimistically biased.

# Encode categorical variables (if any)
# Example: Use label encoding for a categorical column.
# The encoder only learns the set of category values (no statistics), so it is
# fitted on the full column; this also avoids a ValueError from
# LabelEncoder.transform when a category appears only in the test rows.
label_encoder = LabelEncoder()
# Replace 'categorical_column' with the name of the categorical column
df["categorical_column"] = label_encoder.fit_transform(df["categorical_column"])

# Define your feature columns (X) and target column (y)
# Replace 'target_column' with your actual target column name
X = df.drop("target_column", axis=1)
y = df["target_column"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify independent
# frames rather than views of X (avoids pandas' SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

# Handle missing values
imputer = SimpleImputer(strategy="mean")  # Replace with your preferred imputation strategy
# Replace 'missing_column' with the name of the column containing missing values.
# fit_transform / transform return an (n_rows, 1) array; ravel() flattens it so
# it can be assigned to a single column. The fill value is learned from the
# training rows only and then re-used for the test rows.
X_train["missing_column"] = imputer.fit_transform(X_train[["missing_column"]]).ravel()
X_test["missing_column"] = imputer.transform(X_test[["missing_column"]]).ravel()

# Scale numerical features (if needed)
scaler = StandardScaler()
# Replace 'numerical_feature1' and 'numerical_feature2' with your numerical feature columns
numeric_columns = ["numerical_feature1", "numerical_feature2"]
# Mean / standard deviation are learned from the training rows only.
X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])

# Now you have X_train, X_test (features), y_train, and y_test (target) ready for training your models.

# Train your machine learning models
# Random forest and gradient boosting are stochastic (bootstrap sampling /
# feature sub-sampling), so they are seeded with the same value used for the
# train/test split; without this a Restart & Run All gives different results
# each time. SVC with default settings needs no seed.
rf_model = RandomForestClassifier(random_state=42)
gb_model = GradientBoostingClassifier(random_state=42)
svm_model = SVC()

# Fit every model on the same training split.
rf_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)


In [None]:
# Generate each model's predictions for the held-out test set
# (scoring happens in a later cell).
rf_predictions, gb_predictions, svm_predictions = [
    fitted_model.predict(X_test)
    for fitted_model in (rf_model, gb_model, svm_model)
]


In [None]:
# Combine predictions using majority voting
# With exactly three voters the majority label can be found without counting:
#   * if gradient boosting and the SVM agree, their label has at least 2 of
#     the 3 votes, so it wins;
#   * otherwise the random forest either sides with one of them (making its
#     label the majority) or all three disagree, in which case the random
#     forest's prediction is used as the tie-break.
# Unlike summing the predictions and comparing with 2, this works for any label
# type (0/1, other integers, strings, multiclass) and yields actual class
# labels rather than booleans. For 0/1 labels the result matches the sum rule.
gb_svm_agree = gb_predictions == svm_predictions
combined_predictions = (
    pd.Series(gb_predictions).where(gb_svm_agree, rf_predictions).to_numpy()
)

# Calculate accuracy of the combined predictions
accuracy = accuracy_score(y_test, combined_predictions)
print(f"Combined Model Accuracy: {accuracy}")

In [None]:
# Save the trained models using pickle
# The fitted preprocessing objects are saved alongside the models: a reloaded
# model can only score new raw data if that data is imputed, encoded and scaled
# with the exact same fitted transformers.
# NOTE: only unpickle these files from a trusted location; loading a pickle
# executes arbitrary code.
artifacts = {
    "rf_model.pkl": rf_model,
    "gb_model.pkl": gb_model,
    "svm_model.pkl": svm_model,
    "imputer.pkl": imputer,
    "label_encoder.pkl": label_encoder,
    "scaler.pkl": scaler,
}

for file_name, artifact in artifacts.items():
    # One file per object, written in binary mode as pickle requires.
    with open(file_name, "wb") as artifact_file:
        pickle.dump(artifact, artifact_file)