In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

# Load your dataset
# Assuming your data is in a CSV file named 'your_data.csv'
data = pd.read_csv('Robot_train_4s.csv')

# 1. Data Preprocessing
# Example: Handle missing values
data = data.dropna()

# Example: Feature Scaling
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data.drop('Label', axis=1))  # Assuming 'label' is the column name for your output

# 2. Splitting the Data
X_train, X_test, y_train, y_test = train_test_split(data_scaled, data['Label'], test_size=0.2, random_state=42)

In [3]:
# 3. Building Multiple ML Models
# Example: Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# 4. Model Evaluation
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Accuracy: {accuracy}')

# 5. Feature Selection using RFE
# Assuming you want to keep 5 features
num_features_to_keep = 5
rfe = RFE(estimator=rf_model, n_features_to_select=num_features_to_keep)
X_train_rfe = rfe.fit_transform(X_train, y_train)
X_test_rfe = rfe.transform(X_test)

# 6. Fine-tuning (Optional)
# Example: Hyperparameter tuning for Random Forest
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=cv,
                           n_jobs=-1)
grid_search.fit(X_train_rfe, y_train)

best_rf_model = grid_search.best_estimator_

# 7. Final Model
best_rf_model.fit(X_train_rfe, y_train)

# 8. Model Evaluation
y_pred_final = best_rf_model.predict(X_test_rfe)
accuracy_final = accuracy_score(y_test, y_pred_final)
print(f'Final Random Forest Accuracy: {accuracy_final}')

# Additional Steps: Cross-Validation, Ensemble Methods, Feature Importance
# Example: Cross-Validation
cv_scores = cross_val_score(best_rf_model, X_train_rfe, y_train, cv=cv, scoring='accuracy')
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Mean Cross-Validation Score: {cv_scores.mean()}')

# Example: Feature Importance
feature_importance = best_rf_model.feature_importances_
print(f'Feature Importance: {feature_importance}')


Random Forest Accuracy: 0.9986064659977704
Final Random Forest Accuracy: 0.9977703455964325
Cross-Validation Scores: [0.99825784 0.99686411 0.9989547  0.99860579 0.99895434]
Mean Cross-Validation Score: 0.9983273561063051
Feature Importance: [0.24537885 0.21225576 0.21899106 0.2018219  0.12155244]


In [4]:
# 3. Building Multiple ML Models
# Example: Support Vector Machine (SVM)
svm_model = SVC(kernel='linear', random_state=42)  # You can choose different kernels (linear, rbf, etc.)
svm_model.fit(X_train, y_train)

# 4. Model Evaluation
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'SVM Accuracy: {accuracy}')

# 5. Feature Selection using RFE
# Assuming you want to keep 3 features
num_features_to_keep = 3
rfe = RFE(estimator=svm_model, n_features_to_select=num_features_to_keep)
X_train_rfe = rfe.fit_transform(X_train, y_train)
X_test_rfe = rfe.transform(X_test)

# 6. Fine-tuning (Optional)
# Example: Hyperparameter tuning for SVM
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.001, 0.01, 0.1],
    'kernel': ['linear', 'rbf', 'poly']
}

cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=SVC(random_state=42),
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=cv,
                           n_jobs=-1)
grid_search.fit(X_train_rfe, y_train)

best_svm_model = grid_search.best_estimator_

# 7. Final Model
best_svm_model.fit(X_train_rfe, y_train)

# 8. Model Evaluation
y_pred_final = best_svm_model.predict(X_test_rfe)
accuracy_final = accuracy_score(y_test, y_pred_final)
print(f'Final SVM Accuracy: {accuracy_final}')

# Additional Steps: Cross-Validation, Ensemble Methods, Feature Importance
# Example: Cross-Validation
cv_scores = cross_val_score(best_svm_model, X_train_rfe, y_train, cv=cv, scoring='accuracy')
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Mean Cross-Validation Score: {cv_scores.mean()}')


SVM Accuracy: 0.9531772575250836
Final SVM Accuracy: 0.9278149386845039
Cross-Validation Scores: [0.92849676 0.92556973 0.9259724 ]
Mean Cross-Validation Score: 0.9266796273187321


In [6]:
# 3. Building Multiple ML Models
# Example: Gradient Boosting
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)

# 4. Model Evaluation
y_pred = gb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Gradient Boosting Accuracy: {accuracy}')

# 5. Feature Selection using RFE
# Assuming you want to keep 3 features
num_features_to_keep = 3
rfe = RFE(estimator=gb_model, n_features_to_select=num_features_to_keep)
X_train_rfe = rfe.fit_transform(X_train, y_train)
X_test_rfe = rfe.transform(X_test)

# 6. Fine-tuning (Optional)
# Example: Hyperparameter tuning for Gradient Boosting
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5]
}

cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=GradientBoostingClassifier(random_state=42),
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=cv,
                           n_jobs=-1)
grid_search.fit(X_train_rfe, y_train)

best_gb_model = grid_search.best_estimator_

# 7. Final Model
best_gb_model.fit(X_train_rfe, y_train)

# 8. Model Evaluation
y_pred_final = best_gb_model.predict(X_test_rfe)
accuracy_final = accuracy_score(y_test, y_pred_final)
print(f'Final Gradient Boosting Accuracy: {accuracy_final}')

# Additional Steps: Cross-Validation, Ensemble Methods, Feature Importance
# Example: Cross-Validation
cv_scores = cross_val_score(best_gb_model, X_train_rfe, y_train, cv=cv, scoring='accuracy')
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Mean Cross-Validation Score: {cv_scores.mean()}')


Gradient Boosting Accuracy: 0.9980490523968785
