In [1]:
pip install pandas scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load your dataset
data = pd.read_csv('traffic_data.csv')

# Step 2: Preprocess the data
# Assume your dataset has a target variable 'Traffic_Flow' and features are other columns
X = data.drop(columns=['Traffic_Flow'])
y = data['Traffic_Flow']

# Convert categorical columns if necessary
X = pd.get_dummies(X)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define models
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'KNeighbors Classifier': KNeighborsClassifier(),
    'Random Forest Classifier': RandomForestClassifier()
}

# Step 5: Set up hyperparameter grids for AutoML
param_grids = {
    'Logistic Regression': {
        'C': [0.01, 0.1, 1, 10],
        'solver': ['liblinear', 'saga']
    },
    'KNeighbors Classifier': {
        'n_neighbors': [3, 5, 7, 10],
        'weights': ['uniform', 'distance']
    },
    'Random Forest Classifier': {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30]
    }
}

# Step 6: Train models and evaluate
best_model = None
best_accuracy = 0

for model_name, model in models.items():
    grid_search = GridSearchCV(model, param_grids[model_name], cv=5)
    grid_search.fit(X_train, y_train)
    
    # Best model from Grid Search
    best_model_instance = grid_search.best_estimator_
    
    # Predict on the test set
    y_pred = best_model_instance.predict(X_test)
    
    # Evaluate
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy:.4f}")
    print(classification_report(y_test, y_pred))
    
    # Select the best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = best_model_instance

print(f"Best Model: {best_model} with Accuracy: {best_accuracy:.4f}")


KeyError: "['Traffic_Flow'] not found in axis"