In [14]:
import numpy as np
import pandas as pd

# Creating a synthetic dataset
np.random.seed()

# dataset Size
data_size = 500
data = {
    "speed": np.random.randint(0, 120, data_size),
    "engine_temp": np.random.uniform(60, 120, data_size),
    "fuel_level": np.random.uniform(10, 100, data_size),
    "tire_pressure": np.random.uniform(30, 35, data_size),
    "car_age": np.random.randint(0, 20, data_size),
    "weather_condition": np.random.randint(0, 3, data_size),
    "driver_experience": np.random.randint(0, 3, data_size),
    "traffic_condition": np.random.randint(0, 3, data_size),
}

df = pd.DataFrame(data)

#Here i am using lambda function
df['action'] = df['speed'].apply(lambda x: "really fast" if x >= 100 else "fast" if x >= 80 else "normal" if x >= 60 else "slow" if 20 <= x <= 30 else "very slow")

# Saved the file in csv format
df.to_csv('car_speed_data.csv', index=False)


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('car_speed_data.csv')

# Features and target we are looking for
X = df.drop('action', axis=1)
y = df['action']

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [17]:
# different models for evaluation
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score

# Defining models
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "SVM": SVC()
}

# Storing the results in this array
results = []

for name, model in models.items():
    # Cross-validation accuracy
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
    
    results.append({
        "Model": name,
        "CV Accuracy": cv_scores.mean(),
        "Test Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results)
results_df



Unnamed: 0,Model,CV Accuracy,Test Accuracy,Precision,Recall,F1 Score
0,Logistic Regression,0.81,0.8,0.714981,0.8,0.74883
1,Decision Tree,0.995,1.0,1.0,1.0,1.0
2,Random Forest,0.9175,0.91,0.922351,0.91,0.893084
3,Gradient Boosting,0.995,1.0,1.0,1.0,1.0
4,SVM,0.685,0.76,0.671655,0.76,0.710088
