In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the CSV file
df = pd.read_csv('synthetic_road_data_highway_service.csv')

# Select features and target variable
X = df[['lat', 'lon', 'speed (m/s)']]  # Features: latitude, longitude, speed
y = df['road_type']  # Target: road type

# Encode target variable
y = y.map({'highway': 0, 'service_road': 1})  # Convert 'highway' to 0 and 'service_road' to 1

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Function to classify the road type based on user input
def classify_road_type(lat, lon, speed):
    # Prepare the input data
    input_data = scaler.transform([[lat, lon, speed]])
    # Predict the road type
    prediction = model.predict(input_data)
    # Map the prediction back to road type
    road_type = 'highway' if prediction[0] == 0 else 'service_road'
    return road_type

# Get user input
user_lat = float(input("Enter latitude: "))
user_lon = float(input("Enter longitude: "))

# Predict the speed based on the given lat and lon (using mean speed in dataset for simplicity)
mean_speed = df[(df['lat'] >= user_lat - 0.01) & (df['lat'] <= user_lat + 0.01) &
                (df['lon'] >= user_lon - 0.01) & (df['lon'] <= user_lon + 0.01)]['speed (m/s)'].mean()

# Classify road type using the user's input
if pd.notna(mean_speed):
    road_type = classify_road_type(user_lat, user_lon, mean_speed)
    print(f"Predicted road type: {road_type}")
else:
    print("Insufficient data to predict road type.")


Enter latitude:  12.8333
Enter longitude:  77.8333


Insufficient data to predict road type.


In [4]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the CSV file
df = pd.read_csv('synthetic_road_data_highway_service.csv')

# Select features and target variable
X = df[['lat', 'lon', 'speed (m/s)']]  # Features: latitude, longitude, speed
y = df['road_type']  # Target: road type

# Encode target variable
y = y.map({'highway': 0, 'service_road': 1})  # Convert 'highway' to 0 and 'service_road' to 1

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
accuracy

0.545

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Initialize the RandomForestClassifier
rf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the model
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# Train the model with the best parameters
best_rf = grid_search.best_estimator_
best_rf.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred_best = best_rf.predict(X_test_scaled)

# Calculate accuracy
accuracy_best = accuracy_score(y_test, y_pred_best)

# Print the best parameters and accuracy
best_params, accuracy_best


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Select features and target variable
X = df[['lat', 'lon', 'speed (m/s)', 'heading (degrees)', 'altitude (m)']]  # Include additional features
y = df['road_type']  # Target: road type

# Encode target variable
y = y.map({'highway': 0, 'service_road': 1})  # Convert 'highway' to 0 and 'service_road' to 1

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

# Initialize the RandomForestClassifier
rf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV for hyperparameter tuning
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the model with the best parameters
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters and accuracy
best_params = grid_search.best_params_
best_rf = grid_search.best_estimator_

# Predict on the test set with the best model
y_pred_best = best_rf.predict(X_test_scaled)
accuracy_best = accuracy_score(y_test, y_pred_best)

best_params, accuracy_best


Fitting 5 folds for each of 216 candidates, totalling 1080 fits


({'bootstrap': True,
  'max_depth': None,
  'min_samples_leaf': 1,
  'min_samples_split': 10,
  'n_estimators': 100},
 0.985)

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
df = pd.read_csv('synthetic_road_data_highway_service.csv')

# Define features and target
X = df[['lat', 'lon', 'altitude(m)', 'speed(m/s)', 'heading(degrees)']]
y = df['road_type']

# Preprocessing steps
numeric_features = ['lat', 'lon', 'altitude(m)', 'speed(m/s)', 'heading(degrees)']
categorical_features = ['road_type']

# Create the preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), ['road_type'])
    ])

# Create the full pipeline with preprocessing and model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions
y_pred = pipeline.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)


KeyError: "['altitude(m)', 'speed(m/s)', 'heading(degrees)'] not in index"

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load your dataset
df = pd.read_csv('synthetic_road_data_highway_service.csv')

# Inspect column names
print(df.columns)

# Adjust column names as needed based on inspection
X = df[['lat', 'lon', 'altitude(m)', 'speed(m/s)', 'heading(degrees)']]
y = df['road_type']

# Define preprocessing
numeric_features = ['lat', 'lon', 'altitude(m)', 'speed(m/s)', 'heading(degrees)']
categorical_features = ['road_type']

# Create the preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(), ['road_type'])
    ])

# Create the full pipeline with preprocessing and model
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions
y_pred = pipeline.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(report)


Index(['lat', 'lon', 'speed (m/s)', 'heading (degrees)', 'altitude (m)',
       'road_type'],
      dtype='object')


KeyError: "['altitude(m)', 'speed(m/s)', 'heading(degrees)'] not in index"

In [9]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset (replace with your file path)
data = pd.read_csv('synthetic_road_data_highway_service.csv')

# Inspect the dataset
print(data.head())

# Assuming the last column is the target and the rest are features
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target

# Split the data into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the SVM classifier
svm_classifier = SVC(kernel='rbf', C=1, gamma='scale')

# Train the classifier
svm_classifier.fit(X_train, y_train)

# Make predictions on the test set
y_pred = svm_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Detailed classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

         lat        lon  speed (m/s)  heading (degrees)  altitude (m)  \
0  12.937454  77.537027     7.851171          20.631712           918   
1  12.995071  77.608380     7.409364          62.852346           924   
2  12.973199  77.674589    27.187637          95.046190           904   
3  12.959866  77.646445     7.486386         256.240060           907   
4  12.915602  77.661312     8.158492         323.407899           906   

      road_type  
0  service_road  
1  service_road  
2       highway  
3  service_road  
4  service_road  
Accuracy: 84.50%
Classification Report:
              precision    recall  f1-score   support

     highway       0.76      1.00      0.86        98
service_road       1.00      0.70      0.82       102

    accuracy                           0.84       200
   macro avg       0.88      0.85      0.84       200
weighted avg       0.88      0.84      0.84       200

Confusion Matrix:
[[98  0]
 [31 71]]
