# Training Road Safety ML Model

## Loading Data

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write files under it.

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd
import torch
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import torch.nn as nn
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the train and test CSVs from the mounted Drive.
df = pd.read_csv('/content/drive/MyDrive/trafficaccidents_train.csv')
df_test = pd.read_csv('/content/drive/MyDrive/trafficaccidents_test.csv')

# Drop incomplete rows, then renumber the index 0..n-1. Later cells rebuild
# DataFrames from bare NumPy arrays (which get a default 0..n-1 index) and
# concatenate them column-wise with slices of `df`; with the gaps that dropna()
# leaves in the index, that concat would misalign rows and reintroduce NaNs.
df = df.dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Severity,Distance(mi),Temperature(F),Visibility(mi),Weather_Condition,Amenity,Bump,Crossing,Give_Way,Junction,No_Exit,Railway,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Duration
0,3,0.01,36.9,10.0,Light Rain,0,0,0,0,0,0,0,0,0,0,0,0,0,314.0
1,2,0.01,36.0,10.0,Overcast,0,0,0,0,0,0,0,0,0,0,0,1,0,30.0
2,3,0.01,35.1,9.0,Mostly Cloudy,0,0,0,0,0,0,0,0,0,0,0,0,0,30.0
3,2,0.01,36.0,6.0,Mostly Cloudy,0,0,0,0,0,0,0,0,0,0,0,1,0,30.0
4,3,0.01,37.9,7.0,Light Rain,0,0,0,0,0,0,0,0,0,0,0,0,0,30.0


## Separating Features and Targets

In [None]:
def normalize_data(df, features_to_normalize):
  """
  Standardizes selected columns of a pandas dataframe using StandardScaler.

  Args:
      df (pandas.DataFrame): The dataframe containing your data.
      features_to_normalize (iterable of str): Names of the columns to standardize.

  Returns:
      tuple: A tuple containing two elements:
          - df_normalized (pandas.DataFrame): The standardized columns (placed first,
            under their original names) followed by the remaining columns unchanged.
            Rows keep the index of `df`.
          - scaler (sklearn.preprocessing.StandardScaler): The fitted StandardScaler object.
  """

  # Split the columns into the ones to scale and the ones to pass through
  features_to_normalize = list(features_to_normalize)  # Ensure list type
  features_to_keep = [col for col in df.columns if col not in features_to_normalize]
  df_to_normalize = df[features_to_normalize]

  # Fit the scaler on the selected columns and transform them in one step
  scaler = StandardScaler()
  scaled_values = scaler.fit_transform(df_to_normalize)

  # The scaler hands back a bare ndarray. Rebuild the frame on df's own index and
  # column names: pd.concat(axis=1) aligns on index labels, so a default 0..n-1
  # index here would misalign rows (and introduce NaNs) whenever df's index has
  # gaps, e.g. after dropna().
  df_scaled = pd.DataFrame(scaled_values, index=df.index, columns=features_to_normalize)
  df_normalized = pd.concat([df_scaled, df[features_to_keep]], axis=1)

  return df_normalized, scaler


In [None]:
# Separate features and targets.
# The feature index is reset to 0..n-1 so that the frames rebuilt from NumPy
# arrays below (which get a default 0..n-1 index) line up row-for-row with it;
# pd.concat(axis=1) aligns on index labels, not on position.
features = df[
    [
        'Temperature(F)',
        'Visibility(mi)',
        'Amenity',
        'Bump',
        'Crossing',
        'Give_Way',
        'Junction',
        'No_Exit',
        'Railway',
        'Roundabout',
        'Station',
        'Stop',
        'Traffic_Calming',
        'Traffic_Signal'
    ]
].reset_index(drop=True)
targets = df[['Severity', 'Duration', 'Distance(mi)']]  # Assuming all three are targets

# One-Hot Encode the categorical feature 'Weather_Condition'
encoder = OneHotEncoder(sparse_output=False)
encoded_weather = encoder.fit_transform(df[['Weather_Condition']])

# Normalize numerical features (excluding the encoded categorical feature)
features_to_normalize = ['Temperature(F)', 'Visibility(mi)']
df_normalized, scaler = normalize_data(features, features_to_normalize)

# Combine the scaled/passthrough features with the one-hot weather columns,
# placing the encoded array on the same index so rows stay paired.
features_normalized = pd.concat(
    [df_normalized, pd.DataFrame(encoded_weather, index=df_normalized.index)],
    axis=1,
)

# Guard against silent row misalignment between features and targets.
assert len(features_normalized) == len(targets), "feature/target row counts differ"
assert not features_normalized.isna().any().any(), "NaNs introduced while combining features"

# Convert features and targets to NumPy arrays.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells
# reference it.
input = features_normalized.to_numpy()
targets = targets.to_numpy()


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    input,
    targets,
    test_size=0.1,
    random_state=42,
)

## Training and Export

In [None]:
from sklearn.ensemble import RandomForestRegressor # Or RandomForestClassifier for classification

# Fit on the training split only. Fitting on the full `input`/`targets` would
# include the held-out rows, leaving nothing unseen to evaluate the model on.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# R^2 on the held-out 10% (for multi-output targets, scikit-learn averages the
# per-target scores uniformly); shown as the cell output.
rf_model.score(X_test, y_test)

In [None]:
import pickle

# Serialize the fitted forest to a file in the current working directory.
with open('my_model.pkl', 'wb') as model_file:
    pickle.dump(rf_model, model_file)

In [None]:
!mkdir /content/drive/MyDrive/Random_Forrest_Models

In [None]:
# Copy the pickled model from the working directory into the models folder on Drive.
!cp my_model.pkl /content/drive/MyDrive/Random_Forrest_Models/