In [1]:
# Set start time

from datetime import datetime
overall_start_time = datetime.now()

In [2]:
# Import the data
from tensorflow.keras import layers, models, Model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
import torch

In [3]:
# PyTorch Environment 

print(torch.backends.cudnn.enabled)

print(torch.cuda.is_available())

print(torch.cuda.device_count())

!python -m torch.utils.collect_env

from timeit import default_timer as timer

# check for cuda availability
print("Cuda: ", torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device: ", device)


#GPU 
b = torch.ones(4000,4000).cuda() # Create matrix on GPU memory
start_time = timer() 
for _ in range(1000): 
    b += b 
elapsed_time = timer() - start_time 

print('GPU time = ',elapsed_time)


#CPU
a = torch.ones(4000,4000) # Create matrix on CPU memory
start_time = timer()
for _ in range(1000):
    a += a
elapsed_time = timer() - start_time

print('CPU time = ',elapsed_time)

True
True
1
Collecting environment information...
PyTorch version: 2.0.1+cu117
Is debug build: False
CUDA used to build PyTorch: 11.7
ROCM used to build PyTorch: N/A

OS: Microsoft Windows 11 Home
GCC version: Could not collect
Clang version: Could not collect
CMake version: Could not collect
Libc version: N/A

Python version: 3.10.13 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:24:38) [MSC v.1916 64 bit (AMD64)] (64-bit runtime)
Python platform: Windows-10-10.0.22631-SP0
Is CUDA available: True
CUDA runtime version: 12.5.40

CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: GPU 0: NVIDIA GeForce RTX 4060 Laptop GPU
Nvidia driver version: 555.85
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture=9


CurrentClockSpeed=2300


DeviceID=CPU0


Family=198


L2CacheSize=9728


L2CacheSpeed=


Manufacturer=GenuineIntel


MaxClockSpeed=2300


Name=12th Gen Intel(R) Core(TM) i7-12650H


Proces

In [4]:
# Load the CSV file
CO_crashes_df_updated = pd.read_csv('./Resources/result_files/step1_build_base_data.csv')

In [5]:
# Display the first five rows of the DataFrame
CO_crashes_df_updated.head()

Unnamed: 0.1,Unnamed: 0,ID,County,Severity,Start_Time,End_Time,Weather_Condition,Start_Time_int,crash_duration
0,0,57335,1,3,2022-09-08 13:03:18,2022-09-08 13:32:26,Partly Cloudy,20220908,1748000000000.0
1,1,57362,59,1,2022-09-08 08:44:08,2022-09-08 09:13:40,Fair,20220908,1772000000000.0
2,2,57373,1,2,2022-09-08 06:57:38,2022-09-08 07:27:23,Fair,20220908,1785000000000.0
3,3,57374,5,1,2022-09-08 08:45:51,2022-09-08 09:15:28,Fair,20220908,1777000000000.0
4,4,57375,31,3,2022-09-08 08:42:23,2022-09-08 09:11:57,Fair,20220908,1774000000000.0


In [6]:
# Preprocess "Weather_Condition" column (one-hot encoding)
weather_encoder = OneHotEncoder(sparse_output=False)
weather_encoded = weather_encoder.fit_transform(CO_crashes_df_updated[['Weather_Condition']])
weather_columns = weather_encoder.get_feature_names_out(['Weather_Condition'])
df_weather_encoded = pd.DataFrame(weather_encoded, columns=weather_columns)
df_weather_encoded.head(5)

Unnamed: 0,Weather_Condition_Cloudy,Weather_Condition_Cloudy Windy,Weather_Condition_Cloudy _ Windy,Weather_Condition_Drizzle,Weather_Condition_Fair,Weather_Condition_Fair _ Windy,Weather_Condition_Fog,Weather_Condition_Haze,Weather_Condition_Heavy Snow,Weather_Condition_Light Drizzle,...,Weather_Condition_Rain,Weather_Condition_Showers in the Vicinity,Weather_Condition_Smoke,Weather_Condition_Snow,Weather_Condition_T_Storm,Weather_Condition_Thunder,Weather_Condition_Thunder in the Vicinity,Weather_Condition_Windy,Weather_Condition_Wintry Mix,Weather_Condition_nan
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
CO_crashes_df_updated['Weather_Condition'].value_counts()

Fair                       32024
Mostly Cloudy              16447
Partly Cloudy              13865
Cloudy                     12382
Light Snow                  6424
Snow                        1145
Fair _ Windy                1064
Light Rain                  1024
Fog                          748
Mostly Cloudy _ Windy        574
Partly Cloudy _ Windy        473
Haze                         430
Light Drizzle                404
Heavy Snow                   382
Rain                         276
Light Snow _ Windy           242
Cloudy _ Windy               234
Thunder in the Vicinity      217
Showers in the Vicinity      205
Smoke                        202
Light Rain with Thunder      166
Thunder                      156
T_Storm                      156
Wintry Mix                   154
Light Snow Windy              48
Cloudy Windy                  16
Windy                         12
Light Snow  Windy              2
Light Snow Showers             2
Drizzle                        1
Name: Weat

In [8]:
# define X
X = CO_crashes_df_updated.drop(columns=['County', 'Weather_Condition', 'Start_Time', 'End_Time', 'crash_duration'] )

df_weather_encoded = pd.concat([CO_crashes_df_updated, df_weather_encoded], axis=1)

# Define Y

df_weather_encoded['County'] = CO_crashes_df_updated['County']

y_county = df_weather_encoded['County']

y_weather = df_weather_encoded[weather_columns]

# Split data into training and testing sets

X_train, X_test, y_county_train, y_county_test, y_weather_train, y_weather_test = train_test_split(X, y_county, y_weather)

X.head()

Unnamed: 0.1,Unnamed: 0,ID,Severity,Start_Time_int
0,0,57335,3,20220908
1,1,57362,1,20220908
2,2,57373,2,20220908
3,3,57374,1,20220908
4,4,57375,3,20220908


In [9]:
# Create a StandardScaler
scaler = StandardScaler()

# Fit the StandardScaler to the training data
scaler.fit(X_train)

# Scale the training and testing data
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Create the shared layers of the model

# Input layer
input_layer = layers.Input(shape=(X.shape[1],), name='input_features')

# Shared hidden layers
shared_layer1 = layers.Dense(64, activation='relu')(input_layer)
shared_layer2 = layers.Dense(32, activation='relu')(shared_layer1)

In [11]:
# Branch for weather prediction
weather_output = layers.Dense(31, activation='softmax', name='weather_output')(shared_layer2)

# Branch for county prediction
county_output = layers.Dense(1, activation='sigmoid', name='county_output')(shared_layer2)

In [12]:
# Create the model
model = Model(inputs=input_layer, outputs=[weather_output, county_output])

# Compile the model
model.compile(optimizer='adam',
              loss={'county_output': 'categorical_crossentropy', 'weather_output': 'binary_crossentropy'},
              metrics={'county_output': 'accuracy', 'weather_output': 'accuracy'})

# Display the model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_features (InputLayer  [(None, 4)]                  0         []                            
 )                                                                                                
                                                                                                  
 dense (Dense)               (None, 64)                   320       ['input_features[0][0]']      
                                                                                                  
 dense_1 (Dense)             (None, 32)                   2080      ['dense[0][0]']               
                                                                                                  
 weather_output (Dense)      (None, 31)                   1023      ['dense_1[0][0]']         

In [13]:
# Fit the model
start_time = timer()
model.fit(
    X_train_scaled,
    {'county_output': y_county, 'weather_output': y_weather},
    epochs=10,
    batch_size=32,
    validation_split=0.2
)
elapsed_time = timer() - start_time

print('Fit Model Time = ',elapsed_time)

Epoch 1/10


  return dispatch_target(*args, **kwargs)




  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fit Model Time =  20.686029399977997


In [14]:
# Evaluate the model with the testing data
test_results = model.evaluate(X_test, {'county_output': y_county_test, 'weather_output': y_weather_test})
test_results

111/711 [===>..........................] - ETA: 0s - loss: 77781192.0000 - weather_output_loss: 77781192.0000 - county_output_loss: 0.0000e+00 - weather_output_accuracy: 0.3499 - county_output_accuracy: 0.1382   

  return dispatch_target(*args, **kwargs)




[76853992.0, 76853992.0, 0.0, 0.3503652811050415, 0.13964439928531647]

In [15]:
# Print the accuracy
print(f"Weather Accuracy: {test_results[3]}")
print(f"County Accuracy: {test_results[4]}")

Weather Accuracy: 0.3503652811050415
County Accuracy: 0.13964439928531647


In [16]:
# Calculate Duration
overall_end_time = datetime.now()
print('Duration: {}'.format(overall_end_time - overall_start_time))

Duration: 0:00:40.851487
