In [1]:
# Import the data
from tensorflow.keras import layers, models, Model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
import torch


df = pd.read_csv('CO_Accidents_March23.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,512415,A-512446,Source2,3,2022-09-08 13:03:18,2022-09-08 13:32:26,39.848122,-104.985306,,,...,False,False,False,False,False,False,Day,Day,Day,Day
1,512525,A-512565,Source2,1,2022-09-08 08:44:08,2022-09-08 09:13:40,39.7766,-105.06263,,,...,False,False,False,False,True,False,Day,Day,Day,Day
2,512588,A-512637,Source2,2,2022-09-08 06:57:38,2022-09-08 07:27:23,39.899731,-104.868507,,,...,False,False,False,False,True,False,Day,Day,Day,Day
3,512593,A-512643,Source2,1,2022-09-08 08:45:51,2022-09-08 09:15:28,39.618809,-104.773201,,,...,False,False,False,False,True,False,Day,Day,Day,Day
4,512594,A-512644,Source2,3,2022-09-08 08:42:23,2022-09-08 09:11:57,39.725094,-105.012817,,,...,False,False,False,False,False,False,Day,Day,Day,Day


In [2]:
# PyTorch Environment 

print(torch.backends.cudnn.enabled)

print(torch.cuda.is_available())

print(torch.cuda.device_count())

!python -m torch.utils.collect_env

from timeit import default_timer as timer

# check for cuda availability
print("Cuda: ", torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device: ", device)


#GPU 
b = torch.ones(4000,4000).cuda() # Create matrix on GPU memory
start_time = timer() 
for _ in range(1000): 
    b += b 
elapsed_time = timer() - start_time 

print('GPU time = ',elapsed_time)


#CPU
a = torch.ones(4000,4000) # Create matrix on CPU memory
start_time = timer()
for _ in range(1000):
    a += a
elapsed_time = timer() - start_time

print('CPU time = ',elapsed_time)



True
True
1
Collecting environment information...
PyTorch version: 2.0.1+cu117
Is debug build: False
CUDA used to build PyTorch: 11.7
ROCM used to build PyTorch: N/A

OS: Microsoft Windows 11 Home
GCC version: Could not collect
Clang version: Could not collect
CMake version: Could not collect
Libc version: N/A

Python version: 3.10.13 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:24:38) [MSC v.1916 64 bit (AMD64)] (64-bit runtime)
Python platform: Windows-10-10.0.22631-SP0
Is CUDA available: True
CUDA runtime version: 12.5.40

CUDA_MODULE_LOADING set to: LAZY
GPU models and configuration: GPU 0: NVIDIA GeForce RTX 4060 Laptop GPU
Nvidia driver version: 555.85
cuDNN version: Could not collect
HIP runtime version: N/A
MIOpen runtime version: N/A
Is XNNPACK available: True

CPU:
Architecture=9


CurrentClockSpeed=2300


DeviceID=CPU0


Family=198


L2CacheSize=9728


L2CacheSpeed=


Manufacturer=GenuineIntel


MaxClockSpeed=2300


Name=12th Gen Intel(R) Core(TM) i7-12650H


Proces

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90885 entries, 0 to 90884
Data columns (total 47 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0             90885 non-null  int64  
 1   ID                     90885 non-null  object 
 2   Source                 90885 non-null  object 
 3   Severity               90885 non-null  int64  
 4   Start_Time             90885 non-null  object 
 5   End_Time               90885 non-null  object 
 6   Start_Lat              90885 non-null  float64
 7   Start_Lng              90885 non-null  float64
 8   End_Lat                47671 non-null  float64
 9   End_Lng                47671 non-null  float64
 10  Distance(mi)           90885 non-null  float64
 11  Description            90885 non-null  object 
 12  Street                 90727 non-null  object 
 13  City                   90877 non-null  object 
 14  County                 90885 non-null  object 
 15  St

In [4]:
  # extract relevant columns

relevant_df = df[['ID','County', 'Severity',
                  'Weather_Condition']]

#relevant_df = df[['ID', 'Start_Time', 'City', 'State', 'County', 'Airport_Code', 'End_Time', 'Timezone', 'Zipcode', 'Description', 'Severity',
#'Temperature(F)', 'Distance(mi)', 'Wind_Speed(mph)', 'Precipitation(in)',
#'Weather_Condition', 'Weather_Timestamp']]

[print('accident_count = ', relevant_df.count())]


accident_count =  ID                   90885
County               90885
Severity             90885
Weather_Condition    89475
dtype: int64


[None]

In [5]:
relevant_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90885 entries, 0 to 90884
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ID                 90885 non-null  object
 1   County             90885 non-null  object
 2   Severity           90885 non-null  int64 
 3   Weather_Condition  89475 non-null  object
dtypes: int64(1), object(3)
memory usage: 2.8+ MB


In [6]:
df['State'].value_counts()

CO    90885
Name: State, dtype: int64

In [7]:
CO_crashes_df =  relevant_df.loc[(df["State"] == "CO")]
CO_crashes_df['Severity'].value_counts()

2    56105
3    26306
4     7504
1      970
Name: Severity, dtype: int64

In [8]:
CO_crashes_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 90885 entries, 0 to 90884
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ID                 90885 non-null  object
 1   County             90885 non-null  object
 2   Severity           90885 non-null  int64 
 3   Weather_Condition  89475 non-null  object
dtypes: int64(1), object(3)
memory usage: 3.5+ MB


In [9]:
CO_crashes_df['County'].value_counts()

Denver       21163
Adams        12825
El Paso      10014
Jefferson     8226
Arapahoe      7547
             ...  
Montrose        50
Crowley         42
Phillips        36
San Juan        23
Hinsdale         8
Name: County, Length: 64, dtype: int64

In [10]:
# Update CO_crashes_df with numeric county code

county_dict = {
"Adams":1,"Alamosa":3,"Arapahoe":5,"Archuleta":7,"Baca":9,"Bent":11,"Boulder":13,"Broomfield":14,"Chaffee":15,"Cheyenne":17,"Clear Creek":19,"Conejos":21,"Costilla":23,"Crowley":25,"Custer":27,
"Delta":29,"Denver":31,"Dolores":33,"Douglas":35,"Eagle":37,"El Paso":41,"Elbert":39,"Fremont":43,"Garfield":45,"Gilpin":47,"Grand":49,"Gunnison":51,"Hinsdale":53,"Huerfano":55,
"Jackson":57,"Jefferson":59,"Kiowa":61,"Kit Carson":63,"La Plata":67,"Lake":65,"Larimer":69,"Las Animas":71,"Lincoln":73,"Logan":75,"Mesa":77,"Mineral":79,
"Moffat":81,"Montezuma":83,"Montrose":85,"Morgan":87,"Otero":89,"Ouray":91,"Park":93,"Phillips":95,"Pitkin":97,"Prowers":99,"Pueblo":101,"Rio Blanco":103,"Rio Grande":105,"Routt":107,
"Saguache":109,"San Juan":111,"San Miguel":113,"Sedgwick":115,"Summit":117,"Teller":119,"Washington":121,"Weld":123,"Yuma":125
}

CO_crashes_df_updated =  CO_crashes_df.replace({"County": county_dict})

CO_crashes_df_updated.head(5)

Unnamed: 0,ID,County,Severity,Weather_Condition
0,A-512446,1,3,Partly Cloudy
1,A-512565,59,1,Fair
2,A-512637,1,2,Fair
3,A-512643,5,1,Fair
4,A-512644,31,3,Fair


In [11]:
# Preprocess Weather_Condition

# Preprocess "Weather_Condition" column (one-hot encoding)
weather_encoder = OneHotEncoder(sparse_output=False)
weather_encoded = weather_encoder.fit_transform(df[['Weather_Condition']])
weather_columns = weather_encoder.get_feature_names_out(['Weather_Condition'])
df_weather_encoded = pd.DataFrame(weather_encoded, columns=weather_columns)
df_weather_encoded.head(5)

Unnamed: 0,Weather_Condition_Blowing Dust,Weather_Condition_Blowing Dust / Windy,Weather_Condition_Blowing Snow,Weather_Condition_Blowing Snow / Windy,Weather_Condition_Clear,Weather_Condition_Cloudy,Weather_Condition_Cloudy / Windy,Weather_Condition_Drizzle,Weather_Condition_Drizzle and Fog,Weather_Condition_Fair,...,Weather_Condition_T-Storm / Windy,Weather_Condition_Thunder,Weather_Condition_Thunder / Windy,Weather_Condition_Thunder / Wintry Mix,Weather_Condition_Thunder in the Vicinity,Weather_Condition_Thunderstorm,Weather_Condition_Thunderstorms and Rain,Weather_Condition_Widespread Dust / Windy,Weather_Condition_Wintry Mix,Weather_Condition_nan
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
le = LabelEncoder()
CO_crashes_df_updated['ID'] = le.fit_transform(df['ID'])
CO_crashes_df_updated.set_index('ID')

CO_crashes_df_updated.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 90885 entries, 0 to 90884
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   ID                 90885 non-null  int32 
 1   County             90885 non-null  int64 
 2   Severity           90885 non-null  int64 
 3   Weather_Condition  89475 non-null  object
dtypes: int32(1), int64(2), object(1)
memory usage: 3.1+ MB


In [13]:

CO_crashes_df_updated.head(5)

Unnamed: 0,ID,County,Severity,Weather_Condition
0,57335,1,3,Partly Cloudy
1,57362,59,1,Fair
2,57373,1,2,Fair
3,57374,5,1,Fair
4,57375,31,3,Fair


In [14]:
df_weather_encoded.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90885 entries, 0 to 90884
Data columns (total 75 columns):
 #   Column                                          Non-Null Count  Dtype  
---  ------                                          --------------  -----  
 0   Weather_Condition_Blowing Dust                  90885 non-null  float64
 1   Weather_Condition_Blowing Dust / Windy          90885 non-null  float64
 2   Weather_Condition_Blowing Snow                  90885 non-null  float64
 3   Weather_Condition_Blowing Snow / Windy          90885 non-null  float64
 4   Weather_Condition_Clear                         90885 non-null  float64
 5   Weather_Condition_Cloudy                        90885 non-null  float64
 6   Weather_Condition_Cloudy / Windy                90885 non-null  float64
 7   Weather_Condition_Drizzle                       90885 non-null  float64
 8   Weather_Condition_Drizzle and Fog               90885 non-null  float64
 9   Weather_Condition_Fair                 

In [15]:
# define X
X = CO_crashes_df_updated.drop(columns=['County', 'Weather_Condition'])

df_weather_encoded = pd.concat([CO_crashes_df_updated, df_weather_encoded], axis=1)

# Preprocess "color" column (label encoding for binary; one-hot encoding for multiple categories)
county_encoder = LabelEncoder()
df_weather_encoded['County'] = county_encoder.fit_transform(df['County'])

y_county = df_weather_encoded['County']

y_weather = df_weather_encoded[weather_columns]

# Split data into training and testing sets

X_train, X_test, y_county_train, y_county_test, y_weather_train, y_weather_test = train_test_split(X, y_county, y_weather)

In [16]:
# Create the shared layers of the model

# Input layer
input_layer = layers.Input(shape=(X.shape[1],), name='input_features')

# Shared hidden layers
shared_layer1 = layers.Dense(64, activation='relu')(input_layer)
shared_layer2 = layers.Dense(32, activation='relu')(shared_layer1)

In [17]:
# Branch for quality prediction
weather_output = layers.Dense(75, activation='softmax', name='weather_output')(shared_layer2)

# Branch for color prediction
county_output = layers.Dense(1, activation='sigmoid', name='county_output')(shared_layer2)

In [18]:
# Create the model
model = Model(inputs=input_layer, outputs=[weather_output, county_output])

# Compile the model
model.compile(optimizer='adam',
              loss={'county_output': 'categorical_crossentropy', 'weather_output': 'binary_crossentropy'},
              metrics={'county_output': 'accuracy', 'weather_output': 'accuracy'})

# Display the model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________


 Layer (type)                Output Shape                 Param #   Connected to                  
 input_features (InputLayer  [(None, 2)]                  0         []                            
 )                                                                                                
                                                                                                  
 dense (Dense)               (None, 64)                   192       ['input_features[0][0]']      
                                                                                                  
 dense_1 (Dense)             (None, 32)                   2080      ['dense[0][0]']               
                                                                                                  
 weather_output (Dense)      (None, 75)                   2475      ['dense_1[0][0]']             
                                                                                                  
 county_ou

In [19]:
[print(i.shape, i.dtype) for i in model.inputs]
[print(o.shape, o.dtype) for o in model.outputs]
[print(l.name, l.input_shape, l.dtype) for l in model.layers]

(None, 2) <dtype: 'float32'>
(None, 75) <dtype: 'float32'>
(None, 1) <dtype: 'float32'>
input_features [(None, 2)] float32
dense (None, 2) float32
dense_1 (None, 64) float32
weather_output (None, 32) float32
county_output (None, 32) float32


[None, None, None, None, None]

In [20]:
# Fit the model
start_time = timer()
model.fit(
    X,
    {'county_output': y_county, 'weather_output': y_weather},
    epochs=10,
    batch_size=32,
    validation_split=0.2
)
elapsed_time = timer() - start_time

print('Fit Model Time = ',elapsed_time)

Epoch 1/10


  return dispatch_target(*args, **kwargs)




  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fit Model Time =  57.2523125000007


In [21]:
# Evaluate the model with the testing data
test_results = model.evaluate(X_test, {'county_output': y_county_test, 'weather_output': y_weather_test})
test_results



[150309.625, 150309.625, 0.0, 0.27854061126708984, 0.0007041633944027126]

In [22]:
# Print the accuracy
print(f"County Accuracy: {test_results[3]}")
print(f"Weather Accuracy: {test_results[4]}")

County Accuracy: 0.27854061126708984
Weather Accuracy: 0.0007041633944027126
