In [66]:
import pandas as pd
import numpy as np


#### Reading data

In [35]:
CA_data = pd.read_csv('CA_data.csv')

columns = ['Weather Vehicle 1', 'Lighting Vehicle 1', 'Roadway Surface Vehicle 1', 
           'Movement Preceding Collision Vehicle 1','Vehicle 1 was Stopped in Traffic', 
           'Vehicle 1 was Moving','Number of Vehicles involved in Accident (w V1)']



### Preprocess Data

In [36]:
# dropping columns with null data
for col in columns:
    print("\n")
    CA_data.drop(CA_data[CA_data[col] == 'Not Available'].index, inplace=True)
    



















### Grouping data



#### Column Weather Vehicle 1
We are grouping Fog/visiblity with Dark because they are visual impariements\
We are grouping slippery, wind with raining because when it rains, it usally reults in slipperiness and sometimes wind\

In [37]:
# grouping less frequent data 
CA_data['Weather Vehicle 1'].replace(['Fog/Visibility', 'Dark'], 'Fog/Visibility/Dark', inplace=True)
CA_data['Weather Vehicle 1'].replace(['Slippery', 'Wind'], 'Raining', inplace=True)

print(CA_data['Weather Vehicle 1'].value_counts())

Clear                  263
Cloudy                  29
Raining                 14
Fog/Visibility/Dark      4
Name: Weather Vehicle 1, dtype: int64


#### Lighting Vehicle 1
Grouping no unusla condition and daylight b/c they basically the same\
Grouping Dusk/dawn with dark with street lights b/c they are relatively similar conditions\

In [38]:
CA_data['Lighting Vehicle 1'].replace(['No unusual condition'], 'Daylight', inplace=True)
CA_data['Lighting Vehicle 1'].replace(['Dusk/Dawn'], 'Dark w Street-lights', inplace=True)


print(CA_data['Lighting Vehicle 1'].value_counts())

Daylight                                225
Dark w Street-lights                     81
No unusual conditions                     2
Dark w Non-functioning Street-lights      2
Name: Lighting Vehicle 1, dtype: int64


#### Movement Preceding Collision Vehicle 1
Wet roads = slippery roads\
Dropped other and proceeding straight b/c they dont provide much value\

In [39]:

CA_data['Roadway Surface Vehicle 1'].replace(['Slippery'], 'Wet', inplace=True)
CA_data.drop(CA_data[(CA_data['Roadway Surface Vehicle 1'] == 'Other') | (CA_data['Roadway Surface Vehicle 1'] == 'Proceeding Straight')].index, inplace=True)

print(CA_data['Roadway Surface Vehicle 1'].value_counts())


Dry    286
Wet     22
Name: Roadway Surface Vehicle 1, dtype: int64


### Movement Preceding Collusion vehicle 1

#### grouping turns, high movements, parking, and dropping others

In [40]:
CA_data['Movement Preceding Collision Vehicle 1'].replace(['Making Right Turn', 'Making Left Turn', 'Making U turn', 'Making Right Turn, Slowing/Stopping'], 'Turning', inplace=True)
CA_data['Movement Preceding Collision Vehicle 1'].replace(['Changing Lanes', 'Entering Traffic', 'Entrering Traffic','Xing into opposing lane', 'Passing Other Vehicle'], 'Highway movement', inplace=True)
CA_data['Movement Preceding Collision Vehicle 1'].replace(['Slowing/Stopping', 'Stopped in Traffic', 'Parked', 'Stopped, Merging'], 'Stopped', inplace=True)
CA_data['Movement Preceding Collision Vehicle 1'].replace(['Parking Manuerver'], 'Parking', inplace=True)
CA_data.drop(CA_data[(CA_data['Movement Preceding Collision Vehicle 1'] == 'Other')].index, inplace=True)

print(CA_data['Movement Preceding Collision Vehicle 1'].value_counts())

Stopped                151
Proceeding Straight     91
Turning                 29
Parking                 11
Highway movement        10
Backing                  8
Parking Manuever         3
Name: Movement Preceding Collision Vehicle 1, dtype: int64


In [41]:
print(columns)

['Weather Vehicle 1', 'Lighting Vehicle 1', 'Roadway Surface Vehicle 1', 'Movement Preceding Collision Vehicle 1', 'Vehicle 1 was Stopped in Traffic', 'Vehicle 1 was Moving', 'Number of Vehicles involved in Accident (w V1)']


Fixing some off the mislabeled data

In [42]:

# print(CA_data['Vehicle 1 was Stopped in Traffic'].value_counts())  
# print(CA_data['Vehicle 1 was Moving'].value_counts())

CA_data['Vehicle 1 was Stopped in Traffic'].replace(['/Off'], 'Yes', inplace=True)
CA_data['Vehicle 1 was Moving'].replace(['/Off'], 'No', inplace=True)

CA_data['Vehicle 1 was Stopped in Traffic'].replace(['yes'], 'Yes', inplace=True)
CA_data['Vehicle 1 was Moving'].replace(['Moving'], 'Yes', inplace=True)

print(CA_data['Vehicle 1 was Stopped in Traffic'].value_counts())  
print(CA_data['Vehicle 1 was Moving'].value_counts())

No     192
Yes    165
Name: Vehicle 1 was Stopped in Traffic, dtype: int64
Yes    183
No     174
Name: Vehicle 1 was Moving, dtype: int64


In [43]:
print(CA_data['Number of Vehicles involved in Accident (w V1)'].value_counts())

2.0    305
1.0     43
3.0      6
Name: Number of Vehicles involved in Accident (w V1), dtype: int64


In [44]:
# one hot encode our new columns
categorical_columns = ['Weather Vehicle 1', 'Lighting Vehicle 1', 'Roadway Surface Vehicle 1', 
                        'Movement Preceding Collision Vehicle 1']

encoded_CA_data = pd.get_dummies(CA_data, columns=categorical_columns, prefix='encoded_')
encoded_columns = encoded_CA_data.columns[encoded_CA_data.columns.str.startswith('encoded_')]



Preprocess outputs

In [45]:
from sklearn.preprocessing import LabelEncoder

encoded_CA_data.drop(encoded_CA_data[(encoded_CA_data['Vehicle Damage'] == 'Not Available')].index, inplace=True)



label_encoder = LabelEncoder()
encoded_CA_data['Vehicle Damage'] = label_encoder.fit_transform(encoded_CA_data['Vehicle Damage'])
encoded_CA_data['Vehicle 1 was Stopped in Traffic'] = label_encoder.fit_transform(encoded_CA_data['Vehicle 1 was Stopped in Traffic'])

encoded_CA_data['Vehicle 1 was Moving'] = label_encoder.fit_transform(encoded_CA_data['Vehicle 1 was Moving'])


print(encoded_CA_data['Vehicle Damage'].value_counts())
print(encoded_CA_data['Vehicle 1 was Stopped in Traffic'].value_counts())
print(encoded_CA_data['Vehicle 1 was Moving'].value_counts())



1    217
3     53
2     49
0      8
Name: Vehicle Damage, dtype: int64
0    175
1    152
Name: Vehicle 1 was Stopped in Traffic, dtype: int64
1    169
0    158
Name: Vehicle 1 was Moving, dtype: int64


In [46]:
# Split data into training and testing
from sklearn.model_selection import train_test_split

X = encoded_CA_data[encoded_columns]
y = encoded_CA_data['Vehicle Damage']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)   



In [47]:
y.value_counts()

1    217
3     53
2     49
0      8
Name: Vehicle Damage, dtype: int64

### SVM

In [51]:
from sklearn import svm
from sklearn.model_selection import cross_val_score


clf = svm.SVC(kernel='linear')
scores = cross_val_score(clf, X, y, cv=7)

print(scores)
print(np.mean(scores))




[0.65957447 0.63829787 0.74468085 0.82978723 0.80851064 0.76086957
 0.67391304]
0.730804810360777


### Neural network

In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class FNN(nn.Module):
    def __init__(self, input_channels, output_channels):

        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_channels, 6)
        self.fc2 = nn.Linear(6, 4)
        self.fc3 = nn.Linear(4, output_channels)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [53]:
y_train.value_counts()

1    174
3     41
2     40
0      6
Name: Vehicle Damage, dtype: int64

In [69]:
X_train_np = X_train.values
X_test_np = X_test.values
y_train_np = y_train.values
y_test_np = y_test.values


X_train_tensor = torch.tensor(X_train_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_np, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_np, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_np, dtype=torch.float32)


In [71]:
# model
model = FNN(X_train_tensor.shape[1], y.value_counts().shape[0])
print(X_train_tensor.shape[1], y.value_counts().shape[0])
device = 'cuda'
print(torch.__version__)
model.to(device)   

# hyper parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 100


15 4
2.3.0


DeferredCudaCallError: CUDA call failed lazily at initialization with error: module 'torch' has no attribute 'version'

CUDA call was originally invoked at:

  File "c:\Users\bobth\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Users\bobth\anaconda3\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\bobth\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
    self.io_loop.start()
  File "c:\Users\bobth\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "c:\Users\bobth\anaconda3\lib\asyncio\base_events.py", line 601, in run_forever
    self._run_once()
  File "c:\Users\bobth\anaconda3\lib\asyncio\base_events.py", line 1905, in _run_once
    handle._run()
  File "c:\Users\bobth\anaconda3\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
    await self.process_one()
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
    await dispatch(*args)
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
    await result
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 730, in execute_request
    reply_content = await reply_content
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 390, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\Users\bobth\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 528, in run_cell
    return super().run_cell(*args, **kwargs)
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
    result = self._run_cell(
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
    return runner(coro)
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
    coro.send(None)
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "c:\Users\bobth\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\bobth\AppData\Local\Temp\ipykernel_7424\864734964.py", line 1, in <module>
    import torch
  File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "c:\Users\bobth\anaconda3\lib\site-packages\torch\__init__.py", line 1478, in <module>
    _C._initExtension(manager_path())
  File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "c:\Users\bobth\anaconda3\lib\site-packages\torch\cuda\__init__.py", line 238, in <module>
    _lazy_call(_check_capability)
  File "c:\Users\bobth\anaconda3\lib\site-packages\torch\cuda\__init__.py", line 235, in _lazy_call
    _queued_calls.append((callable, traceback.format_stack()))


In [None]:
from sklearn.metrics import accuracy_score


for epoch in range(epochs):
    inputs, targets = X_train_tensor.to(device), y_train_tensor.to(device)
    optimizer.zero_grad()  # Zero the gradients
    outputs = model(inputs)  # Forward pass
    loss = criterion(outputs, targets)  # Compute the loss
    loss.backward()  # Backward pass (compute gradients)
    optimizer.step()  # Update the weights

    
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')



Epoch [1/100], Loss: 1.363842248916626
Epoch [2/100], Loss: 1.3598623275756836
Epoch [3/100], Loss: 1.3557498455047607
Epoch [4/100], Loss: 1.3516643047332764
Epoch [5/100], Loss: 1.3476084470748901
Epoch [6/100], Loss: 1.3435776233673096
Epoch [7/100], Loss: 1.3395769596099854
Epoch [8/100], Loss: 1.3356046676635742
Epoch [9/100], Loss: 1.3316713571548462
Epoch [10/100], Loss: 1.3277736902236938
Epoch [11/100], Loss: 1.3239063024520874
Epoch [12/100], Loss: 1.3200697898864746
Epoch [13/100], Loss: 1.3162603378295898
Epoch [14/100], Loss: 1.312514066696167
Epoch [15/100], Loss: 1.3088107109069824
Epoch [16/100], Loss: 1.305137276649475
Epoch [17/100], Loss: 1.3014858961105347
Epoch [18/100], Loss: 1.2978638410568237
Epoch [19/100], Loss: 1.2942626476287842
Epoch [20/100], Loss: 1.2906603813171387
Epoch [21/100], Loss: 1.287085771560669
Epoch [22/100], Loss: 1.2835372686386108
Epoch [23/100], Loss: 1.2800122499465942
Epoch [24/100], Loss: 1.2765142917633057
Epoch [25/100], Loss: 1.27303

In [None]:
# Evaluation on test data

model.eval()
with torch.no_grad():
    inputs, targets = X_test_tensor.to(device), y_test_tensor.to(device)
    outputs = model(inputs)
    _, predicted = torch.max(outputs, 1)
    accuracy = accuracy_score(predicted.cpu(), targets.cpu())
    
    print(f'Test Accuracy: {accuracy}')

Test Accuracy: 0.6515151515151515
