In [2]:
# SOURCE
SOURCE = "https://www.tesladeaths.com"


In [3]:
from requests_html import HTMLSession, Element

session = HTMLSession()
r: session = session.get(SOURCE)


Getting the `table` element that houses the data.



In [4]:
table: Element = r.html.find("#dttable", first=True)
table


<Element 'table' class=('waffle',) cellspacing='0' cellpadding='0' aria-describedby='tblDesc' id='dttable' summary='Crashes involving Tesla deaths, from 2013 to 2023'>

Finding all row headers using `th` tag.



In [5]:
table_headers = table.find("th")
table_headers = [row.text for row in table_headers]
table_headers


['Case #',
 'Year',
 'Date',
 'Country',
 'State',
 'Description',
 'Deaths',
 'Tesla driver',
 'Tesla occupant',
 'Other vehicle',
 'Cyclists/ Peds',
 'TSLA+cycl / peds',
 'Model',
 'AutoPilot claimed',
 'Reported in NHTSA SGO',
 'Verified Tesla Autopilot Death',
 'Excerpt Verifying Tesla Autopilot Deaths',
 'Source',
 'Note',
 'Deceased 1',
 'Deceased 2',
 'Deceased 3',
 'Deceased 4']

Collecting all data rows and excluding the last 350 since there is no usable data in there.



In [6]:
table_data = table.find("td")[:-350]
# * The last 350 td"s do not have useful data


There were some challenges arranging the data in the right way.

-   The urls in the table are truncated, which means simply finding the text would not suffice.

-   There are more than **one** URL per row. There are numbers in the table are _hyperlinked_ which means there are more URLs than are rows, which makes it difficult to simply find all the `a` tags and plug them into the table at the right index while looping through all the elements in `table_data`. Speaking of which...

-   The collection of elements in `table_data` is simply a dump of the table; not by row. There are 23 columns in the table therefore, for each 23 elements found from the beginning is one row.



In [7]:
rows = []
for i in list(range(0, len(table_data), 23)):
    build_row = []
    for index, data in enumerate(table_data[0 + i : 23 + i]):
        if data.text.startswith("http"):
            build_row.append(*data.links)
        else:
            build_row.append(data.text)
    rows.append(build_row)


Confirming that I got the URLs right.



In [8]:
for row in rows:
    if row[0] == "349":
        print(row)


['349', '2023', '7/21/2023', 'USA', 'VA', 'Tesla crashes into side of truck', '1', '1', '-', '-', '-', '1', 'Y', '1', '13781-5996', '1', '-', 'https://web.archive.org/web/20230723185542/https://www.fauquier.com/news/article_0383d91a-2732-11ee-842d-739e7da7cbcf.html', '', 'Pablo Teodoro III', '', '', '']


Mandatory conversion to DataFrame üòÖ



In [9]:
import pandas as pd

df = pd.DataFrame(rows, columns=table_headers)
# Converting necessary columns from str to int values
int_value_columns = df.columns[6:12]
df[int_value_columns] = (
    df[int_value_columns].apply(pd.to_numeric, errors="coerce").fillna(0).astype("int")
)

df.dtypes


Case #                                      object
Year                                        object
Date                                        object
Country                                     object
State                                       object
Description                                 object
Deaths                                       int64
Tesla driver                                 int64
Tesla occupant                               int64
Other vehicle                                int64
Cyclists/ Peds                               int64
TSLA+cycl / peds                             int64
Model                                       object
AutoPilot claimed                           object
Reported in NHTSA SGO                       object
Verified Tesla Autopilot Death              object
Excerpt Verifying Tesla Autopilot Deaths    object
Source                                      object
Note                                        object
Deceased 1                     

In [10]:
df.head()


Unnamed: 0,Case #,Year,Date,Country,State,Description,Deaths,Tesla driver,Tesla occupant,Other vehicle,...,AutoPilot claimed,Reported in NHTSA SGO,Verified Tesla Autopilot Death,Excerpt Verifying Tesla Autopilot Deaths,Source,Note,Deceased 1,Deceased 2,Deceased 3,Deceased 4
0,377,2023,11/15/2023,USA,NC,Moped runs into Tesla,1,0,0,1,...,-,-,-,-,https://web.archive.org/web/20231117073442/htt...,,,,,
1,376,2023,11/4/2023,USA,WA,Tesla rolls over and hits enbankment,1,1,0,0,...,-,-,-,-,https://web.archive.org/web/20231106013012/htt...,,,,,
2,375,2023,11/3/2023,USA,FL,Tesla hits a dislodged motorcyclist,1,0,0,0,...,-,-,-,-,https://web.archive.org/web/20231106001806/htt...,,,,,
3,374,2023,10/30/2023,USA,NY,Tesla hits multiple cars,1,0,1,0,...,-,-,-,-,https://web.archive.org/web/20231031221805/htt...,,,,,
4,373,2023,10/30/2023,USA,NY,DUI,1,0,0,1,...,-,-,-,-,https://web.archive.org/web/20231031221230/htt...,,,,,


Played around with the new library `Polars` üêª‚Äç‚ùÑÔ∏è which is supposed to be [`fast as fuck boiiiii`](https://www.youtube.com/shorts/6E7ZGCfruaw). It is indeed, or should be in theory. `Polars` store data in DataFrames in _columnar_ format as opposed to the classical row format _Pandas_ üêº uses.



In [11]:
import polars as pl

pl_rows = pl.DataFrame(rows)
pl_rows


column_0,column_1,column_2,column_3,column_4,column_5,column_6,column_7,column_8,column_9,column_10,column_11,column_12,column_13,column_14,column_15,column_16,column_17,column_18,column_19,column_20,column_21,column_22,column_23,column_24,column_25,column_26,column_27,column_28,column_29,column_30,column_31,column_32,column_33,column_34,column_35,column_36,…,column_350,column_351,column_352,column_353,column_354,column_355,column_356,column_357,column_358,column_359,column_360,column_361,column_362,column_363,column_364,column_365,column_366,column_367,column_368,column_369,column_370,column_371,column_372,column_373,column_374,column_375,column_376,column_377,column_378,column_379,column_380,column_381,column_382,column_383,column_384,column_385,column_386
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""377""","""376""","""375""","""374""","""373""","""372""","""371""","""370""","""369""","""368""","""367.1""","""367""","""366""","""365""","""364""","""363""","""361""","""360""","""359""","""358""","""357.1""","""357""","""356""","""355""","""354""","""353""","""352""","""351""","""350""","""349""","""348""","""346""","""345""","""344""","""343""","""342.2""","""342.1""",…,"""37""","""36""","""35""","""34""","""33""","""32""","""31""","""30""","""29""","""28""","""27""","""26""","""25""","""24""","""23""","""22""","""21""","""20""","""19""","""18""","""17""","""16""","""15""","""14""","""13""","""12""","""11""","""10""","""9""","""8""","""7""","""6""","""5""","""4""","""3""","""2""","""1"""
"""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""","""2023""",…,"""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2017""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2016""","""2015""","""2015""","""2015""","""2015""","""2015""","""2014""","""2014""","""2014""","""2014""","""2013""","""2013"""
"""11/15/2023""","""11/4/2023""","""11/3/2023""","""10/30/2023""","""10/30/2023""","""10/26/2023""","""10/14/2023""","""10/14/2023""","""10/14/2023""","""10/13/2023""","""10/12/2023""","""10/10/2023""","""10/4/2023""","""10/2/2023""","""9/30/2023""","""9/25/2023""","""9/24/2023""","""9/4/2023""","""9/1/2023""","""8/29/2023""","""8/14/2023""","""8/13/2023""","""8/12/2023""","""8/12/2023""","""8/1/2023""","""7/31/2023""","""7/29/2023""","""7/29/2023""","""7/22/2023""","""7/21/2023""","""7/20/2023""","""7/16/2023""","""7/8/2023""","""7/8/2023""","""7/8/2023""","""7/7/2023""","""7/7/2023""",…,"""11/22/2017""","""11/14/2017""","""9/24/2017""","""8/13/2017""","""7/21/2017""","""6/25/2017""","""6/10/2017""","""5/11/2017""","""4/26/2017""","""3/13/2017""","""1/17/2017""","""12/20/2016""","""11/23/2016""","""11/3/2016""","""10/10/2016""","""10/7/2016""","""9/7/2016""","""9/1/2016""","""8/15/2016""","""7/29/2016""","""7/22/2016""","""6/8/2016""","""5/7/2016""","""4/22/2016""","""4/8/2016""","""1/20/2016""","""12/28/2015""","""12/22/2015""","""11/18/2015""","""6/22/2015""","""1/22/2015""","""12/30/2014""","""7/14/2014""","""7/4/2014""","""7/4/2014""","""11/2/2013""","""4/2/2013"""
"""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""UK""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""UK""","""USA""","""Germany""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""Germany""","""USA""","""USA""",…,"""Switzerland""","""Australia""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""Norway""","""USA""","""Germany""","""USA""","""USA""","""USA""","""USA""","""USA""","""Holland""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""China""","""USA""","""Canada""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA""","""USA"""
"""NC""","""WA""","""FL""","""NY""","""NY""","""CA""","""-""","""WA""","""CA""","""CA""","""SC""","""MA""","""CA""","""CA""","""CA""","""VA""","""PA""","""VA""","""ID""","""TX""","""-""","""NJ""","""-""","""CA""","""CA""","""TX""","""UT""","""TX""","""NC""","""VA""","""CA""","""FL""","""MN""","""CA""","""-""","""CA""","""CA""",…,"""-""","""-""","""TN""","""AZ""","""AZ""","""CA""","""HI""","""FL""","""-""","""CA""","""-""","""CA""","""IL""","""IN""","""NH""","""CA""","""-""","""FL""","""CA""","""FL""","""CA""","""HI""","""FL""","""CA""","""OH""","""-""","""TX""","""-""","""CA""","""CA""","""CA""","""CA""","""CA""","""CA""","""CA""","""CA""","""CA"""
"""Moped runs int‚Ä¶","""Tesla rolls ov‚Ä¶","""Tesla hits a d‚Ä¶","""Tesla hits mul‚Ä¶","""DUI""","""Motorcycle cra‚Ä¶","""Cyclist hit by‚Ä¶","""Car hit by onc‚Ä¶","""Stolen Tesla h‚Ä¶","""Tesla hits div‚Ä¶","""Tesla hits tre‚Ä¶","""Tesla hits car‚Ä¶","""Head-on collis‚Ä¶","""Tesla heading ‚Ä¶","""Tesla hits pol‚Ä¶","""Rear-end car c‚Ä¶","""Collision""","""Motorcycle and‚Ä¶","""Tesla crosses ‚Ä¶","""Tesla crashes ‚Ä¶","""Tesla hits bic‚Ä¶","""Motorcycle hit‚Ä¶","""Tesla hits gua‚Ä¶","""Multi-car acci‚Ä¶","""Motorcycle hit‚Ä¶","""Hit and run""","""Tesla crashes ‚Ä¶","""Tesla crashes ‚Ä¶","""Tesla crashes ‚Ä¶","""Tesla crashes ‚Ä¶","""Multi-car acci‚Ä¶","""Tesla runs off‚Ä¶","""Speeding Tesla‚Ä¶","""Tesla crashes ‚Ä¶","""Tesla crashes ‚Ä¶","""Tesla hits fir‚Ä¶","""Multi Car acci‚Ä¶",…,"""Tesla rear end‚Ä¶","""Tesla kills cy‚Ä¶","""Tesla drives o‚Ä¶","""Tesla kills mo‚Ä¶","""Wrong way driv‚Ä¶","""Tesla strikes ‚Ä¶","""Unclear; see n‚Ä¶","""Tesla into onc‚Ä¶","""Tesla into onc‚Ä¶","""Tesla kills pe‚Ä¶","""Driver died in‚Ä¶","""Tesla kills pe‚Ä¶","""Driver dead in‚Ä¶","""Indianapolis f‚Ä¶","""Pileup""","""Tesla kills mo‚Ä¶","""Drove into woo‚Ä¶","""Motorcyclist r‚Ä¶","""Tesla rear end‚Ä¶","""Tesla kills pe‚Ä¶","""Passat strikes‚Ä¶","""Nissan Altima ‚Ä¶","""Autopilot into‚Ä¶","""Pedestrian kil‚Ä¶","""Chevy Cobalt w‚Ä¶","""Autopilot into‚Ä¶","""Sudden uninten‚Ä¶","""Struck by dump‚Ä¶","""Tesla kills pe‚Ä¶","""Tesla drives o‚Ä¶","""Tesla drives o‚Ä¶","""Tesla drives o‚Ä¶","""Tesla kills mo‚Ä¶","""Thief crashes ‚Ä¶","""Tesla rear end‚Ä¶","""Tesla kills cy‚Ä¶","""Tesla veers in‚Ä¶"
"""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""2""","""2""","""1""","""1""","""1""","""2""","""1""","""1""","""3""","""1""","""4""","""2""","""1""","""1""","""1""","""1""","""1""","""1""","""3""","""1""","""1""","""1""","""1""","""1""","""1""","""3""","""1""","""2""","""1""",…,"""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""2""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""1""","""3""","""1""","""2"""
"""-""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""1""","""1""","""1""","""-""","""1""","""1""","""1""","""-""","""""","""-""","""1""","""1""","""-""","""-""","""1""","""""","""-""","""-""","""1""","""1""","""1""","""1""","""-""","""1""","""-""","""1""","""1""","""1""","""-""",…,"""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""1""","""1""","""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""-""","""1""","""1""","""1""","""-""","""1""","""1""","""1""","""-""","""1""","""-""","""-""","""-"""
"""-""","""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""""","""-""","""2""","""1""","""-""","""-""","""-""","""""","""-""","""-""","""2""","""-""","""-""","""-""","""-""","""-""","""-""","""2""","""-""","""1""","""-""",…,"""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""""","""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""-"""
"""1""","""-""","""-""","""-""","""1""","""1""","""-""","""1""","""1""","""-""","""-""","""1""","""-""","""1""","""-""","""1""","""""","""1""","""1""","""-""","""-""","""1""","""-""","""""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""-""","""-""","""1""",…,"""1""","""-""","""-""","""1""","""1""","""1""","""1""","""1""","""1""","""-""","""-""","""-""","""-""","""-""","""1""","""1""","""-""","""1""","""-""","""-""","""1""","""1""","""-""","""-""","""1""","""-""","""-""","""-""","""-""","""-""","""-""","""-""","""1""","""-""","""3""","""-""","""2"""


In [12]:
df.dtypes


Case #                                      object
Year                                        object
Date                                        object
Country                                     object
State                                       object
Description                                 object
Deaths                                       int64
Tesla driver                                 int64
Tesla occupant                               int64
Other vehicle                                int64
Cyclists/ Peds                               int64
TSLA+cycl / peds                             int64
Model                                       object
AutoPilot claimed                           object
Reported in NHTSA SGO                       object
Verified Tesla Autopilot Death              object
Excerpt Verifying Tesla Autopilot Deaths    object
Source                                      object
Note                                        object
Deceased 1                     

The date is stored in the "weird" format, by which I mean mm/dd/yyyy is which utterly unreadable üòë It is possible to convert them into `datetime` objects.



In [13]:
df["Date"]


0      11/15/2023
1       11/4/2023
2       11/3/2023
3      10/30/2023
4      10/30/2023
          ...    
382     7/14/2014
383      7/4/2014
384      7/4/2014
385     11/2/2013
386      4/2/2013
Name: Date, Length: 387, dtype: object

In [14]:
from datetime import datetime
import logging

logging.basicConfig(
    format="%(asctime)s %(levelname)s:%(name)s:%(message)s", level=logging.INFO
)


def convert_datestring(date_string):
    try:
        date_object = datetime.strptime(date_string, "%m/%d/%Y")
        return date_object.strftime("%Y-%m-%d")
    except ValueError as e:
        logging.warning(f"Could not convert {date_string} into datetime object: {e}")
        logging.info("Assigning a random date.")
        date_parts = date_string.split("/")
        for index, date_part in enumerate(date_parts):
            try:
                int(date_part)
            except ValueError:
                date_parts[index] = "12"
                return convert_datestring("/".join(date_parts))


df["Date"] = df["Date"].apply(convert_datestring)


2024-01-19 23:33:31,028 INFO:root:Assigning a random date.
2024-01-19 23:33:31,030 INFO:root:Assigning a random date.
2024-01-19 23:33:31,033 INFO:root:Assigning a random date.


In [15]:
df["Date"]


0      2023-11-15
1      2023-11-04
2      2023-11-03
3      2023-10-30
4      2023-10-30
          ...    
382    2014-07-14
383    2014-07-04
384    2014-07-04
385    2013-11-02
386    2013-04-02
Name: Date, Length: 387, dtype: object

Now let's convert the dates into a datetime object



In [16]:
df["Date"] = pd.to_datetime(df["Date"])
df["Date"]


0     2023-11-15
1     2023-11-04
2     2023-11-03
3     2023-10-30
4     2023-10-30
         ...    
382   2014-07-14
383   2014-07-04
384   2014-07-04
385   2013-11-02
386   2013-04-02
Name: Date, Length: 387, dtype: datetime64[ns]

`Holland` and `Netherlands` appear in the list of countries, although they are the same! Replacing the former with the latter.



In [17]:
df["Country"].replace({"Holland": "Netherlands"}, inplace=True)


Finally! Write the data out to a `.csv` file.



In [18]:
df.to_csv("./data.csv", index=None)


`dtypes` change when reading from `csv`, as it tries to infer the data type for each column. For example, the year converts to `int64`. It is possible to change the `dtype` into something else by passing the `dtype` argument in `read_csv` using a `key-value` of a column name and desired `dtype`.



In [19]:
df = pd.read_csv("./data.csv")
df.dtypes


Case #                                      float64
Year                                          int64
Date                                         object
Country                                      object
State                                        object
Description                                  object
Deaths                                        int64
Tesla driver                                  int64
Tesla occupant                                int64
Other vehicle                                 int64
Cyclists/ Peds                                int64
TSLA+cycl / peds                              int64
Model                                        object
AutoPilot claimed                            object
Reported in NHTSA SGO                        object
Verified Tesla Autopilot Death               object
Excerpt Verifying Tesla Autopilot Deaths     object
Source                                       object
Note                                         object
Deceased 1  

In [20]:
df = pd.read_csv("./data.csv", dtype={"Case #": str, "Year": str, "Date": str})
df.dtypes


Case #                                      object
Year                                        object
Date                                        object
Country                                     object
State                                       object
Description                                 object
Deaths                                       int64
Tesla driver                                 int64
Tesla occupant                               int64
Other vehicle                                int64
Cyclists/ Peds                               int64
TSLA+cycl / peds                             int64
Model                                       object
AutoPilot claimed                           object
Reported in NHTSA SGO                       object
Verified Tesla Autopilot Death              object
Excerpt Verifying Tesla Autopilot Deaths    object
Source                                      object
Note                                        object
Deceased 1                     