In [None]:
import plotly.express as px
import plotly
# Initialise Plotly's offline notebook mode before any figure is drawn.
# NOTE(review): presumably this loads plotly.js into the page so figures render
# inline; recent Plotly versions may not need the call — confirm.
plotly.offline.init_notebook_mode()

In [None]:
import pandas as pd
import os

# Location of the Queensland crash-locations CSV.
# Set the CRASH_DATA_CSV environment variable to point at your own copy of the
# file; when it is unset, the original author's local path is used as before.
CRASH_DATA_CSV = os.environ.get(
    'CRASH_DATA_CSV',
    '/home/theo2bel/Downloads/archive(3)/crash_data_queensland_1_crash_locations.csv',
)

# One row per crash; every visualization below reads from this frame.
df = pd.read_csv(CRASH_DATA_CSV)

# First visualization

In [None]:
# Crash counts per day of the week, bars ranked from the busiest day down.
fig = px.histogram(data_frame=df, x='Crash_Day_Of_Week')
fig.update_layout(xaxis_categoryorder='total descending')

We see that most crashes happen at the end of the week. A plausible explanation is that people are often more tired on the last days of the week, which makes them more likely to have a crash.

# Second visualization

In [None]:
# Crash counts per crash nature, bars ranked from the most to the least frequent.
fig = px.histogram(data_frame=df, x='Crash_Nature')
fig.update_layout(xaxis_categoryorder='total descending')

We see that most of the crashes are angle, rear-end and hit-object crashes. Since angle and rear-end crashes dominate, one hypothesis is that the sensors at the rear of a car are more sensitive than those at the front or on the sides. Crashes into objects are also common, probably because objects are not vehicles, so drivers pay less attention to them than to other vehicles.

# Third visualization

In [None]:
# Crash counts per severity level, bars ranked from the most to the least frequent.
fig = px.histogram(data_frame=df, x='Crash_Severity')
fig.update_layout(xaxis_categoryorder='total descending')

What we can deduce here is that, most of the time, a crash injures people rather than causing property damage only.\
Also, when there are injuries, they are usually quite serious: there are more hospitalisations and medical treatments than minor injuries.

# Fourth visualization

In [None]:
import plotly.express as px

# Plot only a reproducible 1 % sample of the crashes: with every crash on the
# map the figure becomes too large.
df_random_sample = df.sample(frac=0.01, random_state=42)

# Severity levels from least to most severe. A level's position in this list
# is the numeric code used to colour its crashes.
severity_order = ['Property damage only', 'Minor injury', 'Medical treatment', 'Hospitalisation', 'Fatal']

# Build the codes against the explicit level list rather than with
# reorder_categories(): that method raises ValueError as soon as the sample does
# not contain exactly the levels above (for example when no fatal crash was
# drawn). Here an absent level is simply unused, and a missing or unlisted
# value gets the code -1.
severity_codes = pd.Categorical(
    df_random_sample['Crash_Severity'], categories=severity_order, ordered=True
).codes
df_random_sample = df_random_sample.assign(Severity_Color=severity_codes)

# One marker per sampled crash; its colour depends on the severity code
# (see the colour bar for the matching severity names).
fig = px.scatter_mapbox(df_random_sample,
                        lat='Crash_Latitude',
                        lon='Crash_Longitude',
                        color='Severity_Color',
                        color_continuous_scale='Viridis',
                        size_max=15,
                        opacity=0.7,
                        hover_name='Crash_Severity',
                        title='Crash Severity Map',
                        mapbox_style='carto-positron',
                        zoom=3)

# The colour axis is numeric, so label its ticks with the severity names.
# No Mapbox access token is configured: the 'carto-positron' style does not
# require one, and a placeholder string should not be shipped as a credential.
fig.update_layout(coloraxis_colorbar=dict(tickvals=list(range(len(severity_order))), ticktext=severity_order))

fig.show()

This map shows that most of the crashes in Queensland happen on the east coast, and that they are concentrated in the cities (Brisbane, Toowoomba).

# Fifth visualization

In [None]:
import plotly.express as px

# Number of crashes for every (year, severity) pair, as a flat frame with a 'Count' column.
df_grouped = (
    df.groupby(['Crash_Year', 'Crash_Severity'])
      .size()
      .reset_index(name='Count')
)

# One line per severity level, tracing its yearly crash count.
fig = px.line(
    data_frame=df_grouped,
    x='Crash_Year',
    y='Count',
    color='Crash_Severity',
    title='Crash Counts by Year and Severity',
    labels=dict(Crash_Year='Year', Count='Number of Crashes', Crash_Severity='Severity'),
)
fig.update_layout(xaxis_categoryorder='category ascending')

fig.show()


The number of crashes for each severity level is approximately constant over time. In some years hospitalisations outnumber crashes that only required medical treatment, and in other years the opposite is true.

# Sixth visualization

In [None]:
import plotly.express as px


# Crash count for every (atmospheric condition, severity) pair.
df_grouped = (
    df.groupby(['Crash_Atmospheric_Condition', 'Crash_Severity'])
      .size()
      .reset_index(name='Count')
)

# Add, per row, the total for its atmospheric condition and the share of that
# total taken by the row's severity level.
df_grouped = df_grouped.assign(
    Total_Count=lambda t: t.groupby('Crash_Atmospheric_Condition')['Count'].transform('sum'),
    Proportion=lambda t: t['Count'] / t['Total_Count'],
)

# Stacked bars: one bar per atmospheric condition, split by severity share.
fig = px.bar(
    data_frame=df_grouped,
    x='Crash_Atmospheric_Condition',
    y='Proportion',
    color='Crash_Severity',
    title='Proportion of Crash Severity by Atmospheric Condition',
    labels=dict(
        Crash_Atmospheric_Condition='Atmospheric Condition',
        Proportion='Proportion',
        Crash_Severity='Severity',
    ),
    barmode='stack',
)
fig.show()

This graph shows the proportion of each crash severity under the different atmospheric conditions.\
We can see that the share of fatal crashes is higher in fog and smoke than in clear weather. Moreover, hospitalisations are also more frequent in those conditions.