# Data Visualization

In [None]:
import geopandas as gpd #Library for using Geospatial Dataframes
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import seaborn as sns #Library for Data Vizualisation
import pandas as pd

In [None]:
# Load the pre-cleaned records that every later cell reads from `data`.
data = pd.read_csv(filepath_or_buffer="data/clean_data.csv")

## Light Conditions

###### NOT: Yoğunluğa göre bakılabilir

In [None]:
# Bar chart of the number of rows per lighting label. `hue` repeats the x
# column so each bar gets its own colour; the redundant legend is turned off.
plt.figure(figsize=(20, 10))
sns.countplot(data=data, x="Light_Conditions", hue="Light_Conditions", legend=False)

##### Işıklandırma verisinin kategorizasyonu

In [None]:
# Collapse the lighting labels into a three-level code:
#   0 = daylight, 1 = dark with lit street lights, 2 = any other darkness label.
# NOTE(review): "Darkeness" is reproduced exactly as written; replace() only
# matches exact values, so confirm the spelling against the labels in the CSV
# before "correcting" it.
data["Darkness_Presence"] = data["Light_Conditions"].replace(
    {
        "Daylight: Street light present": 0,
        "Darkness: Street lights present and lit": 1,
        "Darkness: Street lighting unknown": 2,
        "Darkness: Street lights present but unlit": 2,
        "Darkeness: No street lighting": 2,
    }
)

In [None]:
# The raw label column is no longer needed once Darkness_Presence exists.
data = data.drop(columns=["Light_Conditions"])

## Weather Conditions

In [None]:
# Bar chart of the number of rows per weather label, one colour per label,
# without a legend (the x tick labels already name the categories).
plt.figure(figsize=(20, 10))
sns.countplot(data=data, x="Weather_Conditions", hue="Weather_Conditions", legend=False)

In [None]:
# Pie chart of the share of rows per weather label.
weather_cond = data["Weather_Conditions"].value_counts()

figure(figsize=(15, 15), dpi=80)

# value_counts() sorts by frequency (largest first), so the four biggest
# slices get a small 0.02 offset and the remaining ones are pulled out further.
# The explode tuple has 9 entries: plt.pie requires one entry per slice.
plt.pie(
    weather_cond.values,
    explode=(0.02, 0.02, 0.02, 0.02, 0.3, 0.4, 0.3, 0.4, 0.3),
    labels=weather_cond.index,
    colors=sns.color_palette(),
    autopct="%1.1f%%",
    startangle=30,
    textprops={"size": "medium"},
)
plt.title("How Do Weather Events Impact Accidents")
plt.legend()
plt.show()

##### Hava durumu verisinin kategorizasyonu

In [None]:
# Group the detailed weather labels into three coarse classes based on wind/fog.
# Labels that are not listed here (e.g. "Unknown") are left unchanged.
data["Weather_Conditions"] = data["Weather_Conditions"].replace(
    {
        "Raining without high winds": "No High winds",
        "Fine without high winds": "No High winds",
        "Snowing without high winds": "No High winds",
        "Other": "No High winds",
        "Fine with high winds": "High winds",
        "Raining with high winds": "High winds",
        "Fog or mist": "Fog",
        "Snowing with high winds": "High winds",
    }
)

In [None]:
# Numeric code for each coarse weather class.
# NOTE(review): "Fog" shares code 0 with "No High winds" and "Unknown", and
# code 1 is never used — confirm whether "Fog" was meant to be 1.
mapping = {"No High winds": 0, "High winds": 2, "Fog": 0, "Unknown": 0}

# Overwrite the column in place with the numeric codes.
data["Weather_Conditions"] = data["Weather_Conditions"].replace(to_replace=mapping)

## Road Conditions

In [None]:
# Bar chart of the number of rows per road-surface label.
plt.figure(figsize=(20, 10))
# `palette` is defined here but is not passed to countplot in this cell.
palette = ["r", "g", "b", "orange", "purple"]
sns.countplot(
    data=data,
    x="Road_Surface_Conditions",
    hue="Road_Surface_Conditions",
    legend=False,
)

In [None]:
# Pie chart of the share of rows per road-surface label.
road_cond = data["Road_Surface_Conditions"].value_counts()

figure(
    figsize=(15, 15),
    dpi=80)

# value_counts() sorts by frequency (largest first), so the four biggest
# slices get a small 0.02 offset and the remaining ones are pulled out further.
# The explode tuple has 6 entries: plt.pie requires one entry per slice.
plt.pie(
    road_cond.values,
    labels = road_cond.index,
    colors = sns.color_palette(),
    startangle = 30,
    textprops={"size": "medium"},
    explode=(0.02,0.02,0.02,0.02,0.3,0.4),
    autopct="%1.1f%%")
plt.legend()
# The title previously repeated the weather chart's title; this chart shows
# road surface conditions.
plt.title("How Do Road Surface Conditions Impact Accidents")
plt.show()

In [None]:
# Print per-group row counts so the distribution of a column (or a
# combination of columns) can be inspected quickly.
def display_aggregation(data, *columns):
    """Print how many ``Accident_Index`` values exist per group.

    Args:
        data: DataFrame containing ``Accident_Index`` and every name in
            ``columns``.
        *columns: One or more column names to group by. At least one is
            required because the first one is used as the primary sort key.

    Prints the list of selected columns, then the table of non-null
    ``Accident_Index`` counts sorted by the first grouping column
    (ascending) and, within it, by count (descending). Returns ``None``.
    """
    group_keys = list(columns)
    selected = [*group_keys, "Accident_Index"]
    print(selected)

    counts = data[selected].groupby(by=group_keys).count()
    # The first grouping key is an index level after groupby; sort_values
    # accepts it alongside the count column.
    ordered = counts.sort_values(
        by=[group_keys[0], "Accident_Index"],
        ascending=[True, False],
    )
    print(ordered.to_string())

In [None]:
# Print the number of rows recorded under each road-surface label.
display_aggregation(data,"Road_Surface_Conditions")

In [None]:
# Rank the road-surface labels from 5 ("Dry") down to 0 ("Not Specified").
mapping = dict(
    zip(
        [
            "Dry",
            "Wet/Damp",
            "Frost/Ice",
            "Snow",
            "Flood (Over 3cm of water)",
            "Not Specified",
        ],
        range(5, -1, -1),
    )
)
# pd.NA is mapped to itself rather than being given a rank.
mapping[pd.NA] = pd.NA

# Store the numeric ranks in a new column; the original labels are kept.
data["Road_Surface_Conditions_Ordinal"] = data["Road_Surface_Conditions"].replace(to_replace=mapping)

In [None]:
# Count plot of the ordinal road-surface codes.
# `order` must be the list of category levels to draw, not the raw column:
# passing the whole Series asked seaborn for one bar position per row.
# The levels are taken from value_counts(), i.e. most frequent code first.
sns.countplot(
    data,
    x="Road_Surface_Conditions_Ordinal",
    order=data["Road_Surface_Conditions_Ordinal"].value_counts().index,
)

In [None]:
# 2x2 grid: road-surface code counts, split by one other variable per panel.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
variables = ["Urban_or_Rural_Area", "Accident_Severity","Darkness_Presence", "Weather_Conditions"]

# axs.flat walks the grid row by row, so the panels are filled in the same
# order as the list above.
for ax, var in zip(axs.flat, variables):
    sns.countplot(data, x="Road_Surface_Conditions_Ordinal", hue=var, ax=ax)
    ax.set_title(var)
    # Pin each legend to the upper right corner of its panel.
    ax.legend(loc="upper right")

fig.suptitle("Road Surface Conditions vs. Other Variables")

# Re-space the panels so titles and legends do not overlap.
fig.tight_layout()

plt.show()

In [None]:
# Print row counts per road-surface code, broken down first by vehicle count
# and then by casualty count, with a blank-ish separator line in between.
for position, measure in enumerate(("Number_of_Vehicles", "Number_of_Casualties")):
    if position:
        print(" ")
    print(measure)
    display_aggregation(data, "Road_Surface_Conditions_Ordinal", measure)

In [None]:
# Print the number of rows recorded under each ordinal road-surface code.
display_aggregation(data,"Road_Surface_Conditions_Ordinal")

In [None]:
# Work on an independent copy from here on, with the road-surface labels
# replaced by their ordinal codes. `data` itself is left untouched.
data_cleaned = data.copy()
data_cleaned["Road_Surface_Conditions"] = data_cleaned["Road_Surface_Conditions_Ordinal"]
# Last expression of the cell: displays the frame in the notebook.
data_cleaned

##### Junction Control

In [None]:
# Print the number of rows recorded under each Junction_Control value.
display_aggregation(data,"Junction_Control")

##### Number of Vehicles / Number of Casualties

In [None]:
# Scatter of vehicles involved against casualties, one point per row.
sns.scatterplot(
    data=data_cleaned,
    x="Number_of_Vehicles",
    y="Number_of_Casualties",
)

In [None]:
# Box plot of the vehicle counts, used to eyeball extreme values.
sns.boxplot(data=data_cleaned["Number_of_Vehicles"])

In [None]:
# Box plot of the casualty counts, used to eyeball extreme values.
sns.boxplot(data=data_cleaned["Number_of_Casualties"])

###### Deleting Outliers

In [None]:
# Keep only rows with fewer than 25 vehicles and fewer than 50 casualties;
# rows with a missing value in either column fail the comparison and are dropped.
data_cleaned = data_cleaned[
    (data_cleaned["Number_of_Casualties"] < 50)
    & (data_cleaned["Number_of_Vehicles"] < 25)
]

##### Pedestrian Crossing Human Control & Pedestrian Crossing Physical Facilities

In [None]:
# Five-colour palette installed as the seaborn default for the plots that follow.
custom_palette = [
    "#FF5733",
    "#3498DB",
    "#F39C12",
    "#2ECC71",
    "#9B59B6",
]
sns.set_palette(palette=custom_palette)

In [None]:
# Number of rows per human-controlled pedestrian crossing value, one colour each.
sns.countplot(
    data=data_cleaned,
    x="Pedestrian_Crossing-Human_Control",
    hue="Pedestrian_Crossing-Human_Control",
)

In [None]:
# Number of rows per physical pedestrian-crossing facility value, one colour each.
plt.figure(figsize=(20, 10))
sns.countplot(
    data=data_cleaned,
    x="Pedestrian_Crossing-Physical_Facilities",
    hue="Pedestrian_Crossing-Physical_Facilities",
)

In [None]:
# Row count for each distinct Pedestrian_Crossing-Human_Control value.
data_cleaned.groupby("Pedestrian_Crossing-Human_Control").size()

#### Çok anlamlı veri içermedikleri için bu sütunları sildim

In [None]:
# Remove both pedestrian-crossing columns from the cleaned frame.
data_cleaned = data_cleaned.drop(
    columns=[
        "Pedestrian_Crossing-Human_Control",
        "Pedestrian_Crossing-Physical_Facilities",
    ]
)

## Accident Severity

In [None]:
# Human-readable labels for the numeric severity codes 1, 2 and 3.
mappings = dict(enumerate(("Killed", "Serious", "Slight"), start=1))

# Plot from a copy of `data` (not `data_cleaned`), so the outlier filter and
# column drops applied to `data_cleaned` do not affect these charts.
data_copy = data.copy()
data_copy["Accident_Severity_Index"] = data_copy["Accident_Severity"].map(mappings)

In [None]:
# Number of rows per numeric severity code.
plt.figure(figsize=(20, 10))
sns.countplot(data=data_copy, x="Accident_Severity", hue="Accident_Severity", legend=False)

In [None]:
# Same chart as the numeric one, but with the text labels on the x axis.
plt.figure(figsize=(20, 10))
sns.countplot(
    data=data_copy,
    x="Accident_Severity_Index",
    hue="Accident_Severity_Index",
    legend=False,
)

## Accidents by year and weekdays

In [None]:
# Weekday names in calendar order; also the display order for the counts.
sorter = ["Monday", "Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]

# Day codes 1..7 are labelled Monday..Sunday.
# NOTE(review): this assumes code 1 means Monday. If the column follows the
# UK STATS19 coding (1 = Sunday), every label is shifted by one day —
# confirm against the dataset's data dictionary.
mappings = dict(enumerate(sorter, start=1))

data_copy1 = data.copy()
data_copy1["Day_of_Week_Index"] = data_copy1["Day_of_Week"].map(mappings)

# value_counts() orders by frequency; reindex puts the counts back into
# Monday..Sunday order for plotting.
week1 = data_copy1["Day_of_Week_Index"].value_counts()
week2 = week1.reindex(sorter)
week2.index

In [None]:
# Row counts per year and per weekday. `week` and `week1` are computed here
# but the weekday chart is drawn from `week2`, built in an earlier cell.
year = data["Year"].value_counts()
week = data["Day_of_Week"].value_counts()
week1 = data_copy1["Day_of_Week_Index"].value_counts()

plt.figure(figsize=(20, 9), facecolor="grey")

# Left panel: vertical bars, one per year.
years_ax = plt.subplot(1, 2, 1)
sns.barplot(x=year.index, y=year.values, ax=years_ax)
years_ax.set_ylabel("Number of Accidents", weight="bold")
years_ax.set_xlabel("Years", weight="bold")
years_ax.set_title("Number of Traffic Acciddents By years", weight="bold")

# Right panel: horizontal bars, one per weekday in Monday..Sunday order.
weekdays_ax = plt.subplot(1, 2, 2)
weekdays_ax.barh(week2.index, week2.values)
weekdays_ax.set_title("Number of Traffic Acciddents By Weekdays", weight="bold")
weekdays_ax.set_xlabel("Number of Accidents", weight="bold")
weekdays_ax.set_ylabel("Weekdays", weight="bold")

plt.show()

In [None]:
# Temporary month column: characters 3-4 of the date string, as an int
# (the month when Date is formatted dd/mm/yyyy).
# NOTE(review): these columns are added to `data`; in top-to-bottom order
# `data_cleaned` was already copied from `data`, so it does not receive them.
data["int-Date"] = [int(date_text[3:5]) for date_text in data["Date"]]
# Temporary hour column: first two characters of the time value, as an int.
data["int-Time"] = [int(str(time_value)[0:2]) for time_value in data["Time"]]

In [None]:
# Print row counts for every (Day_of_Week, Accident_Severity) combination,
# ordered by day and, within a day, by descending count.
display_aggregation(data_cleaned,"Day_of_Week","Accident_Severity")

#### 2nd Road Class

In [None]:
# Rows per severity code, with one bar per 2nd_Road_Class value inside each group.
plt.figure(figsize=(10, 10))
sns.countplot(data=data_cleaned, x="Accident_Severity", hue="2nd_Road_Class")

In [None]:
# Same chart after excluding rows whose 2nd_Road_Class is -1 or 6, so the
# remaining classes are visible at a readable scale.
plt.figure(figsize=(10, 10))
data_test = data_cleaned[~data_cleaned["2nd_Road_Class"].isin([-1, 6])]
sns.countplot(data=data_test, x="Accident_Severity", hue="2nd_Road_Class")

##### Çok anlamlı veri içermediği için 2nd Road Class sütununu sildim.

In [None]:
# Remove the second-road class and number columns from the cleaned frame.
data_cleaned = data_cleaned.drop(columns=["2nd_Road_Class", "2nd_Road_Number"])

#### Special Conditions at Site

In [None]:
# Row count for each distinct Special_Conditions_at_Site value.
data_cleaned.groupby(["Special_Conditions_at_Site"]).size()

In [None]:
# Rows per severity code split by special site condition, leaving out rows
# whose condition is the literal string "None".
sns.countplot(
    data=data_cleaned[data_cleaned["Special_Conditions_at_Site"] != "None"],
    x="Accident_Severity",
    hue="Special_Conditions_at_Site",
)

### Veri incelendi ve görselleştirildi. Elde edilen sonuçlara göre son temizleme işlemleri yapıldı.

In [None]:
# Persist the cleaned frame.
# NOTE(review): the index is written as an extra unnamed column because
# index=False is not passed; confirm whether readers of this file rely on it.
data_cleaned.to_csv(path_or_buf="data/data_cleaned.csv")

In [None]:
# Build the final frame by removing administrative, identifier and hazard
# columns from the cleaned data. `data_cleaned` itself is not modified.
final_data = data_cleaned.drop(
    columns=[
        "Local_Authority_(District)",
        "Local_Authority_(Highway)",
        "Carriageway_Hazards",
        "Accident_Index",
        "Police_Force",
        "1st_Road_Number",
    ]
)

In [None]:
# Combine the Date (dd/mm/yyyy) and Time (HH:MM) strings, parse them, and
# store the result as ISO 8601 text with microseconds.
# NOTE(review): the trailing "Z" is a literal character; the parsed
# timestamps carry no timezone, so confirm the source times really are UTC.
final_data["datetime"] = pd.to_datetime(
    final_data["Date"] + " " + final_data["Time"],
    format="%d/%m/%Y %H:%M",
).dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

In [None]:
# Remove the raw date/time, grid-coordinate, weekday and road-surface columns
# from the final frame (the combined `datetime` column was built beforehand).
final_data = final_data.drop(
    columns=[
        "Date",
        "Time",
        "Location_Easting_OSGR",
        "Location_Northing_OSGR",
        "Day_of_Week",
        "Road_Surface_Conditions",
    ]
)

In [None]:
# Remove the "Unnamed: 0" column (presumably a saved index picked up when
# the CSV was read — verify). Raises KeyError if the column is absent.
final_data = final_data.drop(columns=["Unnamed: 0"])

In [None]:
# final_data=final_data.drop(["int-Time","int-Date"],axis=1)

In [None]:
# Show the first rows of the final frame as a sanity check of the remaining columns.
final_data.head()