In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Global Traffic Accident Analysis\n",
    "## By: Trần Phương Nam (2202085)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Import required libraries\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import folium\n",
    "from datetime import datetime\n",
    "\n",
    "# Set style for better visualizations.\n",
    "# The Matplotlib style name 'seaborn' was deprecated in Matplotlib 3.6 and\n",
    "# removed in later releases, so plt.style.use('seaborn') fails there.\n",
    "# Applying the theme through seaborn itself works regardless of that rename.\n",
    "sns.set_theme(style='darkgrid')\n",
    "sns.set_palette('husl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Read the accident records from the CSV file (path is relative to the\n",
    "# kernel's working directory)\n",
    "df = pd.read_csv('global_traffic_accidents.csv')\n",
    "\n",
    "# Convert Date to datetime so later cells can use the .dt accessor\n",
    "df['Date'] = pd.to_datetime(df['Date'])\n",
    "\n",
    "# DataFrame.info() prints its report itself and returns None, so it is called\n",
    "# bare; wrapping it in print() appended a stray \"None\" line to the output.\n",
    "print(\"Dataset Overview:\")\n",
    "df.info()\n",
    "\n",
    "# Leave head() as the cell's last expression so the notebook renders it as a\n",
    "# table instead of plain text.\n",
    "print(\"\\nFirst few records:\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Accident Frequency Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Count accidents per calendar month (Date bucketed into monthly periods)\n",
    "month_of_accident = df['Date'].dt.to_period('M')\n",
    "monthly_accidents = df.groupby(month_of_accident).size()\n",
    "\n",
    "# Draw the monthly counts as a marked line on an explicitly created Axes\n",
    "fig, ax = plt.subplots(figsize=(12, 6))\n",
    "monthly_accidents.plot(kind='line', marker='o', ax=ax)\n",
    "ax.set_title('Monthly Accident Frequency')\n",
    "ax.set_xlabel('Month')\n",
    "ax.set_ylabel('Number of Accidents')\n",
    "plt.xticks(rotation=45)\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Weather and Road Conditions Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Pie chart: share of accidents recorded under each weather condition\n",
    "weather_counts = df['Weather Condition'].value_counts()\n",
    "fig_weather, ax_weather = plt.subplots(figsize=(10, 6))\n",
    "weather_counts.plot(kind='pie', autopct='%1.1f%%', ax=ax_weather)\n",
    "ax_weather.set_title('Distribution of Weather Conditions')\n",
    "ax_weather.axis('equal')  # equal axis scaling keeps the pie circular\n",
    "plt.show()\n",
    "\n",
    "# Horizontal bars: number of accidents per road condition\n",
    "fig_road, ax_road = plt.subplots(figsize=(10, 6))\n",
    "sns.countplot(data=df, y='Road Condition', ax=ax_road)\n",
    "ax_road.set_title('Frequency of Road Conditions')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Accident Causes Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Tally how many accidents were attributed to each cause\n",
    "cause_counts = df['Cause'].value_counts()\n",
    "\n",
    "# Vertical bar chart of the tallies, most frequent cause first\n",
    "fig, ax = plt.subplots(figsize=(12, 6))\n",
    "cause_counts.plot(kind='bar', ax=ax)\n",
    "ax.set_title('Main Causes of Accidents')\n",
    "ax.set_xlabel('Cause')\n",
    "ax.set_ylabel('Number of Accidents')\n",
    "plt.xticks(rotation=45)\n",
    "fig.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Severity Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Mean casualties for each distinct number of vehicles involved, kept as a\n",
    "# two-column frame so seaborn can address both columns by name\n",
    "severity_analysis = df.groupby('Vehicles Involved', as_index=False)['Casualties'].mean()\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "sns.barplot(data=severity_analysis, x='Vehicles Involved', y='Casualties', ax=ax)\n",
    "ax.set_title('Average Casualties by Number of Vehicles Involved')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Geographic Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# folium rejects NaN coordinates with a ValueError, so plot only the rows that\n",
    "# have both a latitude and a longitude (df itself is left untouched).\n",
    "located = df.dropna(subset=['Latitude', 'Longitude'])\n",
    "\n",
    "# Create a world-level map centered at the mean accident coordinates\n",
    "m = folium.Map(\n",
    "    location=[located['Latitude'].mean(), located['Longitude'].mean()],\n",
    "    zoom_start=2\n",
    ")\n",
    "\n",
    "# Add one red circle per accident. Zipping just the needed columns avoids\n",
    "# building a Series for every row the way iterrows() does.\n",
    "for lat, lon, cause, casualties in zip(\n",
    "    located['Latitude'], located['Longitude'], located['Cause'], located['Casualties']\n",
    "):\n",
    "    folium.CircleMarker(\n",
    "        location=[lat, lon],\n",
    "        radius=5,\n",
    "        popup=f\"Cause: {cause}<br>Casualties: {casualties}\",\n",
    "        color='red',\n",
    "        fill=True\n",
    "    ).add_to(m)\n",
    "\n",
    "# Display the map (last expression of the cell)\n",
    "m"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Statistical Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Headline figures for the whole dataset\n",
    "print(\"Statistical Summary:\")\n",
    "print(\"\\nTotal number of accidents:\", len(df))\n",
    "print(\"\\nTotal casualties:\", df['Casualties'].sum())\n",
    "print(\"\\nAverage casualties per accident:\", round(df['Casualties'].mean(), 2))\n",
    "print(\"\\nMost common cause:\", df['Cause'].mode()[0])\n",
    "\n",
    "# Weather condition with the highest mean casualties per accident. idxmax()\n",
    "# returns that label directly instead of sorting every group to read the first.\n",
    "mean_casualties_by_weather = df.groupby('Weather Condition')['Casualties'].mean()\n",
    "print(\"\\nMost dangerous weather condition:\", mean_casualties_by_weather.idxmax())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}