In [1]:
import pandas as pd

In [2]:
# Read the shark-attack records from the CSV that sits next to the notebook
data = pd.read_csv(filepath_or_buffer='shark_attacks.csv')

In [3]:
# Preview the top five records to see which columns are available
print(data.head(n=5))

                        _id  index  id  Year        Type    Country  \
0  6696ff4dc10ca7acadf71537      0   0  2024  Unprovoked  AUSTRALIA   
1  6696ff4dc10ca7acadf71538      1   1  2024  Unprovoked        USA   
2  6696ff4dc10ca7acadf71539      2   2  2024  Unprovoked        USA   
3  6696ff4dc10ca7acadf7153a      3   3  2024  Unprovoked  AUSTRALIA   
4  6696ff4dc10ca7acadf7153b      4   4  2024  Unprovoked      INDIA   

               State  Activity Sex  Age  \
0         Queensland  Swimming   F   13   
1             Hawaii   Surfing   M  NaN   
2             Hawaii  Swimming   F   11   
3  Western Australia       NaN   F   46   
4        Maharashtra   Fishing   M   32   

                                     Injury  
0  Minor injuries to back, abdomen and legs  
1            No injury, shark bit surfboard  
2                  Lacerations to left foot  
3                                Leg bitten  
4            Calf of lower left leg injured  


In [4]:
from geopy.geocoders import Nominatim
import time

In [5]:
# Initialize geolocator.
# geopy's default request timeout is 1 second, which the public Nominatim
# server frequently exceeds; a timed-out lookup is reported as "not found"
# by geocode_location, so allow more time per request.
geolocator = Nominatim(user_agent="shark_attack_heatmap", timeout=10)

In [6]:
# Function to geocode country and state
def geocode_location(Country, State, geocoder=None, delay_seconds=1.0):
    """Look up the coordinates of a state/region inside a country.

    Args:
        Country: Country name; second part of the "<State>, <Country>" query.
        State: State or region name; first part of the query.
        geocoder: Object exposing ``geocode(query)``. Defaults to the
            notebook-level ``geolocator``.
        delay_seconds: Pause applied after every lookup so that consecutive
            calls stay within the geocoding service's rate limit (Nominatim's
            public usage policy asks for at most one request per second).
            Pass 0 to disable the pause.

    Returns:
        ``(latitude, longitude)`` of the first match, or ``(None, None)`` when
        nothing is found or the lookup fails.
    """
    if geocoder is None:
        geocoder = geolocator

    coordinates = (None, None)
    try:
        location = geocoder.geocode(f"{State}, {Country}")
        if location:
            coordinates = (location.latitude, location.longitude)
    except Exception:
        # Best-effort lookup: a failed request (timeout, service error, ...)
        # is treated as "not found". Catching Exception rather than using a
        # bare except keeps Ctrl-C / kernel interrupt working mid-run.
        coordinates = (None, None)

    if delay_seconds and delay_seconds > 0:
        time.sleep(delay_seconds)
    return coordinates

In [None]:
# Geocode each distinct (Country, State) pair once and reuse the result for
# every row that shares it. Many rows refer to the same place, so this sends
# far fewer requests to the geocoding service than one lookup per row.
# Rows missing either value are never looked up and get (None, None).
location_keys = data[['Country', 'State']].dropna().drop_duplicates()
coords_by_location = {
    (country, state): geocode_location(country, state)
    for country, state in location_keys.itertuples(index=False, name=None)
}

# Map the cached coordinates back onto the rows; building the two columns
# from plain lists also works when the frame is empty.
row_coords = [
    coords_by_location.get((country, state), (None, None))
    for country, state in zip(data['Country'], data['State'])
]
data['latitude'] = [lat for lat, _ in row_coords]
data['longitude'] = [lon for _, lon in row_coords]

In [None]:
# Keep only the rows for which both coordinates were resolved
data = data.loc[data[['latitude', 'longitude']].notna().all(axis=1)]

# Show the first five remaining records
print(data.head(n=5))

In [None]:
# Count how many records fall on each distinct (latitude, longitude) point
location_counts = (
    data
    .groupby(by=['latitude', 'longitude'])
    .size()
    .reset_index(name='count')
)

# Preview the per-location totals
print(location_counts.head(n=5))

In [None]:
import folium
from folium.plugins import HeatMap

# Initialize a world map: centered on latitude 0 / longitude 0, zoomed far out
m = folium.Map(location=[0, 0], zoom_start=2)

# Prepare data for HeatMap as [latitude, longitude, weight] triples, with the
# per-location count as the weight. Selecting the columns and converting in
# one step avoids the slow row-by-row iterrows() loop.
heat_data = location_counts[['latitude', 'longitude', 'count']].to_numpy().tolist()

# Create and add heat map to the base map
HeatMap(heat_data).add_to(m)

# Save the map to an HTML file (relative path: written to the working directory)
m.save('shark_attack_heat_map.html')

In [None]:
import os
import folium
from folium.plugins import HeatMap

# Absolute path of the current working directory (abspath('') resolves against
# the working directory, which is normally where the notebook was started)
notebook_dir = os.path.abspath('')

# Initialize a world map: centered on latitude 0 / longitude 0, zoomed far out
m = folium.Map(location=[0, 0], zoom_start=2)

# Prepare data for HeatMap as [latitude, longitude, weight] triples, with the
# per-location count as the weight. Selecting the columns and converting in
# one step avoids the slow row-by-row iterrows() loop.
heat_data = location_counts[['latitude', 'longitude', 'count']].to_numpy().tolist()

# Create and add heat map to the base map
HeatMap(heat_data).add_to(m)

# Save the map to an HTML file inside that directory
file_path = os.path.join(notebook_dir, 'shark_attack_heat_map.html')
m.save(file_path)