In [1]:
import pandas as pd
import numpy as np

df= pd.read_csv("0_earthquake_1995-2023.csv")

In [2]:
# Filling in null values of continent column
import pycountry
import pycountry_convert

def map_to_continent(df, country_col='country', location_col='location', continent_col='continent'):
    """
    Map countries and locations to continents.

    Parameters:
    df (pandas.DataFrame): DataFrame containing country and location columns
    country_col (str): Name of the country column
    location_col (str): Name of the location column
    continent_col (str): Name of the continent column to fill

    Returns:
    pandas.DataFrame: DataFrame with filled continent values
    """
    df_copy = df.copy()

    # Country to continent mapping using pycountry
    def get_continent_from_country(country_name):
        try:
            # Use fuzzy search for better matches
            if country_name.lower() == "antarctica":
                return "Antarctica"
            
            country = pycountry.countries.search_fuzzy(country_name)[0]
            if country:
                continent_code = pycountry_convert.country_alpha2_to_continent_code(country.alpha_2)
                return pycountry_convert.convert_continent_code_to_continent_name(continent_code)
        except Exception as e:
            print(f"Error mapping country '{country_name}': {e}")
        return None

    # Special location patterns to continent mapping
    location_patterns = {
        'Atlantic': 'Atlantic Ocean',
        'Mid-Atlantic': 'Atlantic Ocean',
        'Indian Ocean': 'Indian Ocean',
        'Pacific': 'Pacific Ocean',
        'Caribbean': 'North America',
        'Antarctic': 'Antarctica',
        'Kermadec': 'Oceania',
        'Fiji': 'Oceania',
        'Tonga': 'Oceania',
        'Vanuatu': 'Oceania',
        'Kuril': 'Asia',
        'Alaska': 'North America',
        'Philippines': 'Asia',
        'Sumatra': 'Asia',
        'Loyalty Islands': 'Oceania',
        'Macquarie': 'Oceania'
    }

    def get_continent(row):
        # If continent is already filled, return it
        if pd.notna(row[continent_col]):
            return row[continent_col]

        # Try to get continent from country first
        if pd.notna(row[country_col]):
            continent = get_continent_from_country(row[country_col].strip())
            if continent:
                return continent

        # If country not found or is null, try location patterns
        if pd.notna(row[location_col]):
            location = str(row[location_col]).lower()
            for pattern, continent in location_patterns.items():
                if pattern.lower() in location:
                    return continent

        return None

    # Apply the mapping
    mask = df_copy[continent_col].isna()
    if mask.any():
        df_copy.loc[mask, continent_col] = df_copy[mask].apply(get_continent, axis=1)

    # Print statistics
    unmapped = df_copy[df_copy[continent_col].isna()]
    if not unmapped.empty:
        print("\nUnmapped locations:")
        print(unmapped[[country_col, location_col]].drop_duplicates())

    return df_copy



In [3]:
df = map_to_continent(df)

Error mapping country 'United Kingdom of Great Britain and Northern Ireland (the)': united kingdom of great britain and northern ireland (the)
Error mapping country 'Aleutian Islands': aleutian islands
Error mapping country 'United Kingdom of Great Britain and Northern Ireland (the)': united kingdom of great britain and northern ireland (the)
Error mapping country 'United Kingdom of Great Britain and Northern Ireland (the)': united kingdom of great britain and northern ireland (the)
Error mapping country 'United Kingdom of Great Britain and Northern Ireland (the)': united kingdom of great britain and northern ireland (the)
Error mapping country 'United Kingdom of Great Britain and Northern Ireland (the)': united kingdom of great britain and northern ireland (the)

Unmapped locations:
                                               country  \
141  United Kingdom of Great Britain and Northern I...   
381  United Kingdom of Great Britain and Northern I...   
401  United Kingdom of Great Br

In [4]:
df.isna().sum()

Unnamed: 0      0
title           0
magnitude       0
date_time       0
cdi             0
mmi             0
alert         548
tsunami         0
sig             0
net             0
nst             0
dmin            0
gap             0
magType         0
depth           0
latitude        0
longitude       0
location        0
continent       5
country         0
dtype: int64

In [5]:
df.head(20)

Unnamed: 0.1,Unnamed: 0,title,magnitude,date_time,cdi,mmi,alert,tsunami,sig,net,nst,dmin,gap,magType,depth,latitude,longitude,location,continent,country
0,0,"M 6.5 - 42 km W of Sola, Vanuatu",6.5,16-08-2023 12:47,7,4,green,0,657,us,114,7.177,25.0,mww,192.955,-13.88,167.16,"Sola, Vanuatu",Oceania,Vanuatu
1,1,"M 6.5 - 43 km S of Intipucá, El Salvador",6.5,19-07-2023 00:22,8,6,yellow,0,775,us,92,0.679,40.0,mww,69.727,12.81,-88.13,"Intipucá, El Salvador",North America,El Salvador
2,2,"M 6.6 - 25 km ESE of Loncopué, Argentina",6.6,17-07-2023 03:05,7,5,green,0,899,us,70,1.634,28.0,mww,171.371,-38.19,-70.37,"Loncopué, Argentina",South America,Argentina
3,3,"M 7.2 - 98 km S of Sand Point, Alaska",7.2,16-07-2023 06:48,6,6,green,1,860,us,173,0.907,36.0,mww,32.571,54.38,-160.7,"Sand Point, Alaska",North America,United States of America
4,4,M 7.3 - Alaska Peninsula,7.3,16-07-2023 06:48,0,5,,1,820,at,79,0.879451,172.8,Mi,21.0,54.49,-160.8,Alaska Peninsula,North America,United States
5,5,"M 6.6 - 277 km NNE of Codrington, Antigua and ...",6.6,10-07-2023 20:28,5,4,green,1,802,us,95,2.454,37.0,mww,10.0,20.02,-61.1,"Codrington, Antigua and Barbuda",North America,Antigua and Barbuda
6,6,M 6.9 - Tonga,6.9,02-07-2023 10:27,4,4,green,1,741,us,136,1.179,23.0,mww,229.0,-17.85,-174.94,Tonga,Oceania,Tonga
7,7,M 7.2 - south of the Fiji Islands,7.2,15-06-2023 18:06,8,6,green,1,804,us,85,2.59,24.0,mww,167.404,-22.98,-177.21,the Fiji Islands,Oceania,Fiji
8,8,M 6.6 - Panama-Colombia border region,6.6,25-05-2023 03:05,6,6,green,1,733,us,50,2.163,129.0,mww,10.0,8.89,-77.12,Panama-Colombia border region,South America,Colombia
9,9,M 7.1 - southeast of the Loyalty Islands,7.1,20-05-2023 01:51,3,4,green,1,777,us,98,2.812,56.0,mww,35.981,-23.06,170.46,the Loyalty Islands,Europe,France


In [6]:
df["continent"].unique()

array(['Oceania', 'North America', 'South America', 'Europe', 'Asia',
       'Antarctica', None, 'Africa'], dtype=object)

In [7]:
df[df["continent"].isna()]

Unnamed: 0.1,Unnamed: 0,title,magnitude,date_time,cdi,mmi,alert,tsunami,sig,net,nst,dmin,gap,magType,depth,latitude,longitude,location,continent,country
141,153,"M 6.6 - 131km S of Bristol Island, South Sandw...",6.6,27-08-2019 23:55,0,4,green,1,670,us,0,12.896,19.0,mww,16.0,-60.22,-26.58,"Bristol Island, South Sandwich Islands",,United Kingdom of Great Britain and Northern I...
381,401,M 7.7 - Scotia Sea,7.7,17-11-2013 09:04,1,8,green,1,912,us,0,8.05,23.0,mww,10.0,-60.27,-46.4,Scotia Sea,,United Kingdom of Great Britain and Northern I...
382,402,M 6.9 - Scotia Sea,6.9,16-11-2013 03:34,0,5,green,1,732,us,0,8.284,17.0,mww,9.97,-60.26,-47.06,Scotia Sea,,United Kingdom of Great Britain and Northern I...
401,421,M 7.3 - South Sandwich Islands region,7.3,15-07-2013 14:03,0,6,green,1,820,us,398,0.0,43.0,mww,11.0,-60.86,-25.07,South Sandwich Islands region,,United Kingdom of Great Britain and Northern I...
724,750,M 7.6 - Scotia Sea,7.6,04-08-2003 04:37,0,6,,0,889,us,315,0.0,25.2,mwc,10.0,-60.53,-43.41,Scotia Sea,,United Kingdom of Great Britain and Northern I...


In [8]:
df.replace(to_replace=[None], value=np.nan, inplace=True)


In [9]:
df["country"].unique()

array(['Vanuatu', 'El Salvador', 'Argentina', 'United States of America',
       'United States', 'Antigua and Barbuda', 'Tonga', 'Fiji',
       'Colombia', 'France', 'Indonesia', 'New Zealand',
       'Russian Federation (the)', 'Papua New Guinea', 'Afghanistan',
       'Ecuador', 'Tajikistan', 'Turkiye', 'Solomon Islands', 'Panama',
       'Mexico', 'Taiwan', "People's Republic of China", 'Philippines',
       'Brazil', 'Peru', 'Nicaragua', 'New Caledonia', 'Japan', 'Cyprus',
       'Antarctica', 'Haiti', 'Wallis and Futuna', 'Russia', 'Mongolia',
       'Chile', 'Greece', 'Jamaica', 'Turkey',
       'United Kingdom of Great Britain and Northern Ireland (the)',
       'Australia', 'South Sandwich Islands',
       'South Georgia and the South Sandwich Islands',
       'Svalbard and Jan Mayen', 'Canada', 'Venezuela', 'Bolivia',
       'Honduras', 'Costa Rica', 'Iran', 'Russia ', 'New Zealand ',
       'Guatemala', 'Botswana', 'Italy', 'Myanmar', 'India', 'India ',
       'Aleutian Isla

In [10]:
df.loc[df["country"]== "United Kingdom of Great Britain and Northern Ireland (the)" ,"continent"] = "Europe"

In [11]:
df.isna().sum()

Unnamed: 0      0
title           0
magnitude       0
date_time       0
cdi             0
mmi             0
alert         548
tsunami         0
sig             0
net             0
nst             0
dmin            0
gap             0
magType         0
depth           0
latitude        0
longitude       0
location        0
continent       0
country         0
dtype: int64

In [12]:
df.to_csv("1_earthquake_1995-2023.csv")