In [1]:
# Dependencies
import pandas as pd
import csv, os

In [2]:
# Relative locations of the two raw CSV inputs read by the next cell
data_path_1, data_path_2 = (
    "Resources/earthquake_database.csv",
    "Resources/Worldwide-Earthquake-database.csv",
)

In [3]:
# Load each raw CSV file into its own DataFrame
earthquake_data = pd.read_csv(filepath_or_buffer=data_path_1)
worldwide_earthquake_database = pd.read_csv(filepath_or_buffer=data_path_2)

In [4]:
# Preview the first rows of the raw frame loaded from data_path_1
earthquake_data.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,01/02/1965,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,01/04/1965,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,01/05/1965,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,01/08/1965,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,01/09/1965,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [5]:
# Parse the "Date" column into timezone-aware (UTC) timestamps,
# then overwrite the original column with the parsed values
parsed_dates = pd.to_datetime(earthquake_data["Date"], utc=True)
earthquake_data["Date"] = parsed_dates
earthquake_data.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Magnitude Seismic Stations,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status
0,1965-01-02 00:00:00+00:00,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic
1,1965-01-04 00:00:00+00:00,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic
2,1965-01-05 00:00:00+00:00,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic
3,1965-01-08 00:00:00+00:00,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic
4,1965-01-09 00:00:00+00:00,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic


In [6]:
# Derive a "Year" column from the calendar year of each "Date" value
date_index = pd.DatetimeIndex(earthquake_data["Date"])
earthquake_data["Year"] = date_index.year
earthquake_data.head()

Unnamed: 0,Date,Time,Latitude,Longitude,Type,Depth,Depth Error,Depth Seismic Stations,Magnitude,Magnitude Type,...,Azimuthal Gap,Horizontal Distance,Horizontal Error,Root Mean Square,ID,Source,Location Source,Magnitude Source,Status,Year
0,1965-01-02 00:00:00+00:00,13:44:18,19.246,145.616,Earthquake,131.6,,,6.0,MW,...,,,,,ISCGEM860706,ISCGEM,ISCGEM,ISCGEM,Automatic,1965
1,1965-01-04 00:00:00+00:00,11:29:49,1.863,127.352,Earthquake,80.0,,,5.8,MW,...,,,,,ISCGEM860737,ISCGEM,ISCGEM,ISCGEM,Automatic,1965
2,1965-01-05 00:00:00+00:00,18:05:58,-20.579,-173.972,Earthquake,20.0,,,6.2,MW,...,,,,,ISCGEM860762,ISCGEM,ISCGEM,ISCGEM,Automatic,1965
3,1965-01-08 00:00:00+00:00,18:49:43,-59.076,-23.557,Earthquake,15.0,,,5.8,MW,...,,,,,ISCGEM860856,ISCGEM,ISCGEM,ISCGEM,Automatic,1965
4,1965-01-09 00:00:00+00:00,13:32:50,11.938,126.427,Earthquake,15.0,,,5.8,MW,...,,,,,ISCGEM860890,ISCGEM,ISCGEM,ISCGEM,Automatic,1965


In [7]:
# Keep only the columns named below; every other column is left out of the new frame
columns_to_keep = ["Year", "Latitude", "Longitude", "Magnitude"]
cleaned_earthquake_df = earthquake_data.loc[:, columns_to_keep]

In [8]:
# Keep rows whose Year is 2015 or later, renumber the rows from 0,
# and write the result to a new CSV file
from_2015_mask = cleaned_earthquake_df["Year"] >= 2015
cleaned_earthquake_df = cleaned_earthquake_df.loc[from_2015_mask].reset_index(drop=True)
# NOTE: to_csv is called with its default index setting, so the row index
# is written as the first (unnamed) column of the file
cleaned_earthquake_df.to_csv("Resources/cleaned_earthquake_database.CSV")
cleaned_earthquake_df

Unnamed: 0,Year,Latitude,Longitude,Magnitude
0,2015,6.5739,60.3653,5.5
1,2015,-43.0546,171.2364,5.6
2,2015,55.2049,163.2446,5.6
3,2015,5.9045,-82.6576,6.5
4,2015,-61.6118,161.3346,5.7
...,...,...,...,...
910,2016,38.3917,-118.8941,5.6
911,2016,38.3777,-118.8957,5.5
912,2016,36.9179,140.4262,5.9
913,2016,-9.0283,118.6639,6.3


In [9]:
# Preview the first rows of the raw frame loaded from data_path_2
worldwide_earthquake_database.head()

Unnamed: 0,I_D,FLAG_TSUNAMI,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,FOCAL_DEPTH,EQ_PRIMARY,...,TOTAL_MISSING,TOTAL_MISSING_DESCRIPTION,TOTAL_INJURIES,TOTAL_INJURIES_DESCRIPTION,TOTAL_DAMAGE_MILLIONS_DOLLARS,TOTAL_DAMAGE_DESCRIPTION,TOTAL_HOUSES_DESTROYED,TOTAL_HOUSES_DESTROYED_DESCRIPTION,TOTAL_HOUSES_DAMAGED,TOTAL_HOUSES_DAMAGED_DESCRIPTION
0,1,No,-2150,,,,,,,7.3,...,,,,,,,,,,
1,2,Yes,-2000,,,,,,,,...,,,,,,,,,,
2,3,No,-2000,,,,,,18.0,7.1,...,,,,,,1.0,,1.0,,
3,5877,Yes,-1610,,,,,,,,...,,,,,,3.0,,,,
4,8,No,-1566,,,,,,,,...,,,,,,,,,,


In [10]:
# Narrow the worldwide table to the columns listed below, in this order
worldwide_columns_to_keep = [
    "YEAR",
    "EQ_PRIMARY",
    "EQ_MAG_MW",
    "EQ_MAG_MS",
    "EQ_MAG_MB",
    "EQ_MAG_ML",
    "EQ_MAG_MFA",
    "EQ_MAG_UNK",
    "INTENSITY",
    "COUNTRY",
    "LATITUDE",
    "LONGITUDE",
    "TOTAL_DEATHS",
    "INJURIES",
    "TOTAL_DAMAGE_MILLIONS_DOLLARS",
]
cleaned_worldwide_earthquake_df = worldwide_earthquake_database.loc[:, worldwide_columns_to_keep]

cleaned_worldwide_earthquake_df.head()

Unnamed: 0,YEAR,EQ_PRIMARY,EQ_PRIMARY.1,EQ_MAG_MW,EQ_MAG_MS,EQ_MAG_MB,EQ_MAG_ML,EQ_MAG_MFA,EQ_MAG_UNK,INTENSITY,COUNTRY,LATITUDE,LONGITUDE,TOTAL_DEATHS,INJURIES,TOTAL_DAMAGE_MILLIONS_DOLLARS
0,-2150,7.3,7.3,,,,,,7.3,,JORDAN,31.1,35.5,,,
1,-2000,,,,,,,,,10.0,SYRIA,35.683,35.8,,,
2,-2000,7.1,7.1,,7.1,,,,,10.0,TURKMENISTAN,38.0,58.2,1.0,,
3,-1610,,,,,,,,,,GREECE,36.4,25.4,,,
4,-1566,,,,,,,,,10.0,ISRAEL,31.5,35.3,,,


In [11]:
# Keep rows whose YEAR is 2015 or later, renumber the rows from 0,
# and write the result to a new CSV file
year_2015_onward = cleaned_worldwide_earthquake_df["YEAR"] >= 2015
cleaned_worldwide_earthquake_df = (
    cleaned_worldwide_earthquake_df.loc[year_2015_onward].reset_index(drop=True)
)
# NOTE: to_csv is called with its default index setting, so the row index
# is written as the first (unnamed) column of the file
cleaned_worldwide_earthquake_df.to_csv("Resources/cleaned_worldwide_earthquake_data.CSV")
cleaned_worldwide_earthquake_df

Unnamed: 0,YEAR,EQ_PRIMARY,EQ_PRIMARY.1,EQ_MAG_MW,EQ_MAG_MS,EQ_MAG_MB,EQ_MAG_ML,EQ_MAG_MFA,EQ_MAG_UNK,INTENSITY,COUNTRY,LATITUDE,LONGITUDE,TOTAL_DEATHS,INJURIES,TOTAL_DAMAGE_MILLIONS_DOLLARS
0,2015,5.3,5.3,,,5.3,,,,6.0,CHINA,29.353,103.199,,11.0,
1,2015,6.7,6.7,6.7,,,,,,4.0,JAPAN,39.856,142.881,,,
2,2015,6.4,6.4,6.4,,,,,,5.0,VANUATU,-16.431,168.148,,,
3,2015,4.5,4.5,,,4.5,,,,,CHINA,23.01,101.755,,,
4,2015,3.6,3.6,,,,3.6,,,,BOSNIA-HERZEGOVINA,44.534,18.934,4.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,2020,5.3,5.3,5.3,,,,,,,CHINA,33.124,98.916,,,11.28
300,2020,6.6,6.6,6.6,,,,,,7.0,GREECE,34.205,25.712,,,
301,2020,4.6,4.6,,,4.6,,,,,IRAN,35.725,52.044,2.0,22.0,
302,2020,5.2,5.2,5.2,,,,,,,CHINA,27.296,103.281,4.0,24.0,
