In [1]:
# Dependencies
import pandas as pd

In [2]:
# Relative locations of the three raw input files:
# volcano events, worldwide earthquakes, and tsunamis (in that order).
data_path_1, data_path_2, data_path_3 = (
    "Resources/volcano_data_2010.csv",
    "Resources/Worldwide-Earthquake-database.csv",
    "Resources/tsunami_dataset.csv",
)

In [3]:
# Load each raw CSV into its own DataFrame, in the same order as the paths above.
volcano_data, worldwide_earthquake_database, tsunami_dataset = map(
    pd.read_csv,
    (data_path_1, data_path_2, data_path_3),
)

In [4]:
# Preview the first five rows of the raw volcano table
volcano_data.head(n=5)

Unnamed: 0,Year,Month,Day,TSU,EQ,Name,Location,Country,Latitude,Longitude,...,TOTAL_DEATHS,TOTAL_DEATHS_DESCRIPTION,TOTAL_MISSING,TOTAL_MISSING_DESCRIPTION,TOTAL_INJURIES,TOTAL_INJURIES_DESCRIPTION,TOTAL_DAMAGE_MILLIONS_DOLLARS,TOTAL_DAMAGE_DESCRIPTION,TOTAL_HOUSES_DESTROYED,TOTAL_HOUSES_DESTROYED_DESCRIPTION
0,2010,1,,,,Tungurahua,Ecuador,Ecuador,-1.467,-78.442,...,,,,,,,,1.0,,
1,2010,3,31.0,,,Eyjafjallajokull,Iceland-S,Iceland,63.63,-19.62,...,2.0,1.0,,,,,,,,
2,2010,5,27.0,,,Pacaya,Guatemala,Guatemala,14.381,-90.601,...,1.0,1.0,3.0,1.0,,,,1.0,3.0,1.0
3,2010,5,29.0,TSU,EQ,Sarigan,Mariana Is-C Pacific,United States,16.708,145.78,...,,,,,,,,,,
4,2010,8,6.0,,,Karangetang [Api Siau],Sangihe Is-Indonesia,Indonesia,2.78,125.48,...,4.0,1.0,,,5.0,1.0,,,,1.0


In [5]:
# Build the working volcano table from the subset of columns used later on.
# Label-based selection raises a KeyError when a column name is wrong, whereas
# pd.DataFrame(frame, columns=[...]) would silently add an all-NaN column.
# This also matches how the earthquake and tsunami tables are selected.
volcano_columns = [
    "Year",
    "Country",
    "Elevation",
    "TOTAL_DEATHS",
    "TOTAL_MISSING",
    "TOTAL_INJURIES",
]
# .copy() makes the subset independent of the raw frame before it is modified.
cleaned_volcano_df = volcano_data.loc[:, volcano_columns].copy()

# Upper-case every text (object dtype) column; numeric columns pass through unchanged.
cleaned_volcano_df = cleaned_volcano_df.apply(
    lambda column: column.str.upper() if column.dtype == "object" else column
)
cleaned_volcano_df.head()

Unnamed: 0,Year,Country,Elevation,TOTAL_DEATHS,TOTAL_MISSING,TOTAL_INJURIES
0,2010,ECUADOR,5023,,,
1,2010,ICELAND,1666,2.0,,
2,2010,GUATEMALA,2552,1.0,3.0,
3,2010,UNITED STATES,538,,,
4,2010,INDONESIA,1784,4.0,,5.0


In [6]:
# Renumber the rows from zero and switch to the column names shared by all
# three cleaned tables. (The CSV export happens in a later cell.)
volcano_column_names = {
    "Year": "Year",
    "Elevation": "Elevation",
    "Country": "Country_Name",
    "TOTAL_DEATHS": "Total_Deaths",
    "TOTAL_MISSING": "Total_Missing_Persons",
    "TOTAL_INJURIES": "Total_Injuries",
}
cleaned_volcano_df = (
    cleaned_volcano_df
    .reset_index(drop=True)
    .rename(columns=volcano_column_names)
)

cleaned_volcano_df

Unnamed: 0,Year,Country_Name,Elevation,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,ECUADOR,5023,,,
1,2010,ICELAND,1666,2.0,,
2,2010,GUATEMALA,2552,1.0,3.0,
3,2010,UNITED STATES,538,,,
4,2010,INDONESIA,1784,4.0,,5.0
...,...,...,...,...,...,...
58,2018,UNITED STATES,1222,1.0,,3.0
59,2018,PAPUA NEW GUINEA,365,,,
60,2018,INDONESIA,2799,,,30.0
61,2018,UNITED STATES,1222,,,


In [7]:
# Normalise country names to title case (lower-case first, then capitalise each word)
cleaned_volcano_df = cleaned_volcano_df.assign(
    Country_Name=cleaned_volcano_df["Country_Name"].str.lower().str.title()
)
cleaned_volcano_df

Unnamed: 0,Year,Country_Name,Elevation,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,Ecuador,5023,,,
1,2010,Iceland,1666,2.0,,
2,2010,Guatemala,2552,1.0,3.0,
3,2010,United States,538,,,
4,2010,Indonesia,1784,4.0,,5.0
...,...,...,...,...,...,...
58,2018,United States,1222,1.0,,3.0
59,2018,Papua New Guinea,365,,,
60,2018,Indonesia,2799,,,30.0
61,2018,United States,1222,,,


In [8]:
# Map short country names to the long-form names used as merge keys later on.
# The result is assigned back to the column instead of calling
# .replace(..., inplace=True) on the selected Series: that chained inplace form
# raises a FutureWarning on recent pandas and leaves the DataFrame unchanged
# when Copy-on-Write is enabled.
volcano_country_renames = {
    "United States": "United States of America",
    "Russia": "Russian Federation",
    "Cape Verde": "Cabo Verde",
}
cleaned_volcano_df["Country_Name"] = cleaned_volcano_df["Country_Name"].replace(
    volcano_country_renames
)
cleaned_volcano_df

Unnamed: 0,Year,Country_Name,Elevation,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,Ecuador,5023,,,
1,2010,Iceland,1666,2.0,,
2,2010,Guatemala,2552,1.0,3.0,
3,2010,United States of America,538,,,
4,2010,Indonesia,1784,4.0,,5.0
...,...,...,...,...,...,...
58,2018,United States of America,1222,1.0,,3.0
59,2018,Papua New Guinea,365,,,
60,2018,Indonesia,2799,,,30.0
61,2018,United States of America,1222,,,


In [9]:
# Write the cleaned volcano table to disk without the row index, then display it
cleaned_volcano_df.to_csv(
    path_or_buf="Resources/cleaned_volcano_data.CSV",
    index=False,
)
cleaned_volcano_df

Unnamed: 0,Year,Country_Name,Elevation,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,Ecuador,5023,,,
1,2010,Iceland,1666,2.0,,
2,2010,Guatemala,2552,1.0,3.0,
3,2010,United States of America,538,,,
4,2010,Indonesia,1784,4.0,,5.0
...,...,...,...,...,...,...
58,2018,United States of America,1222,1.0,,3.0
59,2018,Papua New Guinea,365,,,
60,2018,Indonesia,2799,,,30.0
61,2018,United States of America,1222,,,


In [10]:
# Preview the first five rows of the raw earthquake table
worldwide_earthquake_database.head(n=5)

Unnamed: 0,I_D,FLAG_TSUNAMI,YEAR,MONTH,DAY,HOUR,MINUTE,SECOND,FOCAL_DEPTH,EQ_PRIMARY,...,TOTAL_MISSING,TOTAL_MISSING_DESCRIPTION,TOTAL_INJURIES,TOTAL_INJURIES_DESCRIPTION,TOTAL_DAMAGE_MILLIONS_DOLLARS,TOTAL_DAMAGE_DESCRIPTION,TOTAL_HOUSES_DESTROYED,TOTAL_HOUSES_DESTROYED_DESCRIPTION,TOTAL_HOUSES_DAMAGED,TOTAL_HOUSES_DAMAGED_DESCRIPTION
0,1,No,-2150,,,,,,,7.3,...,,,,,,,,,,
1,2,Yes,-2000,,,,,,,,...,,,,,,,,,,
2,3,No,-2000,,,,,,18.0,7.1,...,,,,,,1.0,,1.0,,
3,5877,Yes,-1610,,,,,,,,...,,,,,,3.0,,,,
4,8,No,-1566,,,,,,,,...,,,,,,,,,,


In [11]:
# Build the working earthquake table from the columns used in the comparison
earthquake_columns = [
    "YEAR",
    "EQ_MAG_MW",
    "COUNTRY",
    "TOTAL_DEATHS",
    "TOTAL_MISSING",
    "TOTAL_INJURIES",
]
cleaned_worldwide_earthquake_df = worldwide_earthquake_database.loc[:, earthquake_columns]

cleaned_worldwide_earthquake_df.head()

Unnamed: 0,YEAR,EQ_MAG_MW,COUNTRY,TOTAL_DEATHS,TOTAL_MISSING,TOTAL_INJURIES
0,-2150,,JORDAN,,,
1,-2000,,SYRIA,,,
2,-2000,,TURKMENISTAN,1.0,,
3,-1610,,GREECE,,,
4,-1566,,ISRAEL,,,


In [12]:
# Keep only events with 2009 < YEAR < 2019 (i.e. 2010-2018), renumber the rows
# from zero, and switch to the shared column names.
# (The CSV export happens in a later cell.)
earthquake_years = cleaned_worldwide_earthquake_df["YEAR"]
earthquake_in_period = (earthquake_years > 2009) & (earthquake_years < 2019)

earthquake_column_names = {
    "EQ_MAG_MW": "Magnitude",
    "YEAR": "Year",
    "COUNTRY": "Country_Name",
    "TOTAL_DEATHS": "Total_Deaths",
    "TOTAL_MISSING": "Total_Missing_Persons",
    "TOTAL_INJURIES": "Total_Injuries",
}
cleaned_worldwide_earthquake_df = (
    cleaned_worldwide_earthquake_df
    .loc[earthquake_in_period]
    .reset_index(drop=True)
    .rename(columns=earthquake_column_names)
)

cleaned_worldwide_earthquake_df

Unnamed: 0,Year,Magnitude,Country_Name,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,,TAJIKISTAN,,,
1,2010,6.6,SOLOMON ISLANDS,,,
2,2010,7.1,SOLOMON ISLANDS,,,
3,2010,6.8,SOLOMON ISLANDS,,,
4,2010,,INDONESIA,1.0,,2.0
...,...,...,...,...,...,...
502,2018,7.5,NEW CALEDONIA,,,
503,2018,5.6,VANUATU,,,
504,2018,5.4,CHINA,,,17.0
505,2018,5.5,MOZAMBIQUE,,,10.0


In [13]:
# Normalise country names to title case (lower-case first, then capitalise each word)
cleaned_worldwide_earthquake_df = cleaned_worldwide_earthquake_df.assign(
    Country_Name=cleaned_worldwide_earthquake_df["Country_Name"].str.lower().str.title()
)
cleaned_worldwide_earthquake_df

Unnamed: 0,Year,Magnitude,Country_Name,Total_Deaths,Total_Missing_Persons,Total_Injuries
0,2010,,Tajikistan,,,
1,2010,6.6,Solomon Islands,,,
2,2010,7.1,Solomon Islands,,,
3,2010,6.8,Solomon Islands,,,
4,2010,,Indonesia,1.0,,2.0
...,...,...,...,...,...,...
502,2018,7.5,New Caledonia,,,
503,2018,5.6,Vanuatu,,,
504,2018,5.4,China,,,17.0
505,2018,5.5,Mozambique,,,10.0


In [14]:
# Country rename: map the title-cased source names to the long-form names used
# as merge keys later on.
# The result is assigned back to the column instead of calling
# .replace(..., inplace=True) on the selected Series: that chained inplace form
# raises a FutureWarning on recent pandas and leaves the DataFrame unchanged
# when Copy-on-Write is enabled.
earthquake_country_renames = {
    "Usa": "United States of America",
    "South Korea": "Republic of Korea",
    "Macedonia": "North Macedonia",
    "Russia": "Russian Federation",
    "Venezuela": "Venezuela (Bolivarian Republic of)",
    "Iran": "Iran (Islamic Republic of)",
    "Taiwan": "China",
    "Usa Territory": "United States of America",
    "Myanmar (Burma)": "Myanmar",
    "Kermadec Islands (New Zealand)": "New Zealand",
    "Bosnia-Herzegovina": "Bosnia and Herzegovina",
    # Key is written the way .str.title() renders it ("And The").
    "South Georgia And The South Sandwich Islands": "South Georgia and the South Sandwich Islands",
    "Tanzania": "United Republic of Tanzania",
}
cleaned_worldwide_earthquake_df["Country_Name"] = (
    cleaned_worldwide_earthquake_df["Country_Name"].replace(earthquake_country_renames)
)

In [15]:
# Write the cleaned earthquake table to disk without the row index
cleaned_worldwide_earthquake_df.to_csv(
    path_or_buf="Resources/cleaned_worldwide_earthquake_data.CSV",
    index=False,
)

In [16]:
# Preview the first five rows of the raw tsunami table
tsunami_dataset.head(n=5)

Unnamed: 0,ID,YEAR,MONTH,DAY,HOUR,MINUTE,LATITUDE,LONGITUDE,LOCATION_NAME,COUNTRY,...,CAUSE,EVENT_VALIDITY,EQ_MAGNITUDE,EQ_DEPTH,TS_INTENSITY,DAMAGE_TOTAL_DESCRIPTION,HOUSES_TOTAL_DESCRIPTION,DEATHS_TOTAL_DESCRIPTION,URL,COMMENTS
0,12,-330,,,,,40.0,25.0,"E. SPORADES ISLANDS, AEGEAN ISLANDS",GREECE,...,Earthquake,Very Doubtful Tsunami,7.0,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"330 B.C. Aegean Sea, Sporades Islands, 40 N 25..."
1,481,1764,2.0,11.0,,,51.45,-2.583,"BRISTOL, ENGLAND",UK,...,Unknown,Very Doubtful Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"Reference #1894, in full: ""On Saturday the 11t..."
2,71,859,,,,,36.08,36.25,SAMANDAGI,TURKEY,...,Earthquake,Questionable Tsunami,,,3.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"859 (possibly 861), November. Levantian Sea, N..."
3,186,1580,1.0,,,,40.0,128.0,YELLOW SEA,NORTH KOREA,...,Unknown,Very Doubtful Tsunami,,,1.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,<P><blockquote><i>Reference #414:</i></blockqu...
4,5,-1300,,,,,39.96,26.24,"IONIAN COASTS, TROAD",TURKEY,...,Unknown,Questionable Tsunami,6.0,,5.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,1300 B.C. Ionian and Aegean Seas. References t...


In [17]:
# Build the working tsunami table from the columns used in the comparison
tsunami_columns = ["YEAR", "COUNTRY", "CAUSE", "DEATHS_TOTAL_DESCRIPTION"]
cleaned_tsunami_df = tsunami_dataset.loc[:, tsunami_columns]
cleaned_tsunami_df.head()

Unnamed: 0,YEAR,COUNTRY,CAUSE,DEATHS_TOTAL_DESCRIPTION
0,-330,GREECE,Earthquake,
1,1764,UK,Unknown,
2,859,TURKEY,Earthquake,
3,1580,NORTH KOREA,Unknown,
4,-1300,TURKEY,Unknown,


In [18]:
# Keep only events with 2009 < YEAR < 2019 (i.e. 2010-2018), renumber the rows
# from zero, and switch to the shared column names.
# (The CSV export happens in a later cell.)
tsunami_years = cleaned_tsunami_df["YEAR"]
tsunami_in_period = (tsunami_years > 2009) & (tsunami_years < 2019)

tsunami_column_names = {
    "CAUSE": "Cause",
    "YEAR": "Year",
    "COUNTRY": "Country_Name",
    # Note: the source column holds a textual death-toll description, not a count.
    "DEATHS_TOTAL_DESCRIPTION": "Total_Deaths",
}
cleaned_tsunami_df = (
    cleaned_tsunami_df
    .loc[tsunami_in_period]
    .reset_index(drop=True)
    .rename(columns=tsunami_column_names)
)

cleaned_tsunami_df

Unnamed: 0,Year,Country_Name,Cause,Total_Deaths
0,2010,SOLOMON ISLANDS,Earthquake,
1,2010,SOLOMON ISLANDS,Earthquake,
2,2010,SOLOMON ISLANDS,Earthquake,
3,2010,HAITI,Earthquake and Landslide,Very Many (~1001 or more people)
4,2010,CHILE,Earthquake,Many (~101 to 1000 people)
...,...,...,...,...
133,2018,NEW CALEDONIA,Earthquake,
134,2018,RUSSIA,Landslide,
135,2014,ICELAND,Landslide,
136,2016,JAPAN,Earthquake,


In [19]:
# Normalise country names to title case (lower-case first, then capitalise each word)
cleaned_tsunami_df = cleaned_tsunami_df.assign(
    Country_Name=cleaned_tsunami_df["Country_Name"].str.lower().str.title()
)
cleaned_tsunami_df

Unnamed: 0,Year,Country_Name,Cause,Total_Deaths
0,2010,Solomon Islands,Earthquake,
1,2010,Solomon Islands,Earthquake,
2,2010,Solomon Islands,Earthquake,
3,2010,Haiti,Earthquake and Landslide,Very Many (~1001 or more people)
4,2010,Chile,Earthquake,Many (~101 to 1000 people)
...,...,...,...,...
133,2018,New Caledonia,Earthquake,
134,2018,Russia,Landslide,
135,2014,Iceland,Landslide,
136,2016,Japan,Earthquake,


In [24]:
# Country rename: map the title-cased source names to the long-form names used
# as merge keys later on.
# The result is assigned back to the column instead of calling
# .replace(..., inplace=True) on the selected Series: that chained inplace form
# raises a FutureWarning on recent pandas and leaves the DataFrame unchanged
# when Copy-on-Write is enabled.
tsunami_country_renames = {
    "Usa": "United States of America",
    "Russia": "Russian Federation",
    "Venezuela": "Venezuela (Bolivarian Republic of)",
    "Iran": "Iran (Islamic Republic of)",
    "Usa Territory": "United States of America",
    "Uk Territory": "United Kingdom of Great Britain and Northern Ireland",
    "Kermadec Islands": "New Zealand",
    "Uk": "United Kingdom of Great Britain and Northern Ireland",
}
cleaned_tsunami_df["Country_Name"] = cleaned_tsunami_df["Country_Name"].replace(
    tsunami_country_renames
)

In [25]:
# Write the cleaned tsunami table to disk without the row index
cleaned_tsunami_df.to_csv(
    path_or_buf="Resources/cleaned_tsunami_data.CSV",
    index=False,
)

In [21]:
# Inner-join the three cleaned tables on country and year, then index and sort
# by those keys.
# NOTE(review): all three inputs hold one row per event, so a country/year with
# several events yields every combination of rows (see the repeated Chile 2015
# rows in the output). Overlapping column names from the first join receive
# pandas' default _x (volcano) / _y (earthquake) suffixes.
# NOTE(review): the recorded execution counts suggest this cell was last run
# before the tsunami country-rename cell - restart and run all to confirm.
merge_keys = ["Country_Name", "Year"]

volcano_earthquake_df = cleaned_volcano_df.merge(
    cleaned_worldwide_earthquake_df, on=merge_keys, how="inner"
)
merged_df = volcano_earthquake_df.merge(
    cleaned_tsunami_df, on=merge_keys, how="inner"
).set_index(merge_keys)

sorted_df = merged_df.sort_index()
sorted_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Elevation,Total_Deaths_x,Total_Missing_Persons_x,Total_Injuries_x,Magnitude,Total_Deaths_y,Total_Missing_Persons_y,Total_Injuries_y,Cause,Total_Deaths
Country_Name,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Chile,2011,2236,,,,6.8,,,,Earthquake,
Chile,2015,2003,,,,8.3,15.0,1.0,14.0,Earthquake,Few (~1 to 50 people)
Chile,2015,2003,,,,8.3,15.0,1.0,14.0,Earthquake,
Chile,2015,2003,,,,6.9,,,,Earthquake,Few (~1 to 50 people)
Chile,2015,2003,,,,6.9,,,,Earthquake,
...,...,...,...,...,...,...,...,...,...,...,...
Papua New Guinea,2018,365,,,,7.5,145.0,,300.0,Volcano,
Papua New Guinea,2018,365,,,,6.1,1.0,,,Volcano,
Papua New Guinea,2018,365,,,,6.0,11.0,,,Volcano,
Papua New Guinea,2018,365,,,,6.7,25.0,,,Volcano,
