In [1]:
# Mount Google Drive into the Colab filesystem so later cells can read files from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Load the tsunami event table from the mounted Drive folder and preview it.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like an
# index that was written into the CSV — confirm whether it should be read with
# index_col=0 instead of being kept as a data column.
DATASET_PATH = '/content/drive/MyDrive/TSWD Final Project/tsunami_dataset.csv'
df = pd.read_csv(DATASET_PATH)
df.head()

Unnamed: 0,ID,YEAR,MONTH,DAY,HOUR,MINUTE,LATITUDE,LONGITUDE,LOCATION_NAME,COUNTRY,REGION,CAUSE,EVENT_VALIDITY,EQ_MAGNITUDE,EQ_DEPTH,TS_INTENSITY,DAMAGE_TOTAL_DESCRIPTION,HOUSES_TOTAL_DESCRIPTION,DEATHS_TOTAL_DESCRIPTION,URL,COMMENTS
0,12,-330,,,,,40.0,25.0,"E. SPORADES ISLANDS, AEGEAN ISLANDS",GREECE,Mediterranean Sea,Earthquake,Very Doubtful Tsunami,7.0,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"330 B.C. Aegean Sea, Sporades Islands, 40 N 25..."
1,481,1764,2.0,11.0,,,51.45,-2.583,"BRISTOL, ENGLAND",UK,Northeast Atlantic Ocean,Unknown,Very Doubtful Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"Reference #1894, in full: ""On Saturday the 11t..."
2,71,859,,,,,36.08,36.25,SAMANDAGI,TURKEY,Mediterranean Sea,Earthquake,Questionable Tsunami,,,3.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"859 (possibly 861), November. Levantian Sea, N..."
3,186,1580,1.0,,,,40.0,128.0,YELLOW SEA,NORTH KOREA,"China, North and South Korea, Philippines, Taiwan",Unknown,Very Doubtful Tsunami,,,1.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,<P><blockquote><i>Reference #414:</i></blockqu...
4,5,-1300,,,,,39.96,26.24,"IONIAN COASTS, TROAD",TURKEY,Mediterranean Sea,Unknown,Questionable Tsunami,6.0,,5.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,1300 B.C. Ionian and Aegean Seas. References t...


In [4]:
# Count the rows that have no value in the 'DAY' column.
df['DAY'].isna().sum()

177

In [5]:
# Keep only the events whose 'DAY' is recorded; rows with NaN in 'DAY' are dropped.
# Despite its name, df_dayNA holds the rows where 'DAY' is NOT missing.
day_is_known = df['DAY'].notna()
df_dayNA = df.loc[day_is_known]
df_dayNA.head()

Unnamed: 0,ID,YEAR,MONTH,DAY,HOUR,MINUTE,LATITUDE,LONGITUDE,LOCATION_NAME,COUNTRY,REGION,CAUSE,EVENT_VALIDITY,EQ_MAGNITUDE,EQ_DEPTH,TS_INTENSITY,DAMAGE_TOTAL_DESCRIPTION,HOUSES_TOTAL_DESCRIPTION,DEATHS_TOTAL_DESCRIPTION,URL,COMMENTS
1,481,1764,2.0,11.0,,,51.45,-2.583,"BRISTOL, ENGLAND",UK,Northeast Atlantic Ocean,Unknown,Very Doubtful Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"Reference #1894, in full: ""On Saturday the 11t..."
6,174,1556,10.0,21.0,,,37.0,126.0,YELLOW SEA,SOUTH KOREA,"China, North and South Korea, Philippines, Taiwan",Unknown,Very Doubtful Tsunami,,,0.5,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,<P><blockquote><i>Reference #414:</i></blockqu...
7,255,1641,12.0,21.0,,,38.4,-28.1,AZORES,PORTUGAL,Northeast Atlantic Ocean,Earthquake,Very Doubtful Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,
9,219,1606,1.0,23.0,,,31.9,140.0,CHICHIJIMA-TOKAIDO,JAPAN,Japan,Volcano,Definite Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,<P><blockquote><i>Reference #414:</i></blockqu...
10,480,1763,9.0,18.0,,,50.6,-2.466,"WEYMOUTH, ENGLAND",UK,Northeast Atlantic Ocean,Unknown,Very Doubtful Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"On Sept 18, 1763, in Weymouth, England, the se..."


In [6]:
# Restrict the table to events whose EVENT_VALIDITY is exactly 'Definite Tsunami',
# i.e. leave out every row carrying any other validity label.
is_definite = df['EVENT_VALIDITY'].eq('Definite Tsunami')
df_definite = df.loc[is_definite]
df_definite.head()

Unnamed: 0,ID,YEAR,MONTH,DAY,HOUR,MINUTE,LATITUDE,LONGITUDE,LOCATION_NAME,COUNTRY,REGION,CAUSE,EVENT_VALIDITY,EQ_MAGNITUDE,EQ_DEPTH,TS_INTENSITY,DAMAGE_TOTAL_DESCRIPTION,HOUSES_TOTAL_DESCRIPTION,DEATHS_TOTAL_DESCRIPTION,URL,COMMENTS
9,219,1606,1.0,23.0,,,31.9,140.0,CHICHIJIMA-TOKAIDO,JAPAN,Japan,Volcano,Definite Tsunami,,,,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,<P><blockquote><i>Reference #414:</i></blockqu...
24,269,1650,9.0,29.0,,,36.404,25.396,THERA ISLAND (SANTORINI),GREECE,Mediterranean Sea,Volcano and Landslide,Definite Tsunami,6.3,,6.0,,,Many (~101 to 1000 people),https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"""At the end of 1650 a submarine outbreak occur..."
33,21,46,,,,,36.404,25.396,THERA ISLAND (SANTORINI),GREECE,Mediterranean Sea,Volcano,Definite Tsunami,6.2,,3.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,46 AD July 6. Thera. At the same time as an ec...
39,46,551,4.0,,,,38.4,22.3,ETOLIA,GREECE,Mediterranean Sea,Earthquake and Landslide,Definite Tsunami,7.1,,4.0,,,,https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"<P>551, April. Corinthos-Patras Gulf. 38<sup>o..."
56,72,863,7.0,10.0,,,37.1,138.0,NW. HONSHU ISLAND,JAPAN,Japan,Earthquake and Landslide,Definite Tsunami,7.0,,2.0,Severe (~>$5 to $24 million),Many (~101 to 1000 houses),Many (~101 to 1000 people),https://www.ngdc.noaa.gov/hazel/view/hazards/t...,"July 10, 863, 37.1 N 138 E, magnitude 7. Lands..."


In [7]:
# WHICH COUNTRY HAS HAD THE MOST TSUNAMIS?
# NOTE(review): this tallies every row of `df`, including events labelled
# doubtful or questionable; if only confirmed tsunamis should count, the tally
# probably belongs on `df_definite` instead — confirm the intent.

# Alphabetically sorted list of the distinct COUNTRY values.
countries = sorted(set(df['COUNTRY'].unique()))

# Number of rows per COUNTRY value; Counter keeps keys in first-seen order.
count = Counter(df['COUNTRY'])

# (country, occurrences) pairs in ascending order of occurrences. sorted() is
# stable, so ties stay in first-seen order and the most frequent country is
# printed last.
sortedDict = sorted(count.items(), key=lambda pair: pair[1])
for country, occurrences in sortedDict:
  print(country, ":", occurrences)

EAST CHINA SEA : 1
DEAD SEA : 1
GERMANY : 1
HOLLAND : 1
SRI LANKA : 1
PACIFIC OCEAN : 1
NEPAL : 1
SAINT VINCENT AND THE GRENADINES : 1
TUNISIA : 1
TURKMENISTAN : 1
KENYA : 1
ERITREA : 1
SUDAN : 1
NAURU : 1
WALLIS AND FUTUNA (FRENCH TERRITORY) : 1
NETHERLANDS : 1
LEBANON : 2
ANTIGUA AND BARBUDA : 2
JORDAN : 2
NORTHWEST PACIFIC OCEAN : 2
SWEDEN : 2
TOGO : 2
MOROCCO : 2
MONTENEGRO : 2
CONGO : 2
KERMADEC ISLANDS : 2
GEORGIA : 2
COOK ISLANDS : 2
GRENADA : 2
NORTH KOREA : 3
CYPRUS : 3
MYANMAR (BURMA) : 3
MICRONESIA, FED. STATES OF : 3
SOUTH AFRICA : 3
FRENCH POLYNESIA : 3
ATLANTIC OCEAN : 3
URUGUAY : 3
ANTARCTICA : 3
GHANA : 3
UK TERRITORY : 3
SWITZERLAND : 3
ICELAND : 4
TRINIDAD AND TOBAGO : 4
EGYPT : 4
PAKISTAN : 4
BULGARIA : 4
ISRAEL : 5
DOMINICAN REPUBLIC : 5
BANGLADESH : 5
ECUADOR : 5
GREENLAND : 5
MONTSERRAT : 5
IRAN : 5
VIETNAM : 5
SYRIA : 6
HONDURAS : 6
FRANCE : 6
GUADELOUPE (FRENCH TERRITORY) : 6
CUBA : 6
GUATEMALA : 6
NICARAGUA : 6
FIJI : 7
UKRAINE : 7
CROATIA : 8
EL SALVADOR : 8
H