# Code with me: Data Exploration with Pandas using Volcano.csv file

In [None]:
# Target Age Group: Teens with Python coding background

Note: This dataset does not include the date of each volcano's last eruption. It is a list of known volcanoes with location data. The data on global volcanic hazard, historical events, population exposure, vulnerability, and impact was provided to GAR15 by the Global Volcano Model (GVM) and the International Association of Volcanology and Chemistry of the Earth’s Interior (IAVCEI).

Download data from <a href="https://data.humdata.org/dataset/a60ac839-920d-435a-bf7d-25855602699d">here</a>

# Pandas Cheat Sheets

In [None]:
# Pandas Basic
# http://datacamp-community-prod.s3.amazonaws.com/dbed353d-2757-4617-8206-8767ab379ab3

# Pandas Data Wrangling
# https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf

## How to use Pandas to read CSV file?

In [4]:
import pandas as pd
# Read the CSV into a DataFrame; the relative path means "volcano.csv" must sit
# in the notebook's working directory.
df = pd.read_csv("volcano.csv")
# A bare expression on the last line of a cell is displayed as the cell output.
df

Unnamed: 0,VolcanoID,V_Name,Country,Region,Subregion,Latitude,Longitude,PEI,H_active,VEI_Holoce,hazard,class,risk
0,210010,West Eifel Volcanic Field,Germany,Mediterranean and W Asia,Western Europe,50.170,6.85,6,0,Unknown VEI,,U-HR,
1,210020,Cha?ne des Puys,France,Mediterranean and W Asia,Western Europe,45.775,2.97,7,0,Unknown VEI,,U-HR,
2,210030,Olot Volcanic Field,Spain,Mediterranean and W Asia,Western Europe,42.170,2.53,5,0,No confirmed eruptions,,U-NHHR,
3,210040,Calatrava Volcanic Field,Spain,Mediterranean and W Asia,Western Europe,38.870,-4.02,6,0,Unknown VEI,,U-HR,
4,211001,Larderello,Italy,Mediterranean and W Asia,Italy,43.250,10.87,4,0,3,,U-HR,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1541,390100,Candlemas Island,United Kingdom,Antarctica,Antarctica and South Sandwich Islands,-57.080,-26.67,1,1,2,,U-HHR,
1542,390110,Hodson,United Kingdom,Antarctica,Antarctica and South Sandwich Islands,-56.700,-27.15,1,0,No confirmed eruptions,,U-NHHR,
1543,390120,Leskov Island,United Kingdom,Antarctica,Antarctica and South Sandwich Islands,-56.670,-28.13,1,0,No confirmed eruptions,,U-NHHR,
1544,390130,Zavodovski,United Kingdom,Antarctica,Antarctica and South Sandwich Islands,-56.300,-27.57,1,1,2,,U-HHR,


## What is a Pandas DataFrame?

In [5]:
# Print a summary of the DataFrame: each column's name, non-null count and dtype,
# plus the index range and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1546 entries, 0 to 1545
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   VolcanoID   1546 non-null   int64  
 1   V_Name      1546 non-null   object 
 2   Country     1546 non-null   object 
 3   Region      1546 non-null   object 
 4   Subregion   1546 non-null   object 
 5   Latitude    1546 non-null   float64
 6   Longitude   1546 non-null   float64
 7   PEI         1546 non-null   int64  
 8   H_active    1546 non-null   int64  
 9   VEI_Holoce  1546 non-null   object 
 10  hazard      328 non-null    float64
 11  class       1218 non-null   object 
 12  risk        328 non-null    float64
dtypes: float64(4), int64(3), object(6)
memory usage: 157.1+ KB


## How to count the unique values in a column?

In [10]:
# How many volcanoes are there in each region?
# value_counts() tallies the rows per distinct value, largest count first.
df.Region.value_counts()

South America                   197
Indonesia                       145
Japan, Taiwan, Marianas         143
Africa and Red Sea              141
Kamchatka and Mainland Asia     140
M?xico and Central America      118
Alaska                           92
Melanesia and Australia          83
Canada and Western USA           70
Philippines and SE Asia          59
Middle East and Indian Ocean     56
New Zealand to Fiji              56
Kuril Islands                    48
Mediterranean and W Asia         46
Atlantic Ocean                   37
Hawaii and Pacific Ocean         34
Iceland and Arctic Ocean         33
Antarctica                       32
West Indies                      16
Name: Region, dtype: int64

## How to select data subset from DataFrame?

In [17]:
# Keep only the rows whose Country column equals 'Indonesia'.
df.loc[df["Country"] == 'Indonesia']

Unnamed: 0,VolcanoID,V_Name,Country,Region,Subregion,Latitude,Longitude,PEI,H_active,VEI_Holoce,hazard,class,risk
384,261020,Seulawah Agam,Indonesia,Indonesia,Sumatra,5.448,95.658,4,1,2,,U-HHR,
385,261030,Peuet Sague,Indonesia,Indonesia,Sumatra,4.914,96.329,3,1,2,2.0,,2.0
386,261050,"Telong, Bur ni",Indonesia,Indonesia,Sumatra,4.769,96.821,5,1,2,1.0,,2.0
387,261070,Sibayak,Indonesia,Indonesia,Sumatra,3.230,98.520,5,1,Unknown VEI,,U-HHR,
388,261080,Sinabung,Indonesia,Indonesia,Sumatra,3.170,98.392,5,1,2,,U-HHR,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
521,268063,Moti,Indonesia,Indonesia,Halmahera,0.450,127.400,3,0,No confirmed eruptions,,U-NHHR,
522,268070,Makian,Indonesia,Indonesia,Halmahera,0.320,127.400,3,1,4,3.0,,2.0
523,268071,Tigalalu,Indonesia,Indonesia,Halmahera,0.070,127.420,3,0,No confirmed eruptions,,U-NHHR,
524,268072,Amasing,Indonesia,Indonesia,Halmahera,-0.530,127.480,2,0,No confirmed eruptions,,U-NHHR,


## How to save to CSV file using Pandas?

In [18]:
# Select the Indonesian rows with a boolean mask and keep them for later cells.
indon = df.loc[df["Country"] == 'Indonesia']
# Write the subset to a new CSV file in the working directory.
indon.to_csv("indonesia_v.csv")

# Map with Geographical Information

## How to embed a map in Notebook?

In [None]:
import folium

# Build an interactive map. `location` is [latitude, longitude]; this point is
# close to Seulawah Agam in Sumatra. A larger `zoom_start` starts more zoomed in.
#
# 'OpenStreetMap' is folium's default tile set. The Stamen tile sets are no
# longer built into recent folium releases, so requesting 'Stamen Terrain' by
# name fails on a fresh install. Other tile sets can still be chosen by name or
# URL; see the folium.Map documentation for the current options.
m = folium.Map(location=[5.5236, 95.6750], tiles='OpenStreetMap', zoom_start=5)
m

## How to add a map marker?

In [None]:
tooltip = 'Click me!'

# Two hand-written markers: ([latitude, longitude], popup HTML).
sample_markers = [
    ([3.170, 98.392], '<i>Mt Sinabung in Sumatra</i>'),
    ([0.070, 127.420], '<b>Mt Tigalalu in Halmahera</b>'),
]

# Attach each marker to the existing map `m`; the tooltip shows on hover,
# the popup shows on click.
for location, popup_html in sample_markers:
    folium.Marker(location, popup=popup_html, tooltip=tooltip).add_to(m)

m

# DataFrame: Subset

## How to get the count of records from a DataFrame?

In [44]:
# count() returns the number of non-null values in the VolcanoID column.
indon["VolcanoID"].count()

142

## How to get each row of record by the index?

In [43]:
# iloc selects by position: 0 is the first row of the subset, even though that
# row's index label (shown as "Name" in the output) is 384.
indon.iloc[0]

VolcanoID            261020
V_Name        Seulawah Agam
Country           Indonesia
Region            Indonesia
Subregion           Sumatra
Latitude              5.448
Longitude            95.658
PEI                       4
H_active                  1
VEI_Holoce                2
hazard                  NaN
class                 U-HHR
risk                    NaN
Name: 384, dtype: object

## How to get the attribute from a Python object?

In [48]:
# Take the first row by position, then read its Latitude value by column name.
indon.iloc[0]["Latitude"]

5.448

# Use a FOR loop

## Test with 5 Markers

In [None]:
# Let's use a for loop to add the first few volcanoes to the map as a quick test.
tooltip = 'Click me!'

# How many markers to add while testing.
test_markers = 5

total = indon.VolcanoID.count()

# Bound the range instead of breaking when index == 5: index 5 is the sixth
# record, so breaking after adding it would place 6 markers, not 5.
# min() also keeps the loop safe if there are fewer records than test_markers.
for index in range(min(total, test_markers)):
    record = indon.iloc[index]

    lat       = record.Latitude
    lon       = record.Longitude
    vname     = record.V_Name
    subregion = record.Subregion

    # Popup text is HTML, e.g. "<i>Mt Sinabung in Sumatra</i>".
    formatted = "<i>Mt {} in {}</i>".format(vname, subregion)

    folium.Marker([lat, lon], popup=formatted, tooltip=tooltip).add_to(m)

m

## Complete Code

In [None]:
# Complete code: put every Indonesian volcano on a map.
import folium

# Create a new map so this cell starts from a clean slate.
# 'OpenStreetMap' is folium's default tile set. The Stamen tile sets are no
# longer built into recent folium releases, so requesting 'Stamen Terrain' by
# name fails on a fresh install. See the folium.Map documentation for other
# tile options.
m = folium.Map(location=[5.5236, 95.6750], tiles='OpenStreetMap', zoom_start=5)
tooltip = 'Click me!'

# Number of records in the Indonesian subset (non-null VolcanoID values).
total = indon.VolcanoID.count()

for index in range(total):
    # Fetch the record at this position in the subset.
    record = indon.iloc[index]

    lat       = record.Latitude
    lon       = record.Longitude
    vname     = record.V_Name
    subregion = record.Subregion

    # Popup text is HTML, e.g. "<i>Mt Sinabung in Sumatra</i>".
    formatted = "<i>Mt {} in {}</i>".format(vname, subregion)

    folium.Marker([lat, lon], popup=formatted, tooltip=tooltip).add_to(m)

m

# Coding Challenge

In [None]:
# We have completed the map for Indonesia.

# Challenge:
# Can you make a map of all the volcanoes in the world?