# Importing CSV Dataset

In [298]:
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from scipy.optimize import curve_fit
import math
import numpy as np
import chart_studio.plotly as py
from plotly.graph_objs import *

In [305]:
# Load the eruption catalogue. In this file fields are separated by ';'
# and decimal numbers are written with ',' instead of '.'.
csv_options = {'sep': ';', 'decimal': ','}
df = pd.read_csv('volcanic_activity.csv', **csv_options)

# Summary statistics of the numeric columns, shown as the cell output.
df.describe()

Unnamed: 0,Year,Month,Day,Latitude,Longitude,Elevation,Volcano Explosivity Index (VEI),DEATHS,DEATHS_DESCRIPTION,MISSING,...,TOTAL_DEATHS,TOTAL_DEATHS_DESCRIPTION,TOTAL_MISSING,TOTAL_MISSING_DESCRIPTION,TOTAL_INJURIES,TOTAL_INJURIES_DESCRIPTION,TOTAL_DAMAGE_MILLIONS_DOLLARS,TOTAL_DAMAGE_DESCRIPTION,TOTAL_HOUSES_DESTROYED,TOTAL_HOUSES_DESTROYED_DESCRIPTION
count,658.0,552.0,499.0,658.0,658.0,658.0,570.0,310.0,425.0,5.0,...,322.0,438.0,4.0,8.0,64.0,87.0,10.0,184.0,27.0,98.0
mean,1671.405775,6.331522,15.244489,16.306745,49.928179,1936.869301,2.887719,609.219355,1.656471,391.4,...,1136.875776,1.714612,489.0,2.125,240.734375,1.666667,266.53,1.76087,1170.037037,2.234694
std,786.005793,3.32191,9.065156,26.102522,100.351199,1244.770279,1.338108,2889.037021,1.013872,763.961583,...,7417.486636,1.04293,845.386302,1.125992,1252.329312,0.897887,636.873961,0.956651,2265.969846,0.96111
min,-4360.0,1.0,1.0,-62.97,-178.47,-642.0,0.0,1.0,1.0,1.0,...,1.0,1.0,3.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0
25%,1730.0,3.75,7.5,-6.102,-19.7,1053.0,2.0,2.0,1.0,3.0,...,2.0,1.0,59.25,1.0,3.75,1.0,3.475,1.0,32.0,1.0
50%,1895.0,7.0,15.0,13.736,110.442,1695.0,3.0,7.0,1.0,78.0,...,9.0,1.0,99.0,2.0,13.0,1.0,9.75,1.0,90.0,2.0
75%,1967.0,9.0,23.0,36.404,129.96775,2565.0,4.0,80.0,2.0,120.0,...,117.75,3.0,528.75,3.0,66.25,2.0,18.0,3.0,835.5,3.0
max,2014.0,12.0,31.0,65.73,177.18,5967.0,7.0,30000.0,4.0,1755.0,...,117000.0,4.0,1755.0,4.0,10000.0,4.0,2000.0,4.0,9000.0,4.0


In [300]:
# Narrow the catalogue to the rows and columns used by the plots below and
# derive a tephra-volume column from the VEI.
#
# NOTE: this cell overwrites `df`. Re-running it without first re-running the
# load cell raises a KeyError, because 'Agent' and the other columns listed in
# UNUSED_COLUMNS have already been dropped from the frame.
VEI_COLUMN = 'Volcano Explosivity Index (VEI)'
TEPHRA_COLUMN = 'Volume of erupted Tephra (km^3)'

# Columns removed before the missing-value filter is applied.
UNUSED_COLUMNS = [
    'Associated Tsunami?', 'Associated Earthquake?',
    'Location', 'Country', 'Elevation',
    'Type', 'Status', 'Time', 'Agent',
    'DEATHS', 'DEATHS_DESCRIPTION', 'MISSING', 'MISSING_DESCRIPTION',
    'INJURIES', 'INJURIES_DESCRIPTION', 'DAMAGE_MILLIONS_DOLLARS',
    'DAMAGE_DESCRIPTION', 'HOUSES_DESTROYED',
    'HOUSES_DESTROYED_DESCRIPTION', 'TOTAL_DEATHS',
    'TOTAL_DEATHS_DESCRIPTION', 'TOTAL_MISSING',
    'TOTAL_MISSING_DESCRIPTION', 'TOTAL_INJURIES',
    'TOTAL_INJURIES_DESCRIPTION', 'TOTAL_DAMAGE_MILLIONS_DOLLARS',
    'TOTAL_DAMAGE_DESCRIPTION', 'TOTAL_HOUSES_DESTROYED',
    'TOTAL_HOUSES_DESTROYED_DESCRIPTION',
]

df = (
    # Keep eruptions dated 1900 or later.
    df.loc[lambda frame: frame['Year'] >= 1900]
    # Keep rows whose 'Agent' text contains 'T'; na=False makes rows with a
    # missing 'Agent' count as non-matching.
    # NOTE(review): 'T' is presumably the tephra agent code - confirm against
    # the dataset documentation.
    .loc[lambda frame: frame['Agent'].str.contains('T', na=False)]
    .drop(columns=UNUSED_COLUMNS)
    # Runs after the column drop, so only gaps in the remaining columns
    # cause a row to be discarded.
    .dropna()
    # The row labels from before filtering are kept as an 'index' column.
    .reset_index()
    # Vectorised over the whole column instead of a per-row Python loop:
    # volume = 0.0001 * 10**VEI, i.e. each VEI step multiplies it by ten.
    .assign(**{TEPHRA_COLUMN: lambda frame: 0.0001 * 10.0 ** frame[VEI_COLUMN]})
)
df.head()

Unnamed: 0,index,Year,Month,Day,Name,Latitude,Longitude,Volcano Explosivity Index (VEI),Volume of erupted Tephra (km^3)
0,338,1900,7.0,17.0,Adatara,37.62,140.28,2.0,0.01
1,348,1902,10.0,24.0,Santa Maria,14.756,-91.552,6.0,100.0
2,350,1903,8.0,30.0,Okataina,-38.12,176.5,1.0,0.001
3,352,1905,3.0,10.0,Vesuvius,40.821,14.426,2.0,0.01
4,354,1906,4.0,4.0,Vesuvius,40.821,14.426,3.0,0.1


# Visualisation

In [301]:
# World map with one marker per row of df; the VEI column drives both the
# marker size and the marker colour, and is shortened to "VEI" in labels.
vei_column = 'Volcano Explosivity Index (VEI)'

fig = px.scatter_geo(
    df,
    lat='Latitude',
    lon='Longitude',
    size=vei_column,
    color=vei_column,
    hover_name='Name',
    labels={vei_column: "VEI"},
    projection='natural earth',
    title="Volcanic activity from 1900 to 2010",
)
fig.show()

In [302]:
# Sum of the VEI values of all rows sharing the same 'Year'.
vol_year = df.groupby(['Year'])['Volcano Explosivity Index (VEI)'].sum()

# Axis styling is named up front so the layout call stays short.
year_axis = {'title': "Year", 'color': 'black'}
vei_axis = {'showgrid': True,
            'title': "Sum of VEI over each year",
            'color': 'black'}

# One bar per year; the legend is hidden because only one series is plotted.
fig = px.bar(vol_year).update_layout(
    showlegend=False,
    xaxis=year_axis,
    yaxis=vei_axis,
    title="Volcanic activity over years from 1900 to 2010",
)
fig.show()

In [303]:
# Sum of the derived tephra-volume values of all rows sharing the same 'Year'.
Tephra_year = df.groupby(['Year'])['Volume of erupted Tephra (km^3)'].sum()

# Axis styling is named up front so the layout call stays short.
year_axis = {'title': "Year", 'color': 'black'}
tephra_axis = {'showgrid': True,
               'title': "Volume of erupted Tephra (km^3)",
               'color': 'black'}

# One bar per year; the legend is hidden because only one series is plotted.
fig = px.bar(Tephra_year).update_layout(
    showlegend=False,
    xaxis=year_axis,
    yaxis=tephra_axis,
    title="Volcanic activity over years from 1900 to 2010",
)
fig.show()