In [68]:
import pandas as pd
import plotly.express as px

# Load the data: read the Excel workbook into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
data = "GSAF5.xlsx"
df = pd.read_excel(io=data)

In [69]:
# Clean the data
# Coerce 'Year' to numeric first: values that cannot be parsed become NaN
# instead of making astype(int) raise. NaN is then mapped to the sentinel 0.
# assign() builds a new frame, so re-running this cell on the already-filtered
# `df` does not write into a slice.
df = df.assign(
    Year=pd.to_numeric(df['Year'], errors='coerce').fillna(0).astype(int)
)

# Keep only rows with a positive year (this drops the 0 sentinel as well).
df = df[df['Year'] > 0]

# Subset with Year strictly greater than 1900, used by the per-year histogram.
df1900 = df[df['Year'] > 1900]

In [70]:
# Visualization 1: Shark Attacks by Year
# Histogram over the 'Year' column of df1900, with at most 100 bins requested.
fig1 = px.histogram(
    data_frame=df1900,
    x='Year',
    title='Shark Attacks by Year',
    nbins=100,
)
fig1.show()

In [71]:
# Visualization 2: Shark Attacks by Country
# One pie slice per distinct value of 'Country'; slice text is drawn inside
# the slices. The previous argument-less update_layout() call changed nothing
# and has been removed.
fig2 = px.pie(df, names='Country', title='Shark Attacks by Country')
fig2.update_traces(textposition='inside')
fig2.show()

In [72]:
# Aggregate the number of shark attacks by country into a two-column frame:
# 'Country' (the distinct values) and 'Num_Attacks' (how often each occurs).
country_attacks = (
    df['Country']
    .value_counts()
    .rename_axis('Country')
    .reset_index(name='Num_Attacks')
)

# Visualization 8: Choropleth Map of Shark Attacks by Country
# Locations are matched by country name and coloured by attack count.
fig3 = px.choropleth(
    data_frame=country_attacks,
    locations='Country',
    locationmode='country names',
    color='Num_Attacks',
    color_continuous_scale=px.colors.sequential.Plasma,
    title='Shark Attacks by Country',
)
fig3.show()

In [73]:
# Visualization 3: Shark Attacks by Activity
# One pie slice per distinct value of 'Activity'.
fig4 = px.pie(data_frame=df, names='Activity', title='Shark Attacks by Activity')

# Put percentage and label inside each slice.
fig4.update_traces(textinfo='percent+label', textposition='inside')

# Hide the legend, and hide slice text that would not fit at 12pt.
fig4.update_layout(
    showlegend=False,
    uniformtext_minsize=12,
    uniformtext_mode='hide',
)
fig4.show()

In [77]:
# Get the top 10 most common activities (value -> occurrence count)
top_activities = df['Activity'].value_counts().nlargest(n=10)

# Visualization 10: Shark Attacks by Activity
# Slice names come from the Series index, slice sizes from its counts.
fig10 = px.pie(
    data_frame=top_activities,
    values=top_activities.values,
    names=top_activities.index,
    title='Top 10 Most Common Activities During Shark Attacks',
)
fig10.show()

In [74]:
# Visualization 5: Shark Attacks by Species
# Resolve the species column by header rather than by a fixed position, so a
# change in the sheet's column order cannot silently plot a different column.
# Headers are compared after stripping whitespace, because spreadsheet headers
# may carry stray spaces.
# NOTE(review): falls back to the original positional lookup (column 13) when
# no header matches -- confirm which column that is in the current workbook.
species_cols = [col for col in df.columns if str(col).strip() == 'Species']
species = df[species_cols[0]] if species_cols else df.iloc[:, 13]

# Ten most frequent values and their counts.
species_counts = species.value_counts().nlargest(10)
fig5 = px.bar(species_counts, x=species_counts.index, y=species_counts.values,
              title='Top 10 Shark Species Involved in Attacks')
fig5.show()

In [75]:
# Visualization 6: Shark Attacks by Fatal (Y/N)
# Count how often each distinct value of the 'Fatal (Y/N)' column occurs,
# then draw one bar per value.
fatal_counts = df['Fatal (Y/N)'].value_counts()
fig6 = px.bar(
    data_frame=fatal_counts,
    x=fatal_counts.index,
    y=fatal_counts.values,
    title='Shark Attacks: Fatal vs Non-Fatal',
)
fig6.show()

In [76]:
# Visualization 7: Shark Attacks by Type
# Count how often each distinct value of the 'Type' column occurs,
# then draw one bar per value.
type_counts = df['Type'].value_counts()
fig7 = px.bar(
    data_frame=type_counts,
    x=type_counts.index,
    y=type_counts.values,
    title='Shark Attacks by Type',
)
fig7.show()