## Setup

In [2]:
# Import the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import plotly.express as px

# Set pandas option so that up to 23 columns are displayed when a dataframe is rendered
pd.options.display.max_columns = 23

In [3]:
# Name of the CSV file to analyse; the relative path is resolved against the
# notebook's current working directory.
file_name = 'quakes-cleaned.csv'

# Load the dataset once; every insight below reads from this dataframe.
df = pd.read_csv(file_name)

### Insight 1: Most Earthquakes by `locationSource`

```python
# Count the non-null `mag` values per locationSource. After reset_index() the
# count is stored in a column that is still called 'mag'.
count_by_location_source = df.groupby('locationSource')['mag'].count().reset_index()

fig = px.bar(
    count_by_location_source,
    x='locationSource',
    y='mag',
    title='Number of Earthquakes by locationSource',
    color='locationSource',
    # The 'mag' column holds counts here, so relabel it; otherwise the y-axis
    # would read "mag" and suggest a magnitude.
    labels={'locationSource': 'Location Source', 'mag': 'Number of Earthquakes'},
)
# Order the bars from most to fewest events so the leading source is obvious.
fig.update_xaxes(categoryorder='total descending')
fig.show()
```

**Description:**
This bar chart visualizes the count of earthquakes for each `locationSource`. Each bar represents a source, and the height of the bar corresponds to the number of quakes recorded for that source. From the plot it is visible that the `ak` source had the highest number of quakes compared to the rest of the sources.

In [4]:
# Count the non-null `mag` values per locationSource. After reset_index() the
# count is stored in a column that is still called 'mag'.
count_by_location_source = df.groupby('locationSource')['mag'].count().reset_index()

fig = px.bar(
    count_by_location_source,
    x='locationSource',
    y='mag',
    title='Number of Earthquakes by locationSource',
    color='locationSource',
    # The 'mag' column holds counts here, so relabel it; otherwise the y-axis
    # would read "mag" and suggest a magnitude.
    labels={'locationSource': 'Location Source', 'mag': 'Number of Earthquakes'},
)
# Order the bars from most to fewest events so the leading source is obvious.
fig.update_xaxes(categoryorder='total descending')
fig.show()

### Insight 2: Top N `locationSource` with Highest Average Magnitude and Count of Earthquakes

```python
# How many locationSource groups to keep in the chart.
top_n = 10

# Mean magnitude and number of non-null magnitudes per locationSource,
# ordered from the highest mean downwards.
region_stats = (
    df.groupby('locationSource')['mag']
    .agg(mean='mean', count='count')
    .sort_values('mean', ascending=False)
)
top_locations = region_stats.head(top_n).reset_index()

# Marker size encodes the count, marker colour encodes the mean magnitude.
fig = px.scatter(
    top_locations,
    x='locationSource',
    y='mean',
    color='mean',
    size='count',
    labels={
        'locationSource': 'Location Source',
        'mean': 'Mean Magnitude',
        'count': 'Number of Earthquakes',
    },
    title=f'Top {top_n} locationSource with Highest Average Magnitude and Count of Earthquakes',
)
fig.show()
```

**Description:**
This scatter plot visualizes the top N `locationSource` values with the highest average earthquake magnitude. The size of the markers represents the count of earthquakes, and the color represents the mean magnitude. The location `us` had one of the highest means as well as the highest number of quakes compared to other locations. This suggests that this location will more frequently have high-magnitude quakes than the other locations. For example, in this plot the highest mean magnitude was in location `pt`, but it occurred only once, whereas location `us` had more than 1000 occurrences.

In [4]:
# How many locationSource groups to keep in the chart.
top_n = 10

# Mean magnitude and number of non-null magnitudes per locationSource,
# ordered from the highest mean downwards.
region_stats = (
    df.groupby('locationSource')['mag']
    .agg(mean='mean', count='count')
    .sort_values('mean', ascending=False)
)
top_locations = region_stats.head(top_n).reset_index()

# Marker size encodes the count, marker colour encodes the mean magnitude.
fig = px.scatter(
    top_locations,
    x='locationSource',
    y='mean',
    color='mean',
    size='count',
    labels={
        'locationSource': 'Location Source',
        'mean': 'Mean Magnitude',
        'count': 'Number of Earthquakes',
    },
    title=f'Top {top_n} locationSource with Highest Average Magnitude and Count of Earthquakes',
)
fig.show()

### Insight 3: Latitude and Longitude with the Highest Frequency of Earthquakes

```python
# Count events per exact (latitude, longitude) pair and pick the pair that
# occurs most often.
coordinate_counts = df.groupby(['latitude', 'longitude']).size()
top_coordinates = coordinate_counts.idxmax()
latitude, longitude = top_coordinates
top_count = coordinate_counts.max()

# Base layer: every event, coloured by magnitude.
fig = px.scatter_geo(df, lat='latitude', lon='longitude', title='Latitude and Longitude with the Highest Frequency of Earthquakes',
                     labels={'latitude': 'Latitude', 'longitude': 'Longitude'},
                     hover_data=['depth', 'mag'], color='mag')

# Highlight the most frequent coordinate; previously it was computed but never
# drawn, so the figure did not show what its title promises.
fig.add_scattergeo(
    lat=[latitude],
    lon=[longitude],
    mode='markers',
    marker=dict(size=14, symbol='star', color='red', line=dict(width=1, color='black')),
    name=f'Most frequent: ({latitude}, {longitude}), {top_count} events',
)
# Keep the legend entry for the highlight clear of the magnitude colour bar.
fig.update_layout(legend=dict(yanchor='top', y=0.99, xanchor='left', x=0.01))
fig.show()
```

**Description:**
This scatter plot on a geographic map shows every earthquake at its latitude and longitude, so densely clustered regions reveal where earthquakes occur most frequently. The color of each point represents the magnitude, and additional information (depth and magnitude) is available on hover.

In [5]:
# Count events per exact (latitude, longitude) pair and pick the pair that
# occurs most often.
coordinate_counts = df.groupby(['latitude', 'longitude']).size()
top_coordinates = coordinate_counts.idxmax()
latitude, longitude = top_coordinates
top_count = coordinate_counts.max()

# Base layer: every event, coloured by magnitude.
fig = px.scatter_geo(df, lat='latitude', lon='longitude', title='Latitude and Longitude with the Highest Frequency of Earthquakes',
                     labels={'latitude': 'Latitude', 'longitude': 'Longitude'},
                     hover_data=['depth', 'mag'], color='mag')

# Highlight the most frequent coordinate; previously it was computed but never
# drawn, so the figure did not show what its title promises.
fig.add_scattergeo(
    lat=[latitude],
    lon=[longitude],
    mode='markers',
    marker=dict(size=14, symbol='star', color='red', line=dict(width=1, color='black')),
    name=f'Most frequent: ({latitude}, {longitude}), {top_count} events',
)
# Keep the legend entry for the highlight clear of the magnitude colour bar.
fig.update_layout(legend=dict(yanchor='top', y=0.99, xanchor='left', x=0.01))
fig.show()

### Insight 4: Distribution of Earthquake Depths

```python
# Insight 4: histogram of the `depth` column, with a rug plot in the margin
depth_distribution = df['depth']
num_bins = 30

fig = px.histogram(
    depth_distribution,
    title='Distribution of Earthquake Depths',
    labels={'value': 'Depth'},
    nbins=num_bins,
    opacity=0.7,
    marginal='rug',
)
fig.show()
```

**Description:**
This insight provides the distribution of earthquake depths. The histogram shows the number of earthquakes falling into each depth bin, which helps in understanding the depth characteristics of earthquakes in the dataset.

In [8]:
# Insight 4: histogram of the `depth` column, with a rug plot in the margin
depth_distribution = df['depth']
num_bins = 30

fig = px.histogram(
    depth_distribution,
    title='Distribution of Earthquake Depths',
    labels={'value': 'Depth'},
    nbins=num_bins,
    opacity=0.7,
    marginal='rug',
)
fig.show()

### Insight 5: Distribution of Earthquake Types

```python
# Insight 5: Distribution of Earthquake Types
# Name both columns explicitly. The names produced by
# value_counts().reset_index() changed in pandas 2.0, and px.pie below needs
# columns called 'type' and 'count' to exist.
earthquake_types_distribution = (
    df['type']
    .value_counts()
    .rename_axis('type')
    .reset_index(name='count')
)

# hole=0.5 renders the pie as a donut chart.
fig = px.pie(earthquake_types_distribution, names='type', values='count',
             title='Distribution of Earthquake Types', hole=0.5)
fig.show()
```

**Description:**
This pie chart visualizes the distribution of quake types, i.e. the share of records belonging to each value of the `type` column.

In [13]:
# Insight 5: Distribution of Earthquake Types
# Name both columns explicitly. The names produced by
# value_counts().reset_index() changed in pandas 2.0, and px.pie below needs
# columns called 'type' and 'count' to exist.
earthquake_types_distribution = (
    df['type']
    .value_counts()
    .rename_axis('type')
    .reset_index(name='count')
)

# hole=0.5 renders the pie as a donut chart.
fig = px.pie(earthquake_types_distribution, names='type', values='count',
             title='Distribution of Earthquake Types', hole=0.5)
fig.show()