In [17]:
import pandas as pd
from warnings import filterwarnings

# Suppress FutureWarning messages for the rest of the session.
filterwarnings(action='ignore', category=FutureWarning)

# Location of the deaths CSV.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs as-is on the author's machine; consider a repo-relative path.
DEATHS = r'C:\Users\Ksnes\OneDrive\Documents\GitHub\Machine-Learning\8000m peak Deaths\deaths_on_eight-thousanders.csv'

# The first CSV column has no header and was being loaded as a data column
# named "Unnamed: 0" (it appears to be a saved row index). Loading it as the
# index keeps it out of the data columns.
df = pd.read_csv(filepath_or_buffer=DEATHS, parse_dates=['Date'], index_col=0)

# Year of each death, derived from the parsed 'Date' column.
df['year'] = df['Date'].dt.year
df.head()

Unnamed: 0,Date,Name,Nationality,Cause of death,Mountain,year
0,2023-07-27,Muhammad Hassan,Pakistan,Unknown,K2,2023
1,2022-07-22,Matthew Eakin,Australia,Fall,K2,2022
2,2022-07-22,Richard Cartier,Canada,Fall,K2,2022
3,2022-07-21,Ali Akbar Sakhi,Afghanistan,"Unknown, suspected altitude sickness",K2,2022
4,2021-07-25,Rick Allen,United Kingdom,Avalanche,K2,2021


In [19]:
from arrow import now
from geocoder import arcgis

time_start = now()

# Number of deaths per mountain. The column names are set explicitly so the
# frame has the same shape regardless of how reset_index() labels them.
value_counts_df = df['Mountain'].value_counts().reset_index()
value_counts_df.columns = ['Mountain', 'count']

# Distinct mountain names, computed once and reused for both the geocoding
# loop and the resulting frame so the rows are guaranteed to line up.
mountains = df['Mountain'].unique()

# Coordinates collected in the same order as `mountains`.
latitudes = []
longitudes = []

# Geocode every distinct mountain name (one network request per name).
for mountain in mountains:
    location = arcgis(location=mountain)
    # arcgis() hands back a query object rather than None, so an
    # `is not None` check cannot detect a failed lookup. Inspect the
    # coordinates instead: a failed lookup yields no usable `latlng`.
    latlng = getattr(location, 'latlng', None)
    if latlng:
        latitudes.append(latlng[0])
        longitudes.append(latlng[1])
    else:
        # Lookup failed: keep the row but leave its coordinates empty.
        latitudes.append(None)
        longitudes.append(None)

# One row per mountain with its coordinates.
mountain_df = pd.DataFrame({
    'Mountain': mountains,
    'latitude': latitudes,
    'longitude': longitudes
})

# Attach the death count to each mountain's coordinates.
mountain_df = mountain_df.merge(value_counts_df, on='Mountain')

print('done in {}'.format(now() - time_start))

done in 0:00:37.398480


In [20]:
from plotly.express import scatter_mapbox
# Bubble map of the mountains: one marker per row of mountain_df, sized by
# its 'count' column and labelled with the mountain name on hover.
death_map = scatter_mapbox(
    data_frame=mountain_df,
    lat='latitude',
    lon='longitude',
    size='count',
    hover_name='Mountain',
    mapbox_style='open-street-map',
    zoom=3,
    height=900,
)
death_map

In [21]:
from plotly.express import histogram
# Histogram of deaths over time, coloured by mountain. The rows are sorted
# by mountain name before being handed to plotly.
deaths_sorted_by_mountain = df.sort_values(by='Mountain')
histogram(
    data_frame=deaths_sorted_by_mountain,
    x='Date',
    color='Mountain',
)

In [22]:
# Histogram of the 'Mountain' column: one bar per mountain.
histogram(
    data_frame=df,
    x='Mountain',
)

In [23]:
# One horizontal histogram per categorical column. Each figure is shown
# explicitly because the figures are created inside a loop rather than as
# the cell's final expression.
categorical_columns = ('Nationality', 'Cause of death')
for column in categorical_columns:
    figure = histogram(data_frame=df, y=column, height=1500)
    figure.show()

In [24]:
from plotly.express import strip
# Strip plot with one point per death: mountain on the x axis, nationality
# on the y axis, coloured by year, with name and date shown on hover.
strip(
    data_frame=df,
    x='Mountain',
    y='Nationality',
    color='year',
    hover_name='Name',
    hover_data=['Date'],
    stripmode='overlay',
    height=1500,
)

In [25]:
from plotly.express import scatter
# Count the rows for every (Nationality, Mountain) pair, then draw one
# bubble per pair whose size is that count.
pair_counts = (
    df.groupby(by=['Nationality', 'Mountain'])
    .size()
    .reset_index(name='count')
)
scatter(
    data_frame=pair_counts,
    x='Mountain',
    y='Nationality',
    size='count',
    height=1500,
)