# Questions to answer

- Which are the most difficult mountains to climb?
- What were the most popular years for climbing?
- Are taller mountains more difficult to climb?
- Are mountains over 8000m more popular for climbing?
- Which mountain ranges contain the highest mountains?

In [None]:
import pandas as pd

In [None]:
# Load the raw mountain table; the path is resolved relative to the working directory.
df = pd.read_csv("mountains.csv")

In [None]:
# Preview the first five rows of the freshly loaded data.
df.head(5)

In [None]:
# Per-column overview: non-null counts and dtypes.
df.info()

In [None]:
# dtypes is an attribute rather than a method, so it is read without parentheses.
df.dtypes

In [None]:
# Summary statistics for the columns pandas parsed as numeric.
df.describe()

In [None]:
# Use the mountain name as the row label so individual peaks can be looked up by name.
df = df.set_index("Mountain")

In [None]:
# Confirm that "Mountain" is now the index.
df.head(5)

In [None]:
# Discard the columns that are not needed for the analysis.

df = df.drop(columns=["Rank", "Height (ft)", "Coordinates", "Parent mountain"])

In [None]:
# Confirm that the four columns are gone.
df.head(5)

In [None]:
# Look up a single mountain's row by its index label.
df.loc["Muztagh Ata"]

In [None]:
# Remove two peaks by their index label.
# NOTE(review): presumably these rows hold ascent counts that are not plain
# numbers and would break the integer casts later on - confirm against the CSV.
df = df.drop(index=["Mount Everest / Sagarmatha / Chomolungma", "Muztagh Ata"])

In [None]:
# Confirm that the two rows were removed.
df.head(5)

#### Selecting entries based on a Boolean condition

In [34]:
# Rows whose "First ascent" holds the text "unclimbed" instead of a year.
df.loc[df["First ascent"].eq("unclimbed")]

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Gangkhar Puensum,7570,2995,Kula Kangri Himalaya,unclimbed,0,3.0
Labuche Kang III / East,7250,570,Labuche Himalaya,unclimbed,0,0.0
Karjiang,7221,880,Kula Kangri Himalaya,unclimbed,0,2.0
Tongshanjiabu,7207,1757,Lunana Himalaya,unclimbed,0,0.0


In [35]:
# The column on its own; it is stored with dtype object rather than as integers.
df["First ascent"]

Mountain
K2 / Qogir / Godwin Austen      1954
Kangchenjunga                   1955
Lhotse                          1956
Makalu                          1955
Cho Oyu                         1954
                                ... 
Noijin Kangsang / Norin Kang    1986
Langtang Ri                     1981
Kangphu Kang                    2002
Singhi Kangri                   1976
Lupghar Sar                     1979
Name: First ascent, Length: 116, dtype: object

In [36]:
# The Boolean mask on its own: True where the peak is marked "unclimbed".
df["First ascent"].eq("unclimbed")

Mountain
K2 / Qogir / Godwin Austen      False
Kangchenjunga                   False
Lhotse                          False
Makalu                          False
Cho Oyu                         False
                                ...  
Noijin Kangsang / Norin Kang    False
Langtang Ri                     False
Kangphu Kang                    False
Singhi Kangri                   False
Lupghar Sar                     False
Name: First ascent, Length: 116, dtype: bool

In [37]:
# Keep only the mountains that have been climbed.
# Taking .copy() makes the filtered result an independent DataFrame, so the
# column assignments made later do not trigger SettingWithCopyWarning.

df = df.loc[df["First ascent"].ne("unclimbed")].copy()

In [38]:
# Re-inspect the row count and dtypes after removing the unclimbed mountains.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112 entries, K2 / Qogir / Godwin Austen to Lupghar Sar
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Height (m)                 112 non-null    int64  
 1   Prominence (m)             112 non-null    int64  
 2   Range                      112 non-null    object 
 3   First ascent               112 non-null    object 
 4   Ascents bef. 2004          110 non-null    object 
 5   Failed attempts bef. 2004  110 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 6.1+ KB


In [39]:
# Another mask example: True for every mountain taller than 8000 m.

df["Height (m)"].gt(8000)

Mountain
K2 / Qogir / Godwin Austen       True
Kangchenjunga                    True
Lhotse                           True
Makalu                           True
Cho Oyu                          True
                                ...  
Noijin Kangsang / Norin Kang    False
Langtang Ri                     False
Kangphu Kang                    False
Singhi Kangri                   False
Lupghar Sar                     False
Name: Height (m), Length: 112, dtype: bool

In [40]:
# Use the mask to keep only the rows above 8000 m and preview the first five.

df.loc[df["Height (m)"].gt(8000)].head()

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
K2 / Qogir / Godwin Austen,8611,4017,Baltoro Karakoram,1954,45,44.0
Kangchenjunga,8586,3922,Kangchenjunga Himalaya,1955,38,24.0
Lhotse,8516,610,Mahalangur Himalaya,1956,26,26.0
Makalu,8485,2386,Mahalangur Himalaya,1955,45,52.0
Cho Oyu,8188,2340,Mahalangur Himalaya,1954,79,28.0


In [41]:
# All mountains that belong to the Annapurna Himalaya range.

df.loc[df["Range"].eq("Annapurna Himalaya")]

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Annapurna I,8091,2984,Annapurna Himalaya,1950,36,47.0
Annapurna II,7937,2437,Annapurna Himalaya,1960,6,19.0
Annapurna III,7555,703,Annapurna Himalaya,1961,10,17.0
Gangapurna,7455,563,Annapurna Himalaya,1965,8,13.0
Annapurna Dakshin,7219,775,Annapurna Himalaya,1964,10,16.0


In [42]:
# Look for missing values: any column whose non-null count is below the row count has gaps.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 112 entries, K2 / Qogir / Godwin Austen to Lupghar Sar
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Height (m)                 112 non-null    int64  
 1   Prominence (m)             112 non-null    int64  
 2   Range                      112 non-null    object 
 3   First ascent               112 non-null    object 
 4   Ascents bef. 2004          110 non-null    object 
 5   Failed attempts bef. 2004  110 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 10.2+ KB


In [43]:
# Discard every row that has a missing value in any column.

df = df.dropna()

In [44]:
# Confirm that every column now has the same non-null count as the number of rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 110 entries, K2 / Qogir / Godwin Austen to Lupghar Sar
Data columns (total 6 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Height (m)                 110 non-null    int64  
 1   Prominence (m)             110 non-null    int64  
 2   Range                      110 non-null    object 
 3   First ascent               110 non-null    object 
 4   Ascents bef. 2004          110 non-null    object 
 5   Failed attempts bef. 2004  110 non-null    float64
dtypes: float64(1), int64(2), object(3)
memory usage: 6.0+ KB


In [45]:
# Fixing data types: list the current dtype of each column to see which ones need converting.

df.dtypes

Height (m)                     int64
Prominence (m)                 int64
Range                         object
First ascent                  object
Ascents bef. 2004             object
Failed attempts bef. 2004    float64
dtype: object

In [46]:
# Preview the values that are about to be cast to integers.
df.head(5)

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
K2 / Qogir / Godwin Austen,8611,4017,Baltoro Karakoram,1954,45,44.0
Kangchenjunga,8586,3922,Kangchenjunga Himalaya,1955,38,24.0
Lhotse,8516,610,Mahalangur Himalaya,1956,26,26.0
Makalu,8485,2386,Mahalangur Himalaya,1955,45,52.0
Cho Oyu,8188,2340,Mahalangur Himalaya,1954,79,28.0


### Change the data type of First ascent, Ascents bef. 2004 and Failed attempts bef. 2004 to integer

In [47]:
# The years are stored with dtype object; convert the column to integers.
df = df.astype({"First ascent": int})

In [48]:
# The ascent counts are stored with dtype object; convert the column to integers.
df = df.astype({"Ascents bef. 2004": int})

In [49]:
# float64 -> int; the cast cannot hit NaN because rows with missing values were dropped earlier.
df = df.astype({"Failed attempts bef. 2004": int})

In [50]:
# Verify the casts: the three converted columns should now report an integer dtype.
df.dtypes

Height (m)                    int64
Prominence (m)                int64
Range                        object
First ascent                  int64
Ascents bef. 2004             int64
Failed attempts bef. 2004     int64
dtype: object

### Storing the cleaned DataFrame as a new CSV

In [52]:
# Persist the cleaned table. The index is written too, so the mountain names
# end up as the first column of the file. The derived analysis columns are
# added only after this point and are therefore not part of the saved CSV.
new_filename = "Mountains_Cleaned.csv"
df.to_csv(new_filename, encoding= "utf-8")

## Analyzing Data

In [53]:
# Total attempts = successful ascents + failed attempts (both counted before 2004).

df["Total attempts"] = df["Ascents bef. 2004"].add(df["Failed attempts bef. 2004"])

In [55]:
# Success rate: the percentage of all attempts that ended in an ascent.

df["Success rate"] = df["Ascents bef. 2004"].div(df["Total attempts"]).mul(100)

In [56]:
# Difficulty score: total attempts divided by the success rate, scaled by 100.
# More attempts and a lower success rate both push the score up.

df["Difficulty"] = df["Total attempts"].div(df["Success rate"]).mul(100)

In [58]:
# Inspect the new Total attempts, Success rate and Difficulty columns.
df.head(5)

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004,Total attempts,Success rate,Difficulty
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
K2 / Qogir / Godwin Austen,8611,4017,Baltoro Karakoram,1954,45,44,89,50.561798,176.022222
Kangchenjunga,8586,3922,Kangchenjunga Himalaya,1955,38,24,62,61.290323,101.157895
Lhotse,8516,610,Mahalangur Himalaya,1956,26,26,52,50.0,104.0
Makalu,8485,2386,Mahalangur Himalaya,1955,45,52,97,46.391753,209.088889
Cho Oyu,8188,2340,Mahalangur Himalaya,1954,79,28,107,73.831776,144.924051


In [59]:
# Normalize the difficulty so that the hardest mountain scores 1.
#
# Scale by the largest *finite* score rather than the plain maximum: a
# mountain with attempts but zero ascents has a success rate of 0, so its raw
# difficulty is infinite, and dividing by an infinite maximum would turn every
# other mountain's score into 0. Such rows simply stay infinite instead.

raw_difficulty = df["Difficulty"]
finite_max = raw_difficulty[raw_difficulty != float("inf")].max()
df["Difficulty"] = raw_difficulty / finite_max

In [60]:
# After normalization the Difficulty values should be at most 1.
df.head(5)

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004,Total attempts,Success rate,Difficulty
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
K2 / Qogir / Godwin Austen,8611,4017,Baltoro Karakoram,1954,45,44,89,50.561798,0.646364
Kangchenjunga,8586,3922,Kangchenjunga Himalaya,1955,38,24,62,61.290323,0.371458
Lhotse,8516,610,Mahalangur Himalaya,1956,26,26,52,50.0,0.381894
Makalu,8485,2386,Mahalangur Himalaya,1955,45,52,97,46.391753,0.767786
Cho Oyu,8188,2340,Mahalangur Himalaya,1954,79,28,107,73.831776,0.532169


In [61]:
# Order the mountains from most to least difficult and keep the sorted frame as df.

df = df.sort_values("Difficulty", ascending=False)

In [62]:
# Verify the ordering: the highest Difficulty should now be in the first row.
df.head(5)

Unnamed: 0_level_0,Height (m),Prominence (m),Range,First ascent,Ascents bef. 2004,Failed attempts bef. 2004,Total attempts,Success rate,Difficulty
Mountain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Nanga Parbat,8126,4608,Nanga Parbat Himalaya,1953,52,67,119,43.697479,1.0
Makalu,8485,2386,Mahalangur Himalaya,1955,45,52,97,46.391753,0.767786
Annapurna I,8091,2984,Annapurna Himalaya,1950,36,47,83,43.373494,0.702689
Manaslu,8163,3092,Manaslu Himalaya,1956,49,45,94,52.12766,0.662169
K2 / Qogir / Godwin Austen,8611,4017,Baltoro Karakoram,1954,45,44,89,50.561798,0.646364
