Importing everything I need at the start of the file

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

Using both databases

In [4]:
try:
    df1 = pd.read_csv('./mountains_db.tsv', sep='\t', header=None, names=["Name", "Height", "Country", "ISO"])
except RuntimeError as e:
    print(f"Error reading countries_db.tsv: {e}")
    df1 = pd.DataFrame(columns=["Name", "Height", "Country", "ISO"])

try: 
    df2 = pd.read_csv('./countries_db.tsv', sep='\t', header=None, names=["ISO", "Continent"])
except RuntimeError as e:
    print(f"Error reading countries_db.tsv, failed with: {e}")

try:
    final_df = pd.merge(df1, df2, on="ISO", how="inner")
except RuntimeError as e:
    print(f"Error merging dataframes: {e}")

Now, I have a table with the next structure: ISO, Continent, Country, Name , Height. So, each continent needs to have an array with the heights it has in it. 

In [6]:
try:
    continents = final_df.groupby(by="Continent")['Height'].apply(list).to_dict()
    print("Continents grouped by height:", continents)
except RuntimeError as e:
    print(f"Error grouping by continent: {e}")
    continents = {}

Continents grouped by height: {'Africa': [406.0, 898.0, 449.0, 600.0, 569.0, nan, 257.0, 171.0, 243.0, 521.0, nan, nan, 648.0, 1003.0, 865.0, 424.0, 411.0, 12.0, nan, 360.0, nan, nan, 794.0, 888.0, 927.0, 918.0, 927.0, 886.0, nan, 432.0, 451.0, 798.0, 477.0, nan, 685.0, 531.0, 576.0, 543.0, 489.0, nan, 517.0, 1021.0, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 1670.0, nan, 1214.0, 664.0, 844.0, 1410.0, 953.0, 1150.0, 1421.0, 1017.0, 990.0, 1074.0, 1040.0, 1275.0, nan, 628.0, 668.0, 873.0, 819.0, 649.0, 1578.0, 771.0, 1022.0, 1112.0, 816.0, 722.0, 1178.0, 899.0, 688.0, 1075.0, 836.0, 860.0, 1105.0, 976.0, 1132.0, 1315.0, 1213.0, 1609.0, 2193.0, 2750.0, 1695.0, 2504.0, 1894.0, 1878.0, 1351.0, 1324.0, 2063.0, 966.0, 808.0, 601.0, 700.0, 521.0, 1350.0, 1212.0, 1272.0, 1279.0, 2053.0, 2947.0, 3308.0, 2293.0, 1927.0, 1009.0, nan, 958.0, 1525.0, 1091.0, nan, 1274.0, 1368.0, 4277.0, 3285.0, 1447.0, 1885.0, 2316.0, 2057.0, 1985.0, 2503.0, 1550.0, 1654.0, 2133.0, 2118.0, 2013.0, 

Now I need to create a graphic of box type.

In [None]:
try:
    plt.figure(figsize=(12, 6))
    plt.title("Median and Std Dev of the random value within each quantile")
    plt.xlabel("quantile")
    plt.ylabel("values")
    plt.boxplot(continents, positions=range(len(continents)) labels=continents.keys() widths=0.6, whis=[0, 100])
    plt.xticks(range(len(continents)), continents.index.tolist())
    plt.plot(continents["quantile"], continents["max"], label="Max", color="blue", marker="^")
    plt.plot(continents["quantile"], continents["median"], label="Median", color="green", marker="o")
    plt.plot(continents["quantile"], continents["min"], label="Min", color="red", marker="v")
    plt.legend()
    plt.show()
except RuntimeError as e:
    print(f"Error creating boxplot: {e}")
    plt.close()