# Imports

In [None]:
import io
import json
import logging

import pandas as pd
import requests
import seaborn as sns

import helpers.hdbg as hdbg
import helpers.hprint as hprint

In [None]:
# Initialize logging through the project helper at INFO verbosity.
hdbg.init_logger(verbosity=logging.INFO)

# Logger used by the cells of this notebook.
_LOG = logging.getLogger(__name__)

# Apply the project's notebook configuration.
# NOTE(review): presumably sets display/plotting defaults - confirm in `helpers.hprint`.
hprint.config_notebook()

# Load data

In [None]:
# Download the exchange ranking page and parse its first HTML table.
url = "https://coinmarketcap.com/rankings/exchanges/"
# A timeout prevents the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of trying to parse an error page.
response.raise_for_status()
# Wrap the markup in a file-like object: passing literal HTML to
# `pd.read_html` is deprecated in recent pandas versions.
df = pd.read_html(io.StringIO(response.text))[0]
# Shape of the subset of rows whose 24h volume is missing.
df.loc[df["Volume(24h)"].isna()].shape

In [None]:
# Almost all rows loaded from the URL have NaN values, so load the table from
# a fully saved copy of the page instead.
# NOTE(review): assumes `top.html` was saved as UTF-8 - confirm; an explicit
# encoding avoids depending on the platform default.
with open("top.html", encoding="utf-8") as f:
    # Wrap the markup in a file-like object: passing literal HTML to
    # `pd.read_html` is deprecated in recent pandas versions.
    df = pd.read_html(io.StringIO(f.read()))[0]
_LOG.info(df.shape)
df.head(3)

In [None]:
# Shape of the subset of rows that have no 24h volume.
df[df["Volume(24h)"].isna()].shape

In [None]:
# Replace missing volumes with an empty string for now.
# The assignment is restricted to the volume column: a row-wide assignment
# would also blank out `Name` and every other column of those exchanges.
df.loc[df["Volume(24h)"].isna(), "Volume(24h)"] = ""
# Sanity check: no row should have a missing volume anymore.
df.loc[df["Volume(24h)"].isna()].shape

# Create data frame with columns `name`, `volume`.

In [None]:
# Keep only the exchange name and its 24h volume.
# The columns are selected by label rather than by position so that the cell
# stays correct if the scraped table gains or reorders columns.
name_volume_df = df[["Name", "Volume(24h)"]].copy()
name_volume_df.head(3)

## Convert types

In [None]:
# Clean the volume strings and convert them to numbers.

name_volume_df = name_volume_df.convert_dtypes()
# Drop the first character and the last six characters of every volume string.
# NOTE(review): presumably a currency symbol and a trailing suffix - confirm
# against the saved page.
name_volume_df["Volume(24h)"] = name_volume_df["Volume(24h)"].str[1:-6]
# Empty volumes count as zero. Only the volume cell is updated so that the
# exchange name in the same row is left untouched.
name_volume_df.loc[name_volume_df["Volume(24h)"] == "", "Volume(24h)"] = "0"
# Remove thousands separators before the numeric conversion.
name_volume_df["Volume(24h)"] = name_volume_df["Volume(24h)"].str.replace(
    ",", "", regex=False
)
name_volume_df["Volume(24h)"] = pd.to_numeric(name_volume_df["Volume(24h)"])
name_volume_df.head(3)

## Sorting by `volume`

In [None]:
# Order exchanges from the largest to the smallest 24h volume and renumber
# the rows starting from 0.
name_volume_df = name_volume_df.sort_values(
    by="Volume(24h)", ascending=False, ignore_index=True
)
name_volume_df.head(3)

### Cumulative sum

In [None]:
# Running total of the 24h volume in the current row order, kept as a
# one-column data frame.
cumsum = name_volume_df[["Volume(24h)"]].cumsum()
cumsum.head()

In [None]:
# Bar plot of the cumulative volume over the first 10 exchanges.
sns.barplot(
    x=name_volume_df["Name"].iloc[:10],
    y=cumsum["Volume(24h)"].iloc[:10],
)