### Importing libraries / declaring classes

In [None]:
import os
from selenium import webdriver
import time
from bs4 import BeautifulSoup
import json
import re
import pandas as pd
from pandas.io.json import json_normalize
import urllib.request
import requests
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects

In [None]:
class color:
    """ANSI escape sequences for styling text printed to a terminal.

    Put one of the style attributes before the text and ``END`` after it,
    e.g. ``f"{color.RED}error{color.END}"``.
    """
    # Text emphasis
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours
    DARKCYAN = '\033[36m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'

    # Resets every attribute set by the codes above
    END = '\033[0m'

### Initializing selenium webdriver

In [None]:
def get_driver(browser):
    """Fetch the CoinMarketCap API documentation page with Selenium.

    The user is asked for the location of the selenium webdriver until a
    driver can be started from it and the page is loaded successfully.

    Args:
        browser: Name of the ``selenium.webdriver`` attribute to instantiate
            (for example ``"Chrome"``).

    Returns:
        The page source of the documentation page as rendered by the browser.
    """
    while True:
        # Prompt outside the try block so Ctrl-C / EOF stop the program instead of
        # being reported as an invalid webdriver location and retried forever.
        directory = input("Please enter the directory where the selenium webdriver is on your computer")
        try:
            access = getattr(webdriver, browser)
            driver = access(directory)
            try:
                driver.get("https://coinmarketcap.com/api/documentation/v1/#section/Errors-and-Rate-Limits")
                # Wait before reading the source (presumably so the page can finish rendering)
                time.sleep(5)
                page_source = driver.page_source
                time.sleep(3)
            finally:
                # Always close the browser, even when loading the page failed
                driver.quit()
            return page_source
        except Exception:
            print(f"{color.RED}Not a valid {browser} webdriver location{color.END}")

### Scraping info

In [None]:
def get_browsers(timeout=30):
    """Scrape the browser names listed in the drivers table of selenium's PyPI page.

    Args:
        timeout: Seconds to wait for PyPI before giving up. Without a timeout
            the request could block indefinitely.

    Returns:
        A list with the text of every ``<strong>`` element found in the cells
        of the "drivers" table of the project description.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched
            or the server answers with an HTTP error status.
    """
    url = 'https://pypi.org/project/selenium/'
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently scraping an error page into an empty list
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')
    name_cells = soup.select('div[class="project-description"] > div[id="drivers"] > table > tbody > tr > td > strong')
    return [cell.text for cell in name_cells]

In [None]:
def define_browser(browsers):
    """Ask the user for a browser until one of the supported names is given.

    The answer is matched ignoring case and surrounding whitespace.

    Args:
        browsers: Collection of supported browser names.

    Returns:
        The matching entry of ``browsers``, spelled as it appears there.

    Raises:
        ValueError: If ``browsers`` is empty, since no answer could ever be valid.
    """
    if not browsers:
        raise ValueError("No supported browsers were provided to choose from")

    # Map the lower-cased spelling to the canonical name for case-insensitive lookup
    canonical_names = {name.lower(): name for name in browsers}
    while True:
        print(f"{color.BOLD}Options: {color.END}{', '.join(browsers)}")
        answer = input("What browser are you using for selenium's webdriver?").strip().lower()
        if answer in canonical_names:
            print(f"{color.GREEN}Valid browser{color.END}")
            return canonical_names[answer]
        print(f"{color.RED}Not a valid browser{color.END}")

In [None]:
def get_errors(page_source):
    """Scrape the error table of the CoinMarketCap documentation page.

    The table is located inside the list of all ``<td>`` texts: it starts at the
    first cell equal to ``'401'`` and ends at the last cell mentioning the IP
    rate limit.

    Args:
        page_source: HTML of the CoinMarketCap API documentation page.

    Returns:
        A dictionary ``{status code: description}`` built from the first and
        third cell of every three-cell row.

    Raises:
        ValueError: If the start or the end of the error table cannot be found.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    cells = [cell.text for cell in soup.select('td')]

    # First occurrence of the first status code marks the start of the table
    try:
        start = cells.index('401')
    except ValueError:
        raise ValueError("Could not find the start of the error table ('401') in the page") from None
    table = cells[start:]

    # Last occurrence of the rate-limit message marks the end of the table
    end_positions = [position for position, text in enumerate(table)
                     if "You've hit an IP rate limit" in text]
    if not end_positions:
        raise ValueError("Could not find the end of the error table in the page")
    results = table[:end_positions[-1] + 1]

    # Drop the middle cell of every row, leaving [status, description, status, ...]
    del results[1::3]

    # Dictionary of errors {status code: description}
    return dict(zip(results[0::2], results[1::2]))

In [None]:
def get_currency_code(page_source):
    """Scrape the supported currencies from the CoinMarketCap documentation page.

    The currency table is located inside the list of all ``<td>`` texts: it
    starts at the first cell equal to ``'United States Dollar ($)'`` and ends at
    the last cell containing ``'VND'``.

    Args:
        page_source: HTML of the CoinMarketCap API documentation page.

    Returns:
        A dictionary ``{currency code: description}`` built from the second and
        first cell of every three-cell row.

    Raises:
        ValueError: If the start or the end of the currency table cannot be found.
    """
    soup = BeautifulSoup(page_source, 'lxml')
    cells = [cell.text for cell in soup.select('td')]

    # First value of the table
    try:
        start = cells.index('United States Dollar ($)')
    except ValueError:
        raise ValueError("Could not find the start of the currency table in the page") from None
    table = cells[start:]

    # Last value of the table (last cell that mentions VND)
    end_positions = [position for position, text in enumerate(table) if "VND" in text]
    if not end_positions:
        raise ValueError("Could not find the end of the currency table ('VND') in the page")
    results = table[:end_positions[-1] + 1]

    # Dictionary of {currency code: description}
    return dict(zip(results[1::3], results[0::3]))

### Getting user inputs

In [None]:
def check_path():
    """Keep asking where the csv file should be saved until an existing directory is given.

    Returns:
        The directory path exactly as typed by the user.
    """
    while True:
        path = input("Where would you like to save your csv file? (Hint: it should be a path on your computer)")
        if not os.path.isdir(path):
            print(f"{color.RED}Invalid path{color.END}")
            continue
        print(f"{color.GREEN}Valid path{color.END}")
        return path

In [None]:
def file_name(path):
    """Ask for the name of the csv file until a valid one is given.

    A name is rejected when it is empty, contains illegal or avoidable
    characters (whitespace included), or is longer than 31 characters.

    Args:
        path: Directory where the csv file will be saved.

    Returns:
        ``path`` joined with ``"<filename>.csv"``.
    """
    max_length = 31

    while True:
        filename = input("Please choose the name of your csv file")
        # Regex to find occurrences of illegal/avoidable characters
        invalid_characters = re.findall(r'[#%&{}\\\\<>*?/$!\'":@+`|=\s]', filename)
        if not filename:
            # An empty name is reported the same way as a blank one
            invalid_characters = [' ']

        if not invalid_characters and len(filename) <= max_length:
            print(f"{color.GREEN}{filename}{color.END} is a valid filename")
            # os.path.join adds the separator only when needed and copes with an empty path
            return os.path.join(path, f"{filename}.csv")

        if invalid_characters:
            # Unique offenders in order of appearance; whitespace is invisible when
            # printed, so it is named instead
            offenders = dict.fromkeys(
                'blank spaces' if character.isspace() else character
                for character in invalid_characters
            )
            listing = ", ".join(f"{color.RED}{offender}{color.END}" for offender in offenders)
            print(f"{listing} are characters to avoid in filenames, please choose another name.")

        # Check that the length does not exceed the limit
        if len(filename) > max_length:
            print("Please keep your filenames to a reasonable length and be sure they are under 31 characters")

In [None]:
def get_currency(currencies):
    """Ask for a currency code until one of the supported currencies is given.

    The answer is upper-cased and stripped of surrounding whitespace before
    it is looked up.

    Args:
        currencies: Mapping of ``{currency code: description}``.

    Returns:
        The upper-cased currency code chosen by the user.
    """
    while True:
        # Currency codes are defined by ISO 4217 (ISO 8601 is the date/time standard)
        currency = input("What currency would you like to convert to? Hint: It must be a currency code (ISO 4217)").strip().upper()
        if currency in currencies:
            print(f"The currency is {color.BOLD}{currency}: {currencies[currency]}{color.END}")
            return currency
        print(f"{color.RED}That's not a valid currency{color.END}")

In [None]:
def get_limit():
    """Ask how many coins should be retrieved until a usable number is given.

    Returns:
        An integer between 1 and 5000 (inclusive).
    """
    max_coins = 5000
    while True:
        try:
            limit = int(input("How many coins would you like to see?"))
        except ValueError:
            print(f"{color.RED}That's not a valid number{color.END}")
            continue

        if limit < 1:
            # Zero or a negative amount of coins cannot be requested
            print(f"{color.RED}Please provide a number greater than 0{color.END}")
        elif limit > max_coins:
            # If it exceeds the limit (5000 coins), ask the user to give a smaller amount
            print("Please provide a smaller number")
        else:
            print(f"Limit set to {color.BOLD}{limit}{color.END} coins")
            return limit

In [None]:
def call_API(limit, currency, errors):
    """Call the CoinMarketCap listings API until a successful answer is received.

    The user is asked for an API key before every attempt. Connection problems
    and error responses are printed and followed by a new attempt.

    Args:
        limit: Number of coins to request.
        currency: Currency code the prices are converted to.
        errors: Mapping of ``{status code (str): description}`` used to explain
            error responses.

    Returns:
        The decoded json body of the successful response.
    """

    print("If you don't have a CoinMarketCap API key, you can get one free by creating an account at:\n\
https://pro.coinmarketcap.com/signup")

    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    parameters = {
      'start':'1',
      'limit': limit,
      'convert': currency
    }

    while True:
        API_key = input("Please provide your CoinMarketCap API key: ")
        # 'Accept' is the standard HTTP header name ('Accepts' is not a defined header)
        headers = {'Accept': 'application/json','X-CMC_PRO_API_KEY': API_key.strip(),}

        try:
            # The context manager closes the session's connections after each attempt
            with Session() as session:
                session.headers.update(headers)
                response = session.get(url, params=parameters, timeout=30)
        except (ConnectionError, Timeout, TooManyRedirects) as e:
            print(e)
            continue

        status = str(response.status_code)
        if status in errors:
            # Explain the failure with the description previously scraped
            print(f"{color.RED}{status}: {errors[status]} {color.END}")
        elif not response.ok:
            # Error status without a scraped description: still not a usable answer
            print(f"{color.RED}{status}: unexpected response from the API {color.END}")
        else:
            print(f"{color.GREEN}API key is valid{color.END}")
            return json.loads(response.text)

In [None]:
def sort_type():
    """Ask whether the DataFrame should be sorted in ascending order.

    Returns:
        ``'ascending'`` when the user answers "y" and ``'descending'`` when the
        user answers "n" (case-insensitive). Any other answer repeats the question.
    """
    orders = {"y": 'ascending', "n": 'descending'}
    while True:
        answer = input("Would you like an ascending sorting? Y/N?").lower()
        if answer in orders:
            return orders[answer]
        print(f"{color.RED}Invalid input{color.END}")

In [None]:
def sort_value(type):
    """Tell whether the given sorting order is ``'ascending'``.

    Returns:
        True for ``'ascending'``, False for anything else.
    """
    return type == 'ascending'

### DataFrame display and visualization

In [None]:
def flatten(data, col_list):
    """Expand dict-valued columns of a DataFrame into one column per key.

    For every name in ``col_list`` the values of that column are spread over new
    columns named ``<column>_<key>``, which are appended to the right of the
    DataFrame, and the original column is removed. Names are handled in order,
    so a later entry may refer to a column created by an earlier one.

    Args:
        data: DataFrame holding the semi-structured columns.
        col_list: Names of the columns to expand.

    Returns:
        The DataFrame with the listed columns replaced by their expanded form.
    """
    for nested in col_list:
        prefix = nested + '_'
        # Keys become columns, rows keep the index labels of ``data``
        expanded = pd.DataFrame(dict(data[nested])).transpose()
        expanded.columns = [prefix + str(key) for key in expanded.columns]
        # Append the expanded columns and discard the semi-structured original
        data = pd.concat([data, expanded], axis=1).drop(nested, axis=1)
    return data

In [None]:
def cleaning_columns(data, currency):
    """Build the table of the most relevant coin columns with readable headers.

    Args:
        data: Decoded API answer; the list of coins is read from ``data['data']``.
        currency: Currency code used in the names of the quote columns.

    Returns:
        A DataFrame with the selected columns, renamed for display.
    """
    # (flattened column, header shown to the user)
    column_labels = [
        ('name', 'Coin name'),
        ('symbol', 'Symbol'),
        ('cmc_rank', 'CMC rank'),
        (f'quote_{currency}_price', f'Price ({currency})'),
        (f'quote_{currency}_volume_24h', '24h volume'),
        (f'quote_{currency}_percent_change_1h', '1h % change'),
        (f'quote_{currency}_percent_change_24h', '24h % change'),
        (f'quote_{currency}_percent_change_7d', '7d % change'),
        (f'quote_{currency}_market_cap', 'Market cap'),
    ]

    coins = pd.DataFrame(data['data'])
    # 'quote' is expanded first; the column it yields for the currency is expanded next
    flat = flatten(coins, ['quote', f'quote_{currency}'])

    # Keep only the most relevant columns, then give them their display headers
    result = flat[[source for source, _ in column_labels]]
    result.columns = [label for _, label in column_labels]
    return result

In [None]:
def sorted_table(result):
    """Ask the user which column the DataFrame should be sorted by.

    The column names are listed before every question and the answer is
    matched against them ignoring case. As a side effect, pandas is configured
    to display floats with 5 decimals.

    Args:
        result: DataFrame whose columns are offered as sorting options.

    Returns:
        The name of the chosen column, spelled as in ``result.columns``.
    """
    # Displaying only 5 decimals
    pd.options.display.float_format = '{:,.5f}'.format
    options = list(result.columns)
    options_case = [name.lower() for name in result.columns]

    while True:
        print(f"{color.BOLD}Options: {color.END}")

        # Comma-separated list of columns, closed with a full stop after the last one
        closing_option = options[-1] if options else None
        for name in options:
            print(name, end=".\n" if name == closing_option else ", ")

        choice = input("How would you like to sort the table?\n").lower()
        if choice in options_case:
            # Hand back the column name with its original capitalisation
            return options[options_case.index(choice)]
        print(f"{color.RED}That's not a valid sorting option{color.END}")

In [None]:
def display_result(result, currency, limit, sort_col,currencies,s_type):
    """Sort the DataFrame as requested and print a summary of what was built.

    Args:
        result: DataFrame to sort.
        currency: Currency code the prices are expressed in.
        limit: Number of coins requested.
        sort_col: Column to sort by.
        currencies: Mapping of ``{currency code: description}``.
        s_type: ``'ascending'`` or ``'descending'``.

    Returns:
        The sorted DataFrame.
    """
    sorted_result = result.sort_values(by=sort_col, ascending=sort_value(s_type))
    # Descriptions look like "United States Dollar ($)": drop the trailing symbol
    # whatever its length (a fixed [:-4] only fits one-character symbols)
    currency_name = currencies[currency].rsplit(' (', 1)[0]
    print(f"{color.UNDERLINE}DataFrame created for top {limit} coins with prices displayed on {currency_name}'s, sorted by '{sort_col}' ({s_type}){color.END}")
    return sorted_result

### Running all functions

In [None]:
def my_function():
    """Run the whole interactive workflow and save the resulting table as a csv file.

    The user is asked, in order, for the destination of the file, the selenium
    browser and webdriver, the currency, the number of coins, the API key and the
    sorting preferences.

    Returns:
        The sorted DataFrame that was written to disk.
    """

    print("Initializing function...")
    # Destination of the csv file: directory first, then the file name
    directory = check_path()
    csv_path = file_name(directory)

    # Selenium is used to scrape the CoinMarketCap page that validates the user's inputs
    print("Starting driver...")
    supported_browsers = get_browsers()
    chosen_browser = define_browser(supported_browsers)
    page = get_driver(chosen_browser)
    print(f"{color.GREEN}Driver started{color.END}")

    print("Getting info...")
    api_errors = get_errors(page)
    currency_codes = get_currency_code(page)
    print(f"{color.GREEN}Successfully received{color.END}")

    chosen_currency = get_currency(currency_codes)
    coin_limit = get_limit()

    # Retrieve the coin data from the API according to the user's requests
    api_data = call_API(coin_limit, chosen_currency, api_errors)
    table = cleaning_columns(api_data, chosen_currency)
    sort_column = sorted_table(table)
    sort_order = sort_type()
    df_sorted = display_result(table, chosen_currency, coin_limit, sort_column, currency_codes, sort_order)

    # Save the DataFrame as a csv file
    df_sorted.to_csv(csv_path, index=False)
    print(f"{color.GREEN}Successfully saved to: {csv_path} {color.END}")

    return df_sorted

### Result

In [None]:
#Run the whole interactive workflow and keep the sorted DataFrame it returns
clean_df = my_function()

In [None]:
#Show the DataFrame (presumably rendered as the notebook cell's output; outside a notebook a bare expression prints nothing)
clean_df