In [5]:
# Enable IPython's autoreload extension so that edits to imported local
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [45]:
import os
import requests
from bs4 import BeautifulSoup
import urllib.request as ulib
from typing import Union
from urllib.request import Request, urlopen
from alpha_dictionary_indicatos import indicators_dictionary

def extract_text_from_article(url, timeout=30.0):
    """Download a web page and parse it into a BeautifulSoup document.

    Despite its name, this function does not extract text itself: it returns
    the whole parsed document so that callers can navigate it (``find_all``)
    or flatten it to text afterwards.

    Args:
        url: URL which points to the article.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole scraping loop.

    Returns:
        The parsed ``BeautifulSoup`` document on success, or the empty string
        ``""`` when the page could not be fetched or parsed. In the failure
        case the error is printed rather than raised, so callers must check
        the result before calling BeautifulSoup methods on it.
    """
    # Browser-like request headers (presumably to avoid the site rejecting
    # the default python-requests client -- TODO confirm they are all needed).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
        "Upgrade-Insecure-Requests": "1",
        "DNT": "1",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Accept-Encoding": "gzip, deflate",
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        # Turn 4xx/5xx responses into an exception handled below.
        response.raise_for_status()

        # A BeautifulSoup object is always truthy, so the former
        # `if soup: ... else: raise ValueError` check could never take the
        # else branch; the parsed document is returned directly.
        return BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller
        # decide how to proceed instead of aborting a long scrape.
        print(f"Could not fetch article: {e}")
        return ""


def get_content(content) -> str:
    """Return the visible text of a page, cut off before "Article Sources".

    Args:
        content: A parsed document exposing ``get_text(separator=..., strip=...)``
            (e.g. a BeautifulSoup object), or an already-extracted plain
            string. Accepting a string means the ``""`` that
            ``extract_text_from_article`` returns on failure is handled
            without raising.

    Returns:
        The text up to, but not including, the first occurrence of
        "Article Sources". If the marker is absent the full text is returned.
    """
    marker = "Article Sources"

    if isinstance(content, str):
        text = content
    else:
        text = content.get_text(separator=' ', strip=True)

    # partition() yields the whole string when the marker is missing. The
    # previous slice `text[:text.find(marker)]` became `text[:-1]` in that
    # case and silently dropped the last character.
    return text.partition(marker)[0]

In [7]:
# Entry point of the scrape: the Investopedia financial-term dictionary page.
url = "https://www.investopedia.com/financial-term-dictionary-4769738/"

# Download and parse the page; the parsed document is shown as the cell output.
terms = extract_text_from_article(url=url)
terms

<!DOCTYPE html>

<html class="comp dictionaryTemplate base-layout mntl-html no-js" data-ab="99,99,99,99,99,99,99,99,99,99,99,62,99,99,99,99,99,99,99,99,99" data-finance-resource-version="2.87.0" data-mantle-resource-version="4.0.492" data-mm-ads-resource-version="1.2.95" data-mm-transactional-resource-version="1.11.15" data-mm-video-resource-version="1.4.0" data-resource-version="2.87.0" data-tracking-container="true" id="dictionaryTemplate_1-0" lang="en"><!--
<globe-environment environment="k8s-prod" application="finance" dataCenter="us-west-1"/>
-->
<head class="loc head">
<link href="//js-sec.indexww.com" rel="preconnect"/>
<link href="//c.amazon-adsystem.com" rel="preconnect"/>
<link href="//securepubads.g.doubleclick.net" rel="preconnect"/>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="max-image-preview:large, NOODP, NOYDIR" name="robots">
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<link href="https://www

In [8]:
# First <div> carrying the top-24 list classes on the dictionary page.
top24_block = terms.find_all('div', class_='comp dictionary-top24-list mntl-block')[0]

# Heading links inside that block; later cells read each link's href and
# the text of its "link__wrapper" span.
table = top24_block.find_all('a', class_="dictionary-top24-list__heading-link mntl-text-link")

In [25]:
# Build {heading: {"Indicators": [{"name", "link"}, ...], "Link_list_indicators": url}}
# keeping only the terms whitelisted in `indicators_dictionary`.
alpha_dict = {}
for t in table:
    key = t.find('span', class_="link__wrapper").text
    link = t['href']

    # A heading without a whitelist entry can never contribute a term, so
    # skip it before paying for an HTTP request. This check is invariant for
    # the inner loop and used to be re-evaluated for every single link.
    if key not in indicators_dictionary:
        continue
    wanted_names = indicators_dictionary[key]

    term_t = extract_text_from_article(link + '/')
    if not term_t:
        # extract_text_from_article returns "" (after printing the error)
        # when the page could not be fetched; nothing to parse for this key.
        continue

    list_container = term_t.find('div', class_="comp dictionary-top300-list__list-content mntl-block")
    if list_container is None:
        # Page layout differs from what is expected; report and move on
        # instead of failing with an IndexError.
        print(f"No term list found for '{key}': {link}")
        continue
    list_t = list_container.find_all('a', class_="dictionary-top300-list__list mntl-text-link")

    list_indicators = []
    for lt in list_t:
        lt_name = lt.find('span', class_="link__wrapper").text
        if lt_name in wanted_names:
            list_indicators.append({"name": lt_name, "link": lt['href']})

    # Headings with no matching term are left out of the result entirely.
    if list_indicators:
        alpha_dict[key] = {
            "Indicators": list_indicators,
            "Link_list_indicators": link
        }

alpha_dict

{'#': {'Indicators': [{'name': '52-Week High/Low',
    'link': 'https://www.investopedia.com/terms/1/52weekhighlow.asp'},
   {'name': '52-Week Range',
    'link': 'https://www.investopedia.com/terms/1/52-week-range.asp'}],
  'Link_list_indicators': 'https://www.investopedia.com/terms-beginning-with-num-4769350'},
 'a': {'Indicators': [{'name': 'Accumulation/Distribution Indicator (A/D)',
    'link': 'https://www.investopedia.com/terms/a/accumulationdistribution.asp'},
   {'name': 'Advance/Decline Line (A/D)',
    'link': 'https://www.investopedia.com/terms/a/advancedeclineline.asp'},
   {'name': 'Aroon Indicator',
    'link': 'https://www.investopedia.com/terms/a/aroon.asp'},
   {'name': 'Aroon Oscillator',
    'link': 'https://www.investopedia.com/terms/a/aroonoscillator.asp'},
   {'name': 'Ascending Channel',
    'link': 'https://www.investopedia.com/terms/a/ascendingchannel.asp'},
   {'name': 'Ascending Triangle',
    'link': 'https://www.investopedia.com/terms/a/ascendingtriangle.a

In [47]:
# Download every indicator's article and store its text under 'content'.
# Note: this mutates the entries of `alpha_dict` in place.
for letter_entry in alpha_dict.values():
    for indicator in letter_entry['Indicators']:
        article = extract_text_from_article(indicator['link'])
        # A failed download yields "" (the error is already printed by the
        # fetch helper). Store an empty text in that case rather than
        # letting get_content raise and abort the remaining downloads.
        indicator['content'] = get_content(article) if article else ""


In [48]:
# Display the dictionary again now that each indicator carries a 'content' entry.
alpha_dict

{'#': {'Indicators': [{'name': '52-Week High/Low',
    'link': 'https://www.investopedia.com/terms/1/52weekhighlow.asp',
    'content': '52-Week High/Low: Definition, Role in Trading, and Example Investing Stocks Cryptocurrency Bonds ETFs Options and Derivatives Commodities Trading Automated Investing Brokers Fundamental Analysis Markets View All Simulator Login / Portfolio Trade Research My Games Leaderboard Banking Savings Accounts Certificates of Deposit (CDs) Money Market Accounts Checking Accounts View All Personal Finance Budgeting and Saving Personal Loans Insurance Mortgages Credit and Debt Student Loans Taxes Credit Cards Financial Literacy Retirement View All Economy Government and Policy Monetary Policy Fiscal Policy Economics View All News Markets Companies Earnings CD Rates Mortgage Rates Economy Government Crypto ETFs Personal Finance View All Reviews Best Online Brokers Best Savings Rates Best CD Rates Best Life Insurance Best Personal Loans Best Mortgage Rates Best Mone

In [49]:
import json

# Persist the scraped indicators (names, links and article text) as JSON
# in the current working directory.
with open('list_indicators.json', mode='w') as json_file:
    json.dump(alpha_dict, fp=json_file)