<a href="https://colab.research.google.com/github/mazzidougs/Webscraping-RealEstateFunds/blob/main/WebScraping__investidor10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Web Scraping for Real Estate Fund Investors


This project involves the development of a web scraping tool designed specifically to extract financial data from the Investidor 10 website. Investidor 10 is a renowned online platform that provides comprehensive information on various financial instruments, including stocks, bonds, real estate investment funds (FIIs), and other investment opportunities. The primary objective of this tool is to automate the collection of vital financial data, which can significantly enhance investment decision-making processes. By efficiently gathering and processing data from Investidor 10, the tool aims to offer users timely and accurate financial insights, thereby supporting better investment strategies. This project not only showcases technical prowess in web scraping techniques but also underscores a practical application in the financial sector, offering valuable resources for investors seeking informed decisions in their investment ventures.

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime
import pytz

# Timezone object for São Paulo, Brazil; get_fund_data passes it to
# datetime.now() so every scraped row is stamped in São Paulo local time.
brazil_tz = pytz.timezone('America/Sao_Paulo')

# Base URL of the fund pages; the ticker is appended to it to form each page URL
base_url = 'https://investidor10.com.br/fiis/'
# Request headers sent with every page fetch
headers = {'User-Agent': 'Mozilla/5.0'}

# List of fund tickers you are interested in.
# Each ticker appears exactly once: a repeated entry would trigger a second
# HTTP request for the same page and a duplicate row in the final table.
tickers = ['mxrf11', 'btci11', 'mchf11', 'vghf11', 'vgir11', 'xpml11',
           'irdm11', 'recr11', 'vgip11', 'rbrr11', 'vcjr11', 'vrta11',
           'cvbi11', 'bbpo11', 'rbry11', 'afhi11', 'hgcr11', 'kncr11',
           'mcci11', 'tgar11', 'brco11', 'VSLH11', 'pvbi11', 'rbrf11',
           'KISU11', 'TORD11', 'RBVO11', 'SCPF11', 'BLMR11', 'FLMA11',
           'GALG11', 'AJFI11', 'xplg11']

def _text_or_na(node):
    """Return the stripped text of a parsed HTML node, or 'N/A' when the node is missing."""
    return node.text.strip() if node is not None else 'N/A'


def get_fund_data(ticker, timeout=10):
    """Fetch one fund page and extract its headline indicators.

    Args:
        ticker: Fund ticker; appended to ``base_url`` to build the page URL.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot hang the whole run.

    Returns:
        A dict with the keys "Ticker", "Price", "P/VP",
        "Dividend Yield (12M)", "Last Return" and "Date". The indicator
        values are the raw page strings, or 'N/A' when the corresponding
        element is not found; "Date" is the current São Paulo time
        formatted as ``YYYY-MM-DD HH:MM``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status. Without the status
            check an error page would be parsed and silently produce a row
            of 'N/A' values.
    """
    fund_url = f"{base_url}{ticker}/"
    response = requests.get(fund_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Use the ticker name shown on the page when present; otherwise fall back
    # to the requested ticker in upper case.
    name_tag = soup.find('h2', class_='ticker-name')
    ticker_name = name_tag.text.strip() if name_tag is not None else ticker.upper()

    # Extract the price, P/VP, Dividend Yield (12M) and last return.
    # The selectors are browser "Selector Path" strings (e.g. Safari: Develop ->
    # Show Web Inspector -> select an element -> Copy -> Selector Path), so
    # they depend on the page's current layout.
    value = soup.select_one("#cards-ticker > div:nth-of-type(1) > div._card-body > div > span.value")
    p_vp = soup.select_one("#cards-ticker > div:nth-of-type(3) > div._card-body > span")
    dividend_yield = soup.select_one("#cards-ticker > div:nth-of-type(2) > div._card-body > div > span")
    last_return = soup.select_one("#table-indicators > div:nth-child(15) > div.desc > div > span")

    current_time = datetime.now(brazil_tz).strftime("%Y-%m-%d %H:%M")

    return {
        "Ticker": ticker_name,
        "Price": _text_or_na(value),
        "P/VP": _text_or_na(p_vp),
        "Dividend Yield (12M)": _text_or_na(dividend_yield),
        "Last Return": _text_or_na(last_return),
        "Date": current_time,
    }

# Accumulates one result dict per successfully scraped fund
all_fund_data = []

# Scrape every ticker in turn; a failure is reported and the run continues
for ticker in tickers:
    try:
        fund_data = get_fund_data(ticker)
    except Exception as err:
        print(f"Failed to get data for {ticker}: {err}")
    else:
        all_fund_data.append(fund_data)


def get_price(text):
    """Convert a price string such as "R$ 10,56" to a float.

    The "R$" prefix is removed and the decimal comma becomes a decimal
    point. When a decimal comma is present, any "." is treated as a
    thousands separator and dropped, so "R$ 1.234,56" yields 1234.56.
    Strings without a comma are passed to ``float`` unchanged apart from
    the prefix removal.

    Args:
        text: The price text to convert.

    Returns:
        The parsed price, or ``float('inf')`` when ``text`` is not a string
        or cannot be parsed, so unparseable rows sort last in ascending order.
    """
    try:
        cleaned = text.replace("R$", "").strip()
        if "," in cleaned:
            # Brazilian number format: "." groups thousands, "," marks decimals
            cleaned = cleaned.replace(".", "").replace(",", ".")
        return float(cleaned)
    except (AttributeError, TypeError, ValueError):
        # Only conversion problems are absorbed; KeyboardInterrupt and
        # SystemExit still propagate.
        return float('inf')
# Build a table with one row per scraped fund
df = pd.DataFrame(all_fund_data)

# Replace the price text with its numeric value so rows can be ordered by it
df['Price'] = df['Price'].apply(get_price)

# Order from cheapest to most expensive, then renumber the rows from zero
# (the previous index is discarded)
df = df.sort_values(by='Price', ascending=True).reset_index(drop=True)

df.head(None)

Unnamed: 0,Ticker,Price,P/VP,Dividend Yield (12M),Last Return,Date
0,TORD11,2.18,17,"2,27%","R$ 0,05",2024-01-18 19:56
1,VSLH11,4.18,39,"11,93%","R$ 0,04",2024-01-18 19:56
2,SCPF11,4.56,39,"5,55%","R$ 0,02",2024-01-18 19:56
3,BLMR11,6.72,89,"8,90%","R$ 0,05",2024-01-18 19:56
4,RBVO11,8.2,49,"2,85%","R$ 0,04",2024-01-18 19:56
5,KISU11,8.44,94,"10,45%","R$ 0,08",2024-01-18 19:56
6,MCHF11,9.03,96,"12,05%","R$ 0,07",2024-01-18 19:55
7,GALG11,9.07,91,"10,92%","R$ 0,08",2024-01-18 19:56
8,VGHF11,9.53,102,"13,64%","R$ 0,10",2024-01-18 19:55
9,AJFI11,9.66,96,"1,45%","R$ 0,07",2024-01-18 19:56


In [None]:
print(df)

    Ticker   Price  P/VP Dividend Yield (12M) Last Return              Date
0   TORD11    2.18  0,17                2,27%     R$ 0,05  2024-01-18 19:56
1   VSLH11    4.18  0,39               11,93%     R$ 0,04  2024-01-18 19:56
2   SCPF11    4.56  0,39                5,55%     R$ 0,02  2024-01-18 19:56
3   BLMR11    6.72  0,89                8,90%     R$ 0,05  2024-01-18 19:56
4   RBVO11    8.20  0,49                2,85%     R$ 0,04  2024-01-18 19:56
5   KISU11    8.44  0,94               10,45%     R$ 0,08  2024-01-18 19:56
6   MCHF11    9.03  0,96               12,05%     R$ 0,07  2024-01-18 19:55
7   GALG11    9.07  0,91               10,92%     R$ 0,08  2024-01-18 19:56
8   VGHF11    9.53  1,02               13,64%     R$ 0,10  2024-01-18 19:55
9   AJFI11    9.66  0,96                1,45%     R$ 0,07  2024-01-18 19:56
10  VGIR11    9.80  1,00               14,80%     R$ 0,11  2024-01-18 19:55
11  BTCI11    9.98  0,98               12,24%     R$ 0,09  2024-01-18 19:55
12  MXRF11  