## Status Invest

This notebook uses scrapers to read all the stock data available at https://statusinvest.com.br/,\
so that you can filter the list down to only the stocks that are relevant to you.

In [None]:
import numpy
import os
import pandas
import sys

from sklearn.linear_model import LinearRegression  # scikit-learn
from tqdm import tqdm

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from libs.scrapers.statusinvest import (
    StockListScraper,
    StockExtraInfosScraper,
    StockHistIndicatorsScraper,
    StockHistEarningsScraper,
    StockHistPayoutsScraper,
    StockHistRevenueScraper,
    StockHistMarginsScraper,
    StockHistActivesAndPassivesScraper,
)

### Reading List of Stocks (plus general stock data)

In [2]:
# fetch the stock list together with its general indicators
stocks_general_data_df = StockListScraper().dataframe_format

# order the list from the highest to the lowest "LIQUIDEZ MEDIA DIARIA"
stocks_general_data_df.sort_values(by="LIQUIDEZ MEDIA DIARIA", ascending=False, inplace=True)

# preview: the 5 top rows, restricted to the first 10 columns
preview_columns = stocks_general_data_df.columns[:10]
preview_df = stocks_general_data_df[preview_columns].head(5)
print(preview_df.to_string(index=False))

total_tickers = len(stocks_general_data_df)
print(f"--- Total stock tickers in the list: {total_tickers} ---")

TICKER  PRECO    DY   P/L  P/VP  P/ATIVOS  MARGEM BRUTA  MARGEM EBIT  MARG. LIQUIDA  P/EBIT
 VALE3  67.76  8.97  6.47  1.62      0.69         40.12        31.07          23.03    4.80
 PETR4  40.45 18.09  3.85  1.37      0.51         51.08        38.69          25.53    2.54
 ITUB4  32.78  3.89 10.10  1.75      0.13         30.03         9.92           8.30    8.45
 BBDC4  15.35  7.25 11.80  0.98      0.09         30.94         3.95           6.18   18.48
 BBAS3  56.45  8.11  4.86  0.98      0.07         32.66        15.94          12.40    3.78
--- Total stock tickers in the list: 620 ---


### Filter out stocks

In [3]:
# minimum "LIQUIDEZ MEDIA DIARIA" a stock needs in order to be kept
MIN_DAILY_LIQUIDITY = 1_000_000

# by trading liquidity
# (.copy() makes the filtered frame independent from the original one, so
# the modifications below do not trigger pandas' SettingWithCopyWarning)
stocks_general_data_df = stocks_general_data_df[
    stocks_general_data_df["LIQUIDEZ MEDIA DIARIA"] >= MIN_DAILY_LIQUIDITY
].copy()

# keep only the most liquid stock from each company: tickers sharing the
# same first 4 characters are treated as belonging to the same company,
# and after the descending sort the first occurrence is the most liquid one
stocks_general_data_df.sort_values("LIQUIDEZ MEDIA DIARIA", ascending=False, inplace=True)
company_prefix = stocks_general_data_df["TICKER"].str.upper().str[:4]
stocks_general_data_df = stocks_general_data_df[
    ~company_prefix.duplicated(keep="first")
].copy()

# show stock tickers count after filter applied
print(f"--- Filtered stock tickers in the list: {stocks_general_data_df.shape[0]} ---")

--- Filtered stock tickers in the list: 219 ---


In [4]:
# rank by liquidity (highest first) and take the index label of the top row
ranked_by_liquidity = stocks_general_data_df.sort_values(
    by="LIQUIDEZ MEDIA DIARIA", ascending=False
)
stock_ticker = ranked_by_liquidity.index[0]

# report which stock leads the ranking
print(f"--- Most Liquid stock in the list: {stock_ticker} ---")

--- Most Liquid stock in the list: VALE3 ---


### Get extra stock info

In [5]:
# Gather one single-row frame per ticker and concatenate them once at the
# end: concatenating inside the loop copies the whole accumulated frame on
# every iteration.
extra_infos_frames = []

for stock_ticker in tqdm(stocks_general_data_df.index):
    ei_df = StockExtraInfosScraper(stock_ticker).dataframe_format
    # name the scraped column after the ticker, then transpose so that the
    # ticker becomes the row label and the infos become the columns
    ei_df.columns = [stock_ticker]
    extra_infos_frames.append(ei_df.T)

# pandas.concat raises on an empty list, so fall back to an empty frame
# when there was no ticker to scrape
extra_infos_df = (
    pandas.concat(extra_infos_frames) if extra_infos_frames else pandas.DataFrame()
)

# show sample of the retrieved data
print(extra_infos_df)

100%|██████████| 219/219 [04:54<00:00,  1.35s/it]

      Vol Histórica Tag Along Tickers Opções      Segmento Free Float
VALE3     26.615409    100.0%         1000.0  Novo Mercado     98.01%
PETR4     30.550062    100.0%         1309.0       Nível 2     63.39%
ITUB4     22.506656     80.0%          414.0       Nível 1     52.89%
BBDC4     28.159352     80.0%          488.0       Nível 1     62.46%
BBAS3       22.7558    100.0%          639.0  Novo Mercado     49.58%
...             ...       ...            ...           ...        ...
SGPS3    146.992397    100.0%            0.0  Novo Mercado     47.08%
EUCA4     31.841553        --            0.0       Nível 1     74.79%
CAMB3     46.587811        --            0.0                   100.0%
ELMD3     45.772554    100.0%            0.0  Novo Mercado     37.63%
LAND3     32.747721    100.0%            0.0  Novo Mercado     31.81%

[219 rows x 5 columns]





In [6]:
# Attach the extra infos as new columns, matching rows on the index
merged_df = stocks_general_data_df.join(extra_infos_df, how="left")
stocks_general_data_df = merged_df

# show stock tickers count after the join
ticker_count = len(stocks_general_data_df)
print(f"--- Stock tickers in the list: {ticker_count} ---")

--- Stock tickers in the list: 219 ---


### Get Historical data

In [7]:
# Concatenate all historical data available in the website (API)
def get_historical_data(stock_ticker: str, scraper_classes=None) -> pandas.DataFrame:
    """Return the historical data of a stock, with its columns sorted.

    Every scraper class is instantiated with ``stock_ticker`` and the
    ``dataframe_format`` of all of them is concatenated into one frame.

    Scrapers imported by this notebook and the infos they provide:

    * ``StockHistIndicatorsScraper`` (default): dy, p_l, p_vp, p_ebita,
      p_ebit, p_sr, p_ativo, p_capitlgiro, p_ativocirculante, ev_ebitda,
      ev_ebit, lpa, vpa, peg_Ratio, dividaliquida_patrimonioliquido,
      dividaliquida_ebitda, dividaliquida_ebit, patrimonio_ativo,
      passivo_ativo, liquidezcorrente, margembruta, margemebitda,
      margemebit, margemliquida, roe, roa, roic, giro_ativos,
      receitas_cagr5, lucros_cagr5
    * ``StockHistEarningsScraper``: earnings
    * ``StockHistPayoutsScraper``: payout
    * ``StockHistRevenueScraper``: receitaLiquida, despesas, lucroLiquido
    * ``StockHistMarginsScraper``: margemBruta, margemEbitda, margemEbit,
      margemLiquida
    * ``StockHistActivesAndPassivesScraper``: ativoTotal, ativoCirculante,
      ativoNaoCirculante, passivoTotal, passivoCirculante,
      passivoNaoCirculante, patrimonioLiquido

    Args:
        stock_ticker: Ticker handed to every scraper.
        scraper_classes: Iterable of scraper classes to use. Each one must
            accept the ticker as its only argument and expose a
            ``dataframe_format`` attribute holding a DataFrame. ``None``
            (the default) uses only ``StockHistIndicatorsScraper``.

    Returns:
        The concatenated frames with the columns in sorted order.

    Raises:
        ValueError: Raised by ``pandas.concat`` when ``scraper_classes`` is
            an empty iterable.
    """
    # resolved here, not in the signature, so that the default scraper is
    # only looked up when it is actually needed
    if scraper_classes is None:
        scraper_classes = (StockHistIndicatorsScraper,)

    frames = [
        scraper_class(stock_ticker).dataframe_format
        for scraper_class in scraper_classes
    ]
    hist_df = pandas.concat(frames)

    # sort columns and return the historical data
    return hist_df[sorted(hist_df.columns)]

In [8]:
# Linear Regression
def get_linear_regression_trend(series: pandas.Series) -> float:
    """Return the relative change that a linear fit predicts for the next step.

    A straight line is fitted to ``series`` (index as x, values as y). The
    line is then evaluated at the largest index value and at that value plus
    one; the result is the ratio between both predictions minus 1, e.g.
    ``0.05`` stands for +5%.

    Args:
        series: Values indexed by numeric labels (years in this notebook).

    Returns:
        The predicted relative change as a plain Python float.
    """
    # scikit-learn expects 2-D inputs: one row per sample
    x = numpy.array(series.index).reshape((-1, 1))
    y = numpy.array(series.values).reshape((-1, 1))

    # fit the regression line
    model = LinearRegression()
    model.fit(x, y)

    # Other fit results, if ever needed:
    # model.score(x, y)  # R
    # model.intercept_[0]  # Intercept
    # model.coef_[0][0]  # Slope

    # predict the last known point and the one right after it
    last_year = series.index.max()
    predict_x = numpy.array([last_year, last_year + 1]).reshape((-1, 1))
    # predict() returns shape (2, 1) here; flatten it to get two scalars
    current_value, next_value = model.predict(predict_x).ravel()

    # trend in %
    # NOTE(review): a negative fitted value at last_year inverts the sign of
    # the result, and a zero one yields inf/nan - confirm this is acceptable.
    # float() makes the return value match the annotation instead of handing
    # back a one-element numpy array.
    return float(next_value / current_value - 1)

In [9]:
for stock_ticker in tqdm(stocks_general_data_df.index):
    # transposed so that every indicator can be read as a column
    history_df = get_historical_data(stock_ticker).T
    # rows of the general data frame that belong to the current ticker
    ticker_rows = stocks_general_data_df.index == stock_ticker

    # share of non-negative entries and label of the last negative one
    for indicator in ("margembruta", "margemliquida", "lpa"):
        if indicator not in history_df:
            continue
        indicator_values = history_df[indicator].dropna()
        if len(indicator_values) == 0:
            continue

        # fraction of the available entries that are >= 0
        non_negative_share = indicator_values.ge(0).sum() / len(indicator_values)
        stocks_general_data_df.loc[ticker_rows, f"{indicator}_pos_years"] = non_negative_share

        # index label of the last entry that is < 0, when there is one
        negative_values = indicator_values[indicator_values.lt(0)]
        if len(negative_values) > 0:
            stocks_general_data_df.loc[
                ticker_rows, f"{indicator}_last_negative_year"
            ] = negative_values.index[-1]

    # linear regression trend of each indicator
    for indicator in (
        "margembruta", "margemliquida", "lpa", "vpa",
        "dividaliquida_patrimonioliquido", "dividaliquida_ebit",
        "passivo_ativo", "liquidezcorrente",
    ):
        if indicator not in history_df:
            continue
        indicator_values = history_df[indicator].dropna()
        if len(indicator_values) == 0:
            continue

        stocks_general_data_df.loc[
            ticker_rows, f"{indicator}_trend"
        ] = get_linear_regression_trend(indicator_values)

100%|██████████| 219/219 [03:25<00:00,  1.07it/s]


### Output result into a file

In [10]:
output_path = "../outputs/statusinvest.csv"

# to_csv does not create missing folders, so make sure the target one exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# the index is left out of the file
stocks_general_data_df.to_csv(output_path, sep=";", index=False)