In [24]:
import pandas as pd
import datetime
# Importe aqui outras bibliotecas necessárias, como requests ou yfinance para coleta de dados
from IPython.display import display
import yfinance as yf
from ta import add_all_ta_features
from ta.utils import dropna
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

def display_info(title: str, df: pd.DataFrame, ticker: str, start_date: str, end_date: str) -> None:
    """
    Print a formatted summary of a data-processing step.

    The report contains the title, the request parameters, the DataFrame
    shape, the total number of missing cells and the ``describe()`` table,
    framed by two separator lines.

    Args:
    title (str): Description of the information to display.
    df (DataFrame): The pandas DataFrame containing the stock data.
    ticker (str): The stock ticker symbol.
    start_date (str): The start date of the data.
    end_date (str): The end date of the data.
    """
    # Visual frame printed above and below the report.
    separator = "-" * 50

    shape_info = f"Data Shape: {df.shape} (rows, columns)"
    # int() keeps the rendering stable ("0", never "0.0") for empty frames.
    missing_values_info = f"Missing Values: {int(df.isnull().sum().sum())}"

    # DataFrame.describe() raises ValueError when there are no columns, so
    # report a placeholder instead of letting the reporter itself crash.
    if df.columns.size == 0:
        basic_stats = "N/A (DataFrame has no columns)"
    else:
        basic_stats = df.describe().to_string()

    info = (
        f"\n{separator}\n"
        f"{title}\n"
        f"Ticker: {ticker}\n"
        f"Start Date: {start_date}\n"
        f"End Date: {end_date}\n"
        f"{shape_info}\n"
        f"{missing_values_info}\n"
        f"Basic Statistics:\n{basic_stats}\n"
        f"{separator}\n"
    )

    print(info)

def add_technical_indicators(df_stock_data: pd.DataFrame) -> pd.DataFrame:
    """
    Add technical-analysis indicator columns to a stock price DataFrame.

    The input is first passed through ``ta.utils.dropna`` and the result is
    handed to ``ta.add_all_ta_features`` using the "Open", "High", "Low",
    "Close" and "Volume" columns.

    Args:
    df_stock_data (DataFrame): Historical price data containing at least the
        columns "Open", "High", "Low", "Close" and "Volume".

    Returns:
    DataFrame: The price data with the indicator columns appended, e.g.

                    Open       High        Low      Close  Adj Close     Volume
    Date
    2020-01-02  74.059998  75.150002  73.797501  75.087502  73.152657  135480400
    2020-01-03  74.287498  75.144997  74.125000  74.357498  72.441460  146322800
    2020-01-06  73.447502  74.989998  73.187500  74.949997  73.018700  118387200

                volume_adi  volume_obv  volume_cmf  volume_fi  ...
    Date                                                         ...
    2020-01-02  1.229591e+08   135480400         NaN        NaN  ...
    2020-01-03  4.334199e+07   -10842400         NaN        NaN  ...
    2020-01-06  1.564747e+08   107544800         NaN        NaN  ...

    Raises:
    ValueError: If the DataFrame is empty, if a required column is missing,
        or if the indicator computation fails (the original exception is
        chained as ``__cause__``).
    """
    if df_stock_data.empty:
        raise ValueError("DataFrame está vazio. Não é possível calcular indicadores técnicos.")

    # Fail early with an explicit message rather than an opaque KeyError
    # surfacing from inside the indicator library.
    required_columns = ("Open", "High", "Low", "Close", "Volume")
    missing_columns = [col for col in required_columns if col not in df_stock_data.columns]
    if missing_columns:
        raise ValueError(
            f"Erro ao adicionar indicadores técnicos: colunas ausentes: {', '.join(missing_columns)}"
        )

    cleaned_prices = dropna(df_stock_data)
    try:
        return add_all_ta_features(
            cleaned_prices,
            open="Open",
            high="High",
            low="Low",
            close="Close",
            volume="Volume",
        )
    except Exception as e:
        # Callers only handle ValueError; chain the cause so the original
        # traceback is not lost.
        raise ValueError(f"Erro ao adicionar indicadores técnicos: {e}") from e


def load_stock_data(ticker: str, start_date: str, end_date: str) -> pd.DataFrame:
    """
    Load historical stock-market data for the given ticker via yfinance.

    Args:
    ticker (str): Ticker symbol of the asset.
    start_date (str): Start date in 'YYYY-MM-DD' format.
    end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
    DataFrame: Historical data of the asset, e.g.
                        Open       High        Low      Close  Adj Close     Volume
    Date
    2020-01-02  74.059998  75.150002  73.797501  75.087502  73.152657  135480400
    2020-01-03  74.287498  75.144997  74.125000  74.357498  72.441460  146322800
    2020-01-06  73.447502  74.989998  73.187500  74.949997  73.018700  118387200

    Raises:
    ValueError: If no ticker is given or the download returns no rows.
    """
    # Reject a missing/blank ticker up front: it cannot yield data, so there
    # is no point in issuing a network request for it.
    if not ticker or (isinstance(ticker, str) and not ticker.strip()):
        raise ValueError("Nenhum dado retornado. Verifique o ticker ou as datas.")

    df_stock_data = yf.download(ticker, start=start_date, end=end_date)

    # The None check is defensive; harmless if download always returns a frame.
    if df_stock_data is None or df_stock_data.empty:
        raise ValueError("Nenhum dado retornado. Verifique o ticker ou as datas.")
    return df_stock_data

     

def clean_data(df_stock_data_all_features:pd.DataFrame) -> pd.DataFrame:
    """
    Clean and prepare the asset data.

    Currently a placeholder: no cleaning is performed and the input object
    is returned unchanged.

    Args:
    df_stock_data_all_features (DataFrame): DataFrame with the asset data to
        be cleaned.

    Returns:
    DataFrame: The same DataFrame that was passed in.
    """
    # TODO: implement the data-cleaning logic; for now this is a pass-through.
    return df_stock_data_all_features

def colect_and_clean_data(ticker: str, start_date: str, end_date: str):
    """
    Load, enrich and clean the historical data of one asset.

    Runs ``load_stock_data`` -> ``add_technical_indicators`` -> ``clean_data``
    and prints a summary of the result through ``display_info``.

    Args:
    ticker (str): Ticker symbol of the asset.
    start_date (str): Start date.
    end_date (str): End date.

    Returns:
    DataFrame: The processed data, or ``None`` when any step raised
        ``ValueError`` (the error is shown with ``display`` instead of
        being propagated to the caller).
    """
    try:
        raw_prices = load_stock_data(ticker=ticker, start_date=start_date, end_date=end_date)
        with_indicators = add_technical_indicators(raw_prices)
        cleaned = clean_data(with_indicators)

        display_info(
            title='Successful Import',
            df=cleaned,
            ticker=ticker,
            start_date=start_date,
            end_date=end_date,
        )
    except ValueError as e:
        # Best-effort by design: show the problem in the notebook and signal
        # the failure to the caller with None.
        display(e)
        return None

    return cleaned


# Usage example
if __name__ == "__main__":
    ticker = "AAPL"  # Replace with the desired asset
    start_date = "2020-01-01"
    end_date = "2020-12-31"
    try:
        # df_stock is None when the pipeline failed; the error is displayed
        # inside colect_and_clean_data in that case.
        df_stock = colect_and_clean_data(ticker, start_date, end_date)
    except ValueError as e:
        # NOTE(review): colect_and_clean_data already catches ValueError and
        # returns None, so this handler looks unreachable — confirm before removing.
        display(e)


[*********************100%%**********************]  1 of 1 completed





--------------------------------------------------
Successful Import
Ticker: AAPL
Start Date: 2020-01-01
End Date: 2020-12-31
Data Shape: (252, 92) (rows, columns)
Missing Values: 1587
Basic Statistics:
             Open        High         Low       Close   Adj Close        Volume    volume_adi    volume_obv  volume_cmf     volume_fi   volume_em  volume_sma_em    volume_vpt  volume_vwap  volume_mfi   volume_nvi  volatility_bbm  volatility_bbh  volatility_bbl  volatility_bbw  volatility_bbp  volatility_bbhi  volatility_bbli  volatility_kcc  volatility_kch  volatility_kcl  volatility_kcw  volatility_kcp  volatility_kchi  volatility_kcli  volatility_dcl  volatility_dch  volatility_dcm  volatility_dcw  volatility_dcp  volatility_atr  volatility_ui  trend_macd  trend_macd_signal  trend_macd_diff  trend_sma_fast  trend_sma_slow  trend_ema_fast  trend_ema_slow  trend_vortex_ind_pos  trend_vortex_ind_neg  trend_vortex_ind_diff  trend_trix  trend_mass_index   trend_dpo   trend_kst  trend_kst_

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 252 entries, 2020-01-02 to 2020-12-30
Data columns (total 92 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Open                       252 non-null    float64
 1   High                       252 non-null    float64
 2   Low                        252 non-null    float64
 3   Close                      252 non-null    float64
 4   Adj Close                  252 non-null    float64
 5   Volume                     252 non-null    int64  
 6   volume_adi                 252 non-null    float64
 7   volume_obv                 252 non-null    int64  
 8   volume_cmf                 233 non-null    float64
 9   volume_fi                  239 non-null    float64
 10  volume_em                  251 non-null    float64
 11  volume_sma_em              238 non-null    float64
 12  volume_vpt                 251 non-null    float64
 13  volume_vwap                239 