In [53]:
import pandas_datareader.data as web
#from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
from pandas_datareader._utils import RemoteDataError
from datetime import datetime, timedelta
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle

import codecs

In [54]:
#!pip3 install pandas_datareader

In [55]:
def plot_stock_trend_and_returns(ticker, titles, start_date, end_date, all_returns):
    
    #get the data for this ticker
    prices = web.DataReader(ticker, 'yahoo', start=start_date, end=end_date).Close
    prices.index = [d.date() for d in prices.index]
    
    plt.figure(figsize=(10,6))
    
    #plot stock price
    plt.subplot(2,1,1)
    plt.plot(prices)
    plt.title(titles[0], fontsize=16)
    plt.ylabel('Price ($)', fontsize=14)
    
    #plot stock returns
    plt.subplot(2,1,2)
    plt.plot(all_returns[0], all_returns[1], color='g')
    plt.title(titles[1], fontsize=16)
    plt.ylabel('Pct. Return', fontsize=14)
    plt.axhline(0, color='k', linestyle='--')
    
    plt.tight_layout()
    
    plt.show()

In [67]:
def perform_analysis_for_stock(ticker, start_date, end_date, return_period_weeks, verbose=False):
    """
    Inputs:
        ticker: the ticker symbol to analyze
        start_date: the first date considered in simulation
        end_date: the last date considered in simulation
        return_period_weeks: the number of weeks in which to calculate returns
        verbose: True if you want to print simulation steps
        
    Outputs:
        average and standard deviation of returns for simulated runs of this ticker within the given date range
    """
    
    #get the data for this ticker
    try:
        prices = web.DataReader(ticker, 'yahoo', start=start_date, end=end_date).Close
    #could not find data on this ticker
    except (RemoteDataError, KeyError):
        #return default values
        print('Could not find data for this ticker !')
        return -np.inf, np.inf, None
    
    prices.index = [d.date() for d in prices.index]
    
    #this will store all simulated returns
    pct_return_after_period = []
    buy_dates = []

    #assume we buy the stock on each day in the range
    for buy_date, buy_price in prices.iteritems():
        #get price of the stock after given number of weeks
        sell_date = buy_date + timedelta(weeks=return_period_weeks)
        
        try:
            sell_price = prices[prices.index == sell_date].iloc[0]
        #trying to sell on a non-trading day, skip
        except IndexError:
            continue
        
        #compute the percent return
        pct_return = (sell_price - buy_price)/buy_price
        pct_return_after_period.append(pct_return)
        buy_dates.append(buy_date)
        
        if verbose:
            print('Date Buy: %s, Price Buy: %s'%(buy_date,round(buy_price,2)))
            print('Date Sell: %s, Price Sell: %s'%(sell_date,round(sell_price,2)))
            print('Return: %s%%'%round(pct_return*100,1))
            print('-------------------')
    
    #if no data collected return default values
    if len(pct_return_after_period) == 0:
        return -np.inf, np.inf, None
    
    #report average and deviation of the percent returns
    return np.mean(pct_return_after_period), np.std(pct_return_after_period), [buy_dates, pct_return_after_period]

# User Input Area

In [80]:
#start date for simulation. 
#Further back means more training data but risk of including patterns that no longer exist
#More recent means less training data but only using recent patterns
start_date, end_date = datetime(2019,1,1), datetime.now().date()

#set number of weeks in which you want to see return
return_period_weeks = 4

#I want at least this much average return
min_avg_return  = 0.05

#I want at most this much volatility in return
max_dev_return = 0.15

In [58]:
#Download file "series_tickers.p" from github or create your own list of tickers
#series_tickers = pickle.load(open("series_tickers.p", "rb" ))

In [59]:
#print(series_tickers)

In [43]:
#doc = codecs.open('B3.txt','rU','UTF-16') 
#df_stock = pd.read_csv(doc, sep='\t')

#df_stock = pd.read_csv('B3.txt', sep='\t', lineterminator='\n')

df_stock = pd.read_csv('B3.csv', encoding= 'unicode_escape')

#PRAZOS_CEP.to_csv(file_name, sep='\t')

In [47]:
df_stock

Unnamed: 0,Empresa,Classe,Codigo,Setor,Segmento
0,524 Particip,ON,QVQP3B,Financeiro,Outros
1,Abc Brasil,PN,ABCB4,Financeiro,Bancos
2,Aco Altona,ON,EALT3,Bens industriais,M q. e equip. construo e agricolas
3,Aco Altona,PN,EALT4,Bens industriais,M q. e equip. construo e agricolas
4,AdvancedDh,ON,ADHM3,Sade,Servios medicohospitalares an lises e diagnsticos
...,...,...,...,...,...
535,Wilson Sons,ON,WSON33,Bens industriais,Servios de apoio e armazenagem
536,Wiz S.A,ON,WIZS3,Financeiro,Corretoras de seguros
537,Wlm Ind Com,ON,WLMM3,Bens industriais,Material de transporte
538,Wlm Ind Com,PN,WLMM4,Bens industriais,Material de transporte


In [48]:
classes_in = ['ON','PN','PN Resg','PNA','PNB','PNC','PND','PNE','PNF','PNG','PNAB','PNAE','PNAG']
seg_out = ['Bancos']
df_stock = df_stock[df_stock.Classe.isin(classes_in) & 
                    ~df_stock.Classe.isin(seg_out)
                   ]

In [49]:
df_stock

Unnamed: 0,Empresa,Classe,Codigo,Setor,Segmento
0,524 Particip,ON,QVQP3B,Financeiro,Outros
1,Abc Brasil,PN,ABCB4,Financeiro,Bancos
2,Aco Altona,ON,EALT3,Bens industriais,M q. e equip. construo e agricolas
3,Aco Altona,PN,EALT4,Bens industriais,M q. e equip. construo e agricolas
4,AdvancedDh,ON,ADHM3,Sade,Servios medicohospitalares an lises e diagnsticos
...,...,...,...,...,...
535,Wilson Sons,ON,WSON33,Bens industriais,Servios de apoio e armazenagem
536,Wiz S.A,ON,WIZS3,Financeiro,Corretoras de seguros
537,Wlm Ind Com,ON,WLMM3,Bens industriais,Material de transporte
538,Wlm Ind Com,PN,WLMM4,Bens industriais,Material de transporte


In [None]:
### Incluir aqui novos filtros na base pelas colunas Codigo, Setor e Segmento
### Na sequência deixar apenas o nome e código para fazer as buscas e análises

In [50]:
df_stock.drop(['Classe', 'Setor',  'Segmento'], axis='columns', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [51]:
df_stock

Unnamed: 0,Empresa,Codigo
0,524 Particip,QVQP3B
1,Abc Brasil,ABCB4
2,Aco Altona,EALT3
3,Aco Altona,EALT4
4,AdvancedDh,ADHM3
...,...,...
535,Wilson Sons,WSON33
536,Wiz S.A,WIZS3
537,Wlm Ind Com,WLMM3
538,Wlm Ind Com,WLMM4


In [52]:
print(len(df_stock))

526


In [82]:
#for reg in range(0, len(df_stock)):
for i in range(0, 10):
    ticker = df_stock.Codigo[i] + '.SA'
    #ticker = 'WEGE3.SA'
    name   = df_stock.Empresa[i]
    print(ticker)
    avg_return, dev_return, all_returns = perform_analysis_for_stock(ticker, start_date, end_date, return_period_weeks)
    #print(avg_return)
    #print(min_avg_return)
    #print(dev_return)
    #print(max_dev_return)
    if avg_return > min_avg_return and dev_return < max_dev_return:
        #print('entrei')
        title_price = '%s\n%s'%(ticker, name)
        title_return = 'Avg Return: %s%% | Dev Return: %s%%'%(round(100*avg_return,2), round(100*dev_return,2))
        plot_stock_trend_and_returns(ticker, [title_price, title_return], start_date, end_date, all_returns)

QVQP3B.SA


NameError: name 'RemoteDataError' is not defined