In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from ipywidgets import interact, fixed, FloatSlider, IntSlider

In [2]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [3]:
def get_dados(indice='portfolio'):
    if indice=='portfolio':
        df = pd.read_csv('dados/retorno_portfolio.csv', parse_dates=['Date'])
        df.fillna(0, inplace=True)
    elif indice=='ACWI':    
        df = pd.read_csv('dados/retorno_portfolio.csv', parse_dates=['Date'])
        df.fillna(0, inplace=True)
        df['retorno'] = df['ACWI']
    else:
        url = f'https://query1.finance.yahoo.com/v7/finance/download/{indice}?period1=852076800&period2=1634743473&interval=1d&events=history&includeAdjustedClose=true'
        df = pd.read_csv(url)
        df['Date'] = pd.to_datetime(df['Date'])
        df['retorno'] = df['Close'].pct_change()
        
    df['year_week'] = df['Date'].dt.strftime('%Y-%U')
    df['retorno acumulado'] = df['retorno'] + 1
    df['retorno acumulado'] = df['retorno acumulado'].cumprod()
    
    df_gsrai = pd.read_csv('dados/GSRAII.csv', parse_dates=['Date']).sort_values('Date')

    df = df.merge(df_gsrai, on='Date')
    return df[['Date', 'retorno acumulado', 'GSRAII Index', 'year_week']].dropna()

### Indices:
- ^GSPC = *SP500*
- ^BVSP = *Bovespa*
- ACWI
- portfolio

In [4]:
# Trocar codigo do indice
indice='portfolio'
df = get_dados(indice=indice)
df

Unnamed: 0,Date,retorno acumulado,GSRAII Index,year_week
0,1997-01-02,1.000000,-0.04,1997-00
1,1997-01-03,1.004739,0.13,1997-00
2,1997-01-06,1.013472,0.22,1997-01
3,1997-01-07,1.013524,0.21,1997-01
4,1997-01-08,1.012990,0.22,1997-01
...,...,...,...,...
6385,2021-08-19,2.640252,-0.11,2021-33
6386,2021-08-20,2.627207,-0.09,2021-33
6387,2021-08-23,2.677371,-0.02,2021-34
6388,2021-08-24,2.686924,0.08,2021-34


In [5]:
#%% Semanal
df_gp = df.groupby('year_week').agg('last')

In [71]:
# # SHIFT +
# for pct in range(1,6):
#     df_gp[f'pct_{pct}_sem'] = (df_gp['retorno acumulado']/df_gp['retorno acumulado'].shift(pct).values -1) * 100

# SHIFT -
for pct in range(1,6):
    df_gp[f'pct_{pct}_sem'] = (df_gp['retorno acumulado'].shift(pct*-1).values/
                           df_gp['retorno acumulado'] -1) * 100

In [75]:
def classe_gsrai(df, limit=0, diff=None, media_movel=3):
    
    df=df.copy()
    df['gsrai_diff'] = df['GSRAII Index'].diff()
    
    if not media_movel == None: 
        df['mean'] = df['GSRAII Index'].rolling(media_movel).mean()
        up_down = df['mean']
        df['gsrai_gt_up'] = np.where((df['GSRAII Index'] > up_down) & (df['GSRAII Index'] >= limit), 1, 0)
        df['gsrai_gt_down'] = np.where((df['GSRAII Index'] < up_down) & (df['GSRAII Index'] >= limit), 1, 0)
        df['gsrai_lt_up'] = np.where((df['GSRAII Index'] > up_down) & (df['GSRAII Index'] < limit), 1, 0)
        df['gsrai_lt_down'] = np.where((df['GSRAII Index'] < up_down) & (df['GSRAII Index'] < limit), 1, 0)
    elif not diff == None: 
        df['gsrai_gt_up'] = np.where((df['gsrai_diff'] >= diff) & (df['GSRAII Index'] >= limit), 1, 0)
        df['gsrai_gt_down'] = np.where((df['gsrai_diff'] < diff) & (df['GSRAII Index'] >= limit), 1, 0)
        df['gsrai_lt_up'] = np.where((df['gsrai_diff'] >= diff) & (df['GSRAII Index'] < limit), 1, 0)
        df['gsrai_lt_down'] = np.where((df['gsrai_diff'] < diff) & (df['GSRAII Index'] < limit), 1, 0)
        
        
    regras = ['gsrai_gt_up', 'gsrai_gt_down', 'gsrai_lt_up', 'gsrai_lt_down']
    df['classe'] = np.nan
    for col in regras:
        df.loc[df[col]==1, 'classe'] = col
    return df

In [76]:
df_gp =  classe_gsrai(df=df_gp, limit=0, media_movel=None, diff=0)

In [77]:
df_gp

Unnamed: 0_level_0,Date,retorno acumulado,GSRAII Index,pct_1_sem,pct_2_sem,pct_3_sem,pct_4_sem,pct_5_sem,mean,gsrai_gt_up,gsrai_gt_down,gsrai_lt_up,gsrai_lt_down,gsrai_diff,classe
year_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1997-00,1997-01-03,1.004739,0.13,0.260471,1.658079,0.369719,1.059158,0.492158,,0,0,0,0,,
1997-01,1997-01-10,1.007356,0.22,1.393978,0.108964,0.796612,0.231085,2.344282,,1,0,0,0,0.09,gsrai_gt_up
1997-02,1997-01-17,1.021398,0.36,-1.267347,-0.589153,-1.146905,0.937239,1.409438,0.236667,1,0,0,0,0.14,gsrai_gt_up
1997-03,1997-01-24,1.008454,0.37,0.686899,0.121988,2.232885,2.711145,1.777959,0.316667,1,0,0,0,0.01,gsrai_gt_up
1997-04,1997-01-31,1.015381,0.29,-0.561058,1.535438,2.010436,1.083616,2.346182,0.340000,0,1,0,0,-0.08,gsrai_gt_down
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-30,2021-07-30,2.716325,0.11,-1.467086,-0.575364,-3.280842,-1.709995,,0.220000,0,1,0,0,-0.20,gsrai_gt_down
2021-31,2021-08-06,2.676474,0.31,0.904999,-1.840762,-0.246526,,,0.243333,1,0,0,0,0.20,gsrai_gt_up
2021-32,2021-08-13,2.700696,0.26,-2.721135,-1.141198,,,,0.226667,0,1,0,0,-0.05,gsrai_gt_down
2021-33,2021-08-20,2.627207,-0.09,1.624132,,,,,0.160000,0,0,0,1,-0.35,gsrai_lt_down


In [78]:
pct =  'pct_3_sem'
regras = ['gsrai_gt_up', 'gsrai_gt_down', 'gsrai_lt_up', 'gsrai_lt_down']

In [79]:
lista = []
for col in df_gp['classe'].dropna().unique():
    desc = df_gp[df_gp[col]==1][[pct]].describe().rename(columns={pct: col})
    lista.append(desc)
pd.concat(lista, axis=1)

Unnamed: 0,gsrai_gt_up,gsrai_gt_down,gsrai_lt_down,gsrai_lt_up
count,369.0,272.0,330.0,327.0
mean,0.519067,0.610383,-0.162479,0.23767
std,2.83799,2.906784,4.750104,3.779462
min,-7.702634,-8.874977,-28.527886,-11.9918
25%,-1.019153,-1.30467,-1.8902,-1.87474
50%,0.614841,0.852636,0.267737,0.305977
75%,2.291294,2.452183,2.431921,2.446275
max,8.273622,12.718024,13.405774,18.575503


In [91]:
@interact(df=fixed(df_gp), 
          pct=['pct_1_sem', 'pct_2_sem', 'pct_3_sem', 'pct_4_sem', 'pct_5_sem'],
          limit=FloatSlider(min=-1, max=1, step=0.05, value=0),
          diff=FloatSlider(min=-1, max=1, step=0.05, value=0),
#           media_movel=IntSlider(min=2, max=6, step=1, value=3)
         )
def hist_classes(df, pct, limit=0, diff=0):
    df = classe_gsrai(df=df, limit=limit, media_movel=None, diff=diff).dropna()
    fig = px.histogram(df.sort_values('classe'), x=pct, color="classe", marginal="box", nbins=100, 
#                        range_y=[0,50],
                       range_x=[df[pct].min()-1,df[pct].max()+1],
                       title=f'Distribuição {indice} {pct} ',
                      )
    fig.update_layout(barmode='overlay')
    fig.update_traces(opacity=0.65)
    fig.add_vline(x=0,line_dash="dash")
    
    for i, classe in enumerate(df['classe'].unique()):
        fig.add_annotation(text=f'Total {classe}: {len(df[df["classe"]==classe])}',
                           showarrow=False, yshift=70-(i*19), xshift=450)
        fig.add_annotation(text=f'σ {classe}: {df[df["classe"]==classe][pct].std():.2f} ',
                       showarrow=False, yshift=200-(i*19), xshift=450)   
    return fig

    
hist_classes(df_gp, pct, limit=-1, diff=0);

interactive(children=(Dropdown(description='pct', options=('pct_1_sem', 'pct_2_sem', 'pct_3_sem', 'pct_4_sem',…

In [97]:
@interact(df=fixed(df_gp), 
          pct=['pct_1_sem', 'pct_2_sem', 'pct_3_sem', 'pct_4_sem', 'pct_5_sem'],
          limit=FloatSlider(min=-1, max=1, step=0.05, value=0),
#           media_movel=IntSlider(min=2, max=6, step=1, value=3)
         )
def hist_diff(df, pct, limit=0):
    df = classe_gsrai(df=df, limit=limit, media_movel=3).dropna()
    df = df.copy()
    df['classe_diff'] = np.where(df['gsrai_diff']<=limit, 'abaixo', 'acima')
    fig = px.histogram(df.sort_values('classe_diff', ascending=False), 
                       x=pct, color="classe_diff", 
                       marginal="box", nbins=100, 
#                        range_y=[0,50],
                       range_x=[df[pct].min()-1,df[pct].max()+1],
                       title=f'Distribuição {indice} {pct} por diff GSRAI',
                      )
    fig.update_layout(barmode='overlay')
    fig.update_traces(opacity=0.65)
    fig.add_vline(x=0,line_dash="dash")
    
    for i, classe in enumerate(df['classe_diff'].unique()):
        fig.add_annotation(text=f'Total {classe}: {len(df[df["classe_diff"]==classe])}',
                           showarrow=False, yshift=100-(i*19), xshift=450)
        fig.add_annotation(text=f'σ {classe}: {df[df["classe_diff"]==classe][pct].std():.2f} ',
                       showarrow=False, yshift=200-(i*19), xshift=450)   
    return fig

    
hist_diff(df_gp, pct);

interactive(children=(Dropdown(description='pct', options=('pct_1_sem', 'pct_2_sem', 'pct_3_sem', 'pct_4_sem',…

In [83]:
cut = df_gp[pct].quantile(0.15)
df_gp[(df_gp[pct]<=cut)].sum()

retorno acumulado                                           339.556038
GSRAII Index                                                    -39.18
pct_1_sem                                                  -348.099447
pct_2_sem                                                   -704.47032
pct_3_sem                                                  -1067.47918
pct_4_sem                                                 -1058.919594
pct_5_sem                                                 -1044.632158
mean                                                            -35.77
gsrai_gt_up                                                         45
gsrai_gt_down                                                       29
gsrai_lt_up                                                         56
gsrai_lt_down                                                       65
gsrai_diff                                                       -4.84
classe               gsrai_gt_upgsrai_gt_upgsrai_lt_downgsrai_gt_up...
dtype:

In [85]:
df_gp[(df_gp[pct]>cut) & (df_gp['gsrai_lt_down']==0)][pct].describe()

count    839.000000
mean       1.284395
std        2.450665
min       -2.773841
25%       -0.561769
50%        0.972294
75%        2.696895
max       18.575503
Name: pct_3_sem, dtype: float64

In [16]:
df_gp.columns

Index(['Date', 'retorno acumulado', 'GSRAII Index', 'pct_1_sem', 'pct_2_sem',
       'pct_3_sem', 'pct_4_sem', 'pct_5_sem', 'mean', 'gsrai_gt_up',
       'gsrai_gt_down', 'gsrai_lt_up', 'gsrai_lt_down', 'gsrai_diff',
       'classe'],
      dtype='object')

In [20]:
# import seaborn as sns
# sns.pairplot(df_gp.drop(columns= ['gsrai_gt_up',
#        'gsrai_gt_down', 'gsrai_lt_up', 'gsrai_lt_down']), hue='classe');

In [19]:
df_gp.to_csv('gsrai.csv')

In [None]:
df_gp['2019-35':'2019-45']

In [None]:
df_gp[(df_gp['pct_ret']<0) & (df_gp['gsrai_lt_down']==1)]['pct_ret'].describe()

In [None]:
df_gp[(df_gp['gsrai_lt_down']==1)]['pct_ret'].describe()

In [None]:
df_gp[(df_gp['gsrai_lt_down']==1)]['pct_ret_shift'].describe()

In [None]:
df['retorno acumulado'].plot()

In [None]:
#%% Grafico

fig = px.line(df, x="Date", y='retorno acumulado')
# mask = df_gp['gsrai_lt_down']==1
# fig = px.line(df_gp, x="Date", y=['GSRAII Index', 'pct_ret'])

# fig.add_trace(go.Scatter(
#     x=df_gp[mask]['Date'],
#     y=df_gp[mask][pct],
#     marker_size=10, mode='markers', name='Queda',
#     ))

fig.show()