# Tamanho do Mercado das Usinas Sucroalcooleiras

## Histórico de Atualizações

<table>
    <thead>
        <tr>
            <th>Versão</th>
            <th>Data</th>
            <th>Descrição</th>
            <th>Autor</th>
            <th>Email</th>
        </tr>
    </thead>
    <tbody>
        <tr>
        <th>1.0</th>
            <td>08/05/2023</td>
            <td>
                <p>...</p>
            </td>
            <td>Marcel Quintela, Sergio Urzedo Jr</td>
            <td>marcel.quintela@avanade.com, sergiourzedojr@gmail.com</td>
        </tr>
    </tbody>
</table>

# Setup Inicial

Carregando linhas com configurações iniciais já escritas em notebook base

No bloco de comando "%run" não pode haver nenhuma outra linha de comando ou comentário.

In [56]:
%run nb00_Setup_Usinas{'blob_relative_path_raw': 'nuvem/Usinas/'}

# Leitura dos Dados 

In [57]:
# Load the parquet inputs used by this notebook.
# abfss_path_enriched and linked_service_enriched are presumably defined by the
# %run setup cell — confirm against nb00_Setup_Usinas.
usinas = pd.read_parquet(
    abfss_path_enriched + 'tancagens_impt_usinas.parquet',
    storage_options=dict(linked_service=linked_service_enriched),
)
hist_usinas = pd.read_parquet(
    abfss_path_enriched + 'vendas_impt_mes.parquet',
    storage_options=dict(linked_service=linked_service_enriched),
)

# NOTE(review): the three reads below spell out the full storage-account URL
# instead of building it from a configured base path like the reads above.
prop = pd.read_parquet(
    'abfss://general@stedlk01dtandev.dfs.core.windows.net/enriched/mercado_potencial/volumetria/vol01_prop_consfinal_vegetativo.parquet',
    storage_options=dict(linked_service=linked_service_enriched),
)
cf = pd.read_parquet(
    'abfss://general@stedlk01dtandev.dfs.core.windows.net/enriched/mercado_potencial/volumetria/vol01_vendas_vibra_mes.parquet',
    storage_options=dict(linked_service=linked_service_enriched),
)
diesel_mes = pd.read_parquet(
    'abfss://general@stedlk01dtandev.dfs.core.windows.net/enriched/mercado_potencial/anp/anp_diesel_mes.parquet',
    storage_options=dict(linked_service=linked_service_enriched),
)

# Tamanho do Mercado

O tamanho do mercado pode ser obtido de duas maneiras:

1. Pela visão da Clientela<br>
    - $TM = C + \bar{C} $  
2. Pela visão da participação da Atividade Econômica no Segmento Consumidor final<br>
    - $TM = U_{part} \times Cons.Final$

Onde assumiu-se que:<br>
- $Cons.Final = Cons.Final_{Global}$ e<br>
- $U_{part} = U_{part: Vibra}$

In [58]:
# Market size, view 1: monthly volume split by the IMP flag, one column per flag value.
tm1_m = (
    hist_usinas
    .groupby(['Data', 'IMP'])['Vol_Total']
    .sum()
    .reset_index()
    .pivot_table(values='Vol_Total', index='Data', columns='IMP')
    .reset_index()
    .rename_axis(None, axis=1)
)
# NOTE(review): assumes IMP has exactly two values and that their column order
# after the pivot is (client, non-client) — confirm against the data.
tm1_m.columns = ['Data', 'Vol_Cliente', 'Vol_NaoCliente']
# Total volume for the mills = client + non-client
tm1_m['Vol_Usina'] = tm1_m[['Vol_Cliente', 'Vol_NaoCliente']].sum(axis=1)

In [64]:
# Total volume per month and IMP flag in long format (one row per Data/IMP pair)
hist = hist_usinas.groupby(['Data', 'IMP'])['Vol_Total'].sum().reset_index()
hist

In [59]:
# Inspect the client-view market size table (client, non-client and total volume per month)
tm1_m

In [60]:
# Market size, view 2: final-consumer diesel volume times the mills' share.
cons_final = diesel_mes.loc['2016':]['consumidor final']
part_usina = prop['USINA'][:-1]  # leaves out the last entry of the share series
# NOTE(review): presumably both operands are Series indexed by month, so the
# product pairs them by index label — confirm.
tm2_m = pd.DataFrame(cons_final * part_usina).reset_index()
tm2_m.columns = ['Data', 'Vol_Usina']

In [61]:
# Inspect the share-based market size table (columns 'Data' and 'Vol_Usina')
tm2_m

# Teste de Aderência das duas séries

In [33]:
# Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.kolmogorov.html
#
# Visual comparison of the two market-size series: each series is normalised to
# sum to 1, accumulated, and the largest gaps between the two cumulative curves
# are highlighted.

# Compare at most the first 85 rows, and never more than either table holds, so
# that x, y and the plotting grid always have the same length.
n_obs = min(85, len(tm1_m), len(tm2_m))

x = tm1_m['Vol_Usina'][:n_obs].values
y = tm2_m['Vol_Usina'][:n_obs].values

# Probabilities: each observation as a fraction of its series total
px = x/x.sum()
py = y/y.sum()

# Cumulative probabilities
pxc = px.cumsum()
pyc = py.cumsum()

# Arbitrary horizontal axis with one point per observation
grid = np.linspace(-3, 3, n_obs)

plt.step(grid, pxc, label='Tamanho de Mercado 1')
plt.plot(grid, pyc, label='Tamanho de Mercado 2')
plt.ylim([0, 1]); plt.grid(True); plt.legend();

# Column 0 holds pyc - pxc, column 1 holds pxc - pyc; a and b are the
# positions where each signed gap is largest.
gaps = np.column_stack([pyc-pxc, pxc-pyc])
a, b = np.argmax(gaps, axis=0)
plt.vlines([grid[a]], pyc[a], pxc[a], color='r', linestyle='dashed', lw=4)
plt.vlines([grid[b]], pxc[b], pyc[b], color='r', linestyle='dashed', lw=4)

plt.show()

In [34]:
# Largest gap between the two cumulative curves (equals pxc[b] - pyc[b] when
# that side holds the maximum), then scaled by sqrt(n).
Dn = gaps.max()
Kn = Dn * np.sqrt(x.size)
print('Dn=%f, sqrt(n)*Dn=%f' % (Dn, Kn))

In [35]:
# Kolmogorov-Smirnov test on the raw monthly volumes x and y (not on the
# normalised cumulative curves plotted earlier).
# NOTE(review): presumably runs as a two-sample test because y is an array
# rather than a distribution name — confirm against the scipy documentation.
stats.kstest(x,y, alternative='two-sided')

# Gráficos

In [37]:
# Palette for the stacked areas; the loop below only consumes the first two entries
cores = ['green','gray','purple', 'gold']

fig = make_subplots(rows=1, cols=1,
                    y_title='Volume (m³)',
                    subplot_titles=(''),
                    shared_yaxes=False,
                    vertical_spacing=0.1 )

# Chart 01: stacked areas for the two volume columns that follow 'Data' in tm1_m
for c,i in enumerate(tm1_m.columns[1:3]):
    fig.add_trace(go.Scatter(
                        name = i,
                        x = tm1_m['Data'],
                        y = tm1_m[i],
                        line = {'color': cores[c]},
                        visible=True,
                        stackgroup='one'),
                    row=1, col=1
    )

fig.add_trace(go.Scatter( # Dotted line: market size from view 1 (client + non-client total)
            x = tm1_m['Data'],
            y = tm1_m['Vol_Usina'],
            line = {'color': 'green', 'dash':'dot'},
            name = 'TM (Cli + ÑCli)',
            showlegend=True),
    row=1, col=1)

fig.add_trace(go.Scatter( # Thick line: market size from view 2 (mills' share * final consumer)
            x = tm2_m['Data'],
            y = tm2_m['Vol_Usina'],
            line = dict(color='red', width=4),
            name = 'TM (%Usi * ConsFinal)',
            showlegend=True),
    row=1, col=1)

# Range slider under the x axis; the second call repeats the visibility
# setting for the single subplot
fig.update_xaxes(matches='x', rangeslider_visible=True, rangeslider_thickness = 0.05)
fig.update_xaxes(rangeslider= {'visible':True}, row=1, col=1)

# Timestamp annotation placed in paper coordinates (outside the plot area)
fig.add_annotation(
            showarrow=False,
            text='Gerado em ' + dt.now(tz_SP).strftime("%d %b %Y às %H:%M:%S"),
            font=dict(size=12),
            align="right",
            x=1.18,
            y=-0.24,
            xref='paper',
            yref='paper',
            xanchor='right',
            yanchor='bottom',)

# Title and general layout
fig.update_layout(
                template='plotly_white', # template ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]
                title = 'Volume de Diesel Vendido para Usinas Sucroalcooleiras' + '<br><sup>'+ 
                        'Considerando total de usinas MAPA (mar/2023)'+'</sup>',
                legend_title = "Legenda",
                font=dict(
                        family = "Courier New, monospace",
                        size = 14,
                        color = "royalblue"),
                height=600
)

# Render to an HTML <div> string for displayHTML. The string gets its own name
# so that `fig` still refers to the Figure object after this cell runs.
fig_html = plotly.offline.plot(fig, output_type='div')
displayHTML(fig_html)


In [51]:
def agrup_graf(df, geo='UF'):
    """Aggregate 'Vol_Total' by month for each value of a grouping column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a datetime-like 'Data' column, a 'Vol_Total' column and
        the column named by ``geo``.
    geo : str, default 'UF'
        Name of the column whose values become the output columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by month start ('MS' resampling of 'Data'), one column per
        ``geo`` value holding the summed 'Vol_Total'; the columns axis has
        no name. Months missing for a given group show up as NaN.
    """
    monthly = (
        df.set_index('Data')
          .groupby([geo])['Vol_Total']
          .resample('MS')
          .sum()
    )
    # Move the grouping level from the row index to the columns
    wide = monthly.unstack(level=0)
    return wide.rename_axis(None, axis=1)

In [48]:
# Step-by-step monthly aggregation per 'REGIAO3' value, kept as an inspectable
# intermediate (the columns axis keeps its name here).
df = hist_usinas.copy()
geo = 'REGIAO3'

aux = (
    df.set_index('Data')
      .groupby([geo])['Vol_Total']
      .resample('MS')
      .sum()
      .unstack(level=0)
)
aux

In [52]:
# Preview the monthly volume table with one column per 'REGIAO3' value
agrup_graf(hist_usinas, geo='REGIAO3')

In [53]:
# Monthly volume per 'REGIAO3' value, plotted with multi_plot
# (multi_plot is presumably provided by the setup notebook — confirm).
vol_por_regiao = agrup_graf(hist_usinas, geo='REGIAO3')

titulo = ('Volume de Diesel Vendido para Usinas Sucroalcooleiras por região' + '<br><sup>'+
            'Considerando total de usinas MAPA (mar/2023)'+'</sup>')

multi_plot(vol_por_regiao, titulo, 'Agrupamentos<br>Regionais')
# TODO: add the client / non-client breakdown

In [207]:
# Relative monthly error of market-size view 1 (tm1_m) against view 2 (tm2_m).
# NOTE(review): both tables carry the default integer index from reset_index,
# so rows are paired by position, not by 'Data' — confirm that both start at
# the same month.
# Explicit copy: adding a column to a column selection of tm1_m would otherwise
# trigger pandas' SettingWithCopyWarning.
df = tm1_m[['Data']].copy()
df['Erro'] = (tm1_m['Vol_Usina'] - tm2_m['Vol_Usina'])/tm2_m['Vol_Usina']

fig = go.Figure()
fig.add_trace(go.Scatter( # Dotted line: relative error per month
            x = df['Data'],
            y = df['Erro'],
            line = {'color': 'green', 'dash':'dot'}))
fig.update_layout(
                template='plotly_white', # template ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]
                title = 'Erro Relativo Mensal em relação do Tamanho do Mercado Proporcional',
                font=dict(
                        family = "Courier New, monospace",
                        size = 14,
                        color = "royalblue"),
                height=400
)
# Render to an HTML <div> string for displayHTML. The string gets its own name
# so that `fig` still refers to the Figure object after this cell runs.
fig_html = plotly.offline.plot(fig, output_type='div')
displayHTML(fig_html)

In [199]:
# Monthly volume with one column per 'REGIAO' value; rebinds the generic name df
df = agrup_graf(hist_usinas, geo='REGIAO')

In [176]:
# Display the current contents of df
df

In [204]:
# Small hand-written sample table in long format: 18 rows with the columns
# 'date' (three distinct dates as strings), 'sub_id' (1233 / 3424),
# 'stat_type' (link_clicks / transaction / customer_signups) and 'value'.
# NOTE(review): looks like demo data unrelated to the mills datasets loaded
# earlier in this notebook — confirm whether it should stay.
dfi=pd.DataFrame({'date': {0: '2020.01.01',
  1: '2020.01.01',
  2: '2020.01.01',
  3: '2020.01.01',
  4: '2020.01.01',
  5: '2020.01.01',
  6: '2020.02.01',
  7: '2020.02.01',
  8: '2020.02.01',
  9: '2020.02.01',
  10: '2020.02.01',
  11: '2020.02.01',
  12: '2020.03.01',
  13: '2020.03.01',
  14: '2020.03.01',
  15: '2020.03.01',
  16: '2020.03.01',
  17: '2020.03.01'},
 'sub_id': {0: 1233,
  1: 1233,
  2: 1233,
  3: 3424,
  4: 3424,
  5: 3424,
  6: 1233,
  7: 1233,
  8: 1233,
  9: 3424,
  10: 3424,
  11: 3424,
  12: 1233,
  13: 1233,
  14: 1233,
  15: 3424,
  16: 3424,
  17: 3424},
 'stat_type': {0: 'link_clicks',
  1: 'transaction',
  2: 'customer_signups',
  3: 'link_clicks',
  4: 'transaction',
  5: 'customer_signups',
  6: 'link_clicks',
  7: 'transaction',
  8: 'customer_signups',
  9: 'link_clicks',
  10: 'transaction',
  11: 'customer_signups',
  12: 'link_clicks',
  13: 'transaction',
  14: 'customer_signups',
  15: 'link_clicks',
  16: 'transaction',
  17: 'customer_signups'},
 'value': {0: 12,
  1: 50,
  2: 9,
  3: 24,
  4: 100,
  5: 18,
  6: 14,
  7: 24,
  8: 39,
  9: 20,
  10: 10,
  11: 8,
  12: 4,
  13: 2,
  14: 3,
  15: 2,
  16: 1,
  17: 1}})

# Show the table
dfi

In [206]:
# Imports
# NOTE(review): notebook imports normally live in the first setup cell; they
# are kept here so this cell keeps working on its own.
import plotly.graph_objs as go
import pandas as pd
import numpy as np


# Convert types: real timestamps for the x axis, string ids usable as column labels
dfi['date']=pd.to_datetime(dfi['date'])
dfi['sub_id']=dfi['sub_id'].astype(str)
df=dfi

# Split df by stat_type and organize the pieces in a dict keyed by stat_type.
# Comprehension variables are used so that `df` is not overwritten by a loop
# variable and still refers to the DataFrame after this cell runs.
groups = df['stat_type'].unique().tolist()
dfs = {str(g): df[df['stat_type']==g] for g in groups}

# Pivot each piece so that every sub_id becomes a column indexed by date
dfp = {stat: piece.pivot(index='date', columns='sub_id', values='value')
       for stat, piece in dfs.items()}

# One trace per sub_id column, initially showing the 'link_clicks' table
fig=go.Figure()
for sub_id in ('1233', '3424'):
    fig.add_trace(go.Scatter(x=dfp['link_clicks'].index,
                             y=dfp['link_clicks'][sub_id],
                             visible=True)
                 )

# Dropdown buttons: one option per stat_type table, each restyling the traces
buttons=[]
for stat in dfp.keys():
    buttons.append(dict(method='restyle',
                        label=stat,
                        visible=True,
                        args=[{'y':[dfp[stat]['1233'].values, dfp[stat]['3424'].values],
                               'x':[dfp[stat].index],
                               'type':'scatter'}],
                        )
                  )

# A single dropdown menu holding all the buttons
updatemenu=[dict(buttons=buttons, direction='down', showactive=True)]

# Add the dropdown menu to the figure
fig.update_layout(showlegend=False, updatemenus=updatemenu)

# Label next to the dropdown menu
fig.update_layout(
    annotations=[
        go.layout.Annotation(text="<b>stat_type:</b>",
                             x=-0.3, xref="paper",
                             y=1.1, yref="paper",
                             align="left", showarrow=False),
                          ]
)

fig.show()