# Avaliação de despachos dos dispatched documents vs publicações em diário oficial

In [1]:
import pandas as pd

### Importação Diário Oficial Antigo

In [2]:
# Load the legacy official-gazette dispatch extract (semicolon-separated,
# Latin-1 encoded) and print its schema.
old_dom = pd.read_csv('ad_oldom_despachos.csv', encoding='latin-1', sep=';')
old_dom.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12232 entries, 0 to 12231
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Data          12232 non-null  object
 1   Pag.          12232 non-null  int64 
 2   Processo SEI  12232 non-null  object
 3   Interessado   12124 non-null  object
 4   N° AD         12232 non-null  object
 5   Assunto       12232 non-null  object
 6   Resultado     12232 non-null  object
 7   Despacho      9058 non-null   object
dtypes: int64(1), object(7)
memory usage: 764.6+ KB


In [3]:
# Keep only process number, outcome and publication date, renamed to the
# schema shared by all sources.  Renaming through an explicit mapping (rather
# than by position) keeps each label tied to its source column, and because
# rename() ignores keys that are absent this cell can be re-run safely.
old_dom = old_dom.rename(
    columns={
        'Processo SEI': 'processo',
        'Resultado': 'despacho',
        'Data': 'data_publicacao',
    }
)[['processo', 'despacho', 'data_publicacao']]
old_dom.head()

Unnamed: 0,processo,despacho,data_publicacao
0,6021.2018/0039868-0,sso SEI: 6021.2018/0039868-0); Art. 2º A ativi...,2020/01/24
1,1010.2020/0007037-4,deferido,2020/12/15
2,1010.2020/0008497-9,deferido,2020/12/19
3,1010.2020/0007987-8,deferido,2020/12/19
4,1010.2020/0007987-8,deferido,2020/12/22


In [4]:
# Spot check: exact-match lookup of one process number in the legacy data.
old_dom[old_dom['processo'] == '1020.2022/0020297-0']

Unnamed: 0,processo,despacho,data_publicacao


In [5]:
# Inspect the raw text of a single process number.
old_dom.at[1, 'processo']

' 1010.2020/0007037-4'

In [6]:
# Clean the process number by trimming surrounding whitespace.
# The vectorised .str accessor propagates missing values instead of raising,
# and assign() builds a new frame rather than writing into a column selection.
old_dom = old_dom.assign(processo=old_dom['processo'].str.strip())
old_dom.loc[1, 'processo']

'1010.2020/0007037-4'

In [7]:
# Drop rows that come from extraction errors: keep only recognised outcomes.
despachos_validos = ['deferido', 'indeferido', 'indeferido e encerrado']
old_dom_cl = old_dom[old_dom['despacho'].isin(despachos_validos)]
old_dom_cl.head()

Unnamed: 0,processo,despacho,data_publicacao
1,1010.2020/0007037-4,deferido,2020/12/15
2,1010.2020/0008497-9,deferido,2020/12/19
3,1010.2020/0007987-8,deferido,2020/12/19
4,1010.2020/0007987-8,deferido,2020/12/22
5,1010.2020/0007034-0,deferido,2020/12/23


### Diário Oficial Novo

In [8]:
# Load the new official-gazette dispatch extract (semicolon-separated,
# Latin-1 encoded) and print its schema.
new_dom = pd.read_csv('ad_dom_despachos.csv', encoding='latin-1', sep=';')
new_dom.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14417 entries, 0 to 14416
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   veiculo    14417 non-null  object
 1   orgao      14417 non-null  object
 2   unidade    14417 non-null  object
 3   serie      14417 non-null  object
 4   processo   14417 non-null  object
 5   documento  14417 non-null  int64 
 6   data       14417 non-null  object
dtypes: int64(1), object(6)
memory usage: 788.6+ KB


In [9]:
# Keep only process number, dispatch label and publication date, renamed to
# the schema shared by all sources.  Renaming through an explicit mapping
# (rather than by position) keeps each label tied to its source column, and
# because rename() ignores keys that are absent this cell can be re-run safely.
new_dom = new_dom.rename(
    columns={
        'serie': 'despacho',
        'data': 'data_publicacao',
    }
)[['processo', 'despacho', 'data_publicacao']]
new_dom.head()

Unnamed: 0,processo,despacho,data_publicacao
0,1020.2021/0015175-3,Despacho deferido,01/03/2023
1,1020.2023/0002289-2,Despacho deferido,01/03/2023
2,1020.2023/0003318-5,Despacho deferido,01/03/2023
3,1020.2023/0003276-6,Despacho deferido,01/03/2023
4,1020.2023/0003286-3,Despacho deferido,01/03/2023


In [10]:
# Keep only granted/denied dispatches; both capitalisations occur in the data.
despachos_validos = [
    'Despacho deferido',
    'Despacho indeferido',
    'Despacho Deferido',
    'Despacho Indeferido',
]
new_dom_cl = new_dom[new_dom['despacho'].isin(despachos_validos)]
new_dom_cl.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5668 entries, 0 to 14378
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   processo         5668 non-null   object
 1   despacho         5668 non-null   object
 2   data_publicacao  5668 non-null   object
dtypes: object(3)
memory usage: 177.1+ KB


### Concat

In [11]:
# Stack the cleaned legacy and new gazette frames row-wise; each frame keeps
# its original row labels.
concat_list = [old_dom_cl, new_dom_cl]
dom = pd.concat(objs=concat_list, axis=0)
dom.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17806 entries, 1 to 14378
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   processo         17806 non-null  object
 1   despacho         17806 non-null  object
 2   data_publicacao  17806 non-null  object
dtypes: object(3)
memory usage: 556.4+ KB


In [12]:
# Frequency of each dispatch label in the combined gazette data.
dom.loc[:, 'despacho'].value_counts()

despacho
deferido                  8798
Despacho deferido         3762
indeferido                3135
Despacho indeferido       1868
indeferido e encerrado     205
Despacho Deferido           20
Despacho Indeferido         18
Name: count, dtype: int64

In [13]:
# Collapse every label variant into the two canonical outcomes.  The mapping
# is applied to the 'despacho' column only, in a single pass, so a matching
# string in any other column is never rewritten.
despacho_canonico = {
    'Despacho deferido': 'deferido',
    'Despacho Deferido': 'deferido',
    'Despacho indeferido': 'indeferido',
    'Despacho Indeferido': 'indeferido',
    'indeferido e encerrado': 'indeferido',
}
dom = dom.assign(despacho=dom['despacho'].replace(despacho_canonico))
dom['despacho'].value_counts()

despacho
deferido      12580
indeferido     5226
Name: count, dtype: int64

In [14]:
# Spot check: exact-match lookup of one process number in the combined data.
dom[dom['processo'] == '1020.2022/0020297-0']

Unnamed: 0,processo,despacho,data_publicacao
9307,1020.2022/0020297-0,deferido,2022/10/21
9308,1020.2022/0020297-0,deferido,2022/10/21
9309,1020.2022/0020297-0,deferido,2022/10/21
9314,1020.2022/0020297-0,deferido,2022/10/21


### AD Dispatched

In [15]:
# Load the dispatched-documents extract (semicolon-separated, Latin-1 encoded)
# and print its schema.
dispatched = pd.read_csv('ad_dispatched_despachos.csv', encoding='latin-1', sep=';')
dispatched.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18081 entries, 0 to 18080
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               18081 non-null  object
 1   protocolo        18081 non-null  object
 2   processo         18081 non-null  object
 3   assunto          18081 non-null  object
 4   despacho         18081 non-null  object
 5   data_publicacao  18081 non-null  object
 6   documento_sei    18081 non-null  int64 
 7   data_extracao    18081 non-null  object
dtypes: int64(1), object(7)
memory usage: 1.1+ MB


In [16]:
# Remove repeated entries that reference the same document: rows identical in
# every column are collapsed to their first occurrence.
dispatched = dispatched.drop_duplicates(keep='first')
dispatched.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18015 entries, 0 to 18080
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   id               18015 non-null  object
 1   protocolo        18015 non-null  object
 2   processo         18015 non-null  object
 3   assunto          18015 non-null  object
 4   despacho         18015 non-null  object
 5   data_publicacao  18015 non-null  object
 6   documento_sei    18015 non-null  int64 
 7   data_extracao    18015 non-null  object
dtypes: int64(1), object(7)
memory usage: 1.2+ MB


In [17]:
# Narrow the frame to the columns needed for the comparison.
dispatched = dispatched.loc[:, ['processo', 'despacho', 'data_publicacao', 'assunto']]

In [18]:
# Frequency of each dispatch label in the dispatched-documents data.
dispatched.loc[:, 'despacho'].value_counts()

despacho
Despacho deferido          12770
Despacho indeferido         5241
Despacho Documental            2
Despacho documental            1
Despacho de Retificação        1
Name: count, dtype: int64

### Contagem de despachos por processo

In [19]:
# Number of gazette rows per (process, outcome) pair, most frequent first.
dom_count = dom.value_counts(subset=['processo', 'despacho'])
dom_count.head()

processo             despacho  
1020.2022/0022993-2  deferido      36
1020.2022/0012930-0  deferido      24
1020.2022/0002755-8  indeferido    21
1020.2022/0023390-5  indeferido    14
1020.2022/0009042-0  deferido      10
Name: count, dtype: int64

In [20]:
# Flatten the counts into a frame with columns processo / despacho / contagem_dom.
dom_count_df = dom_count.reset_index(name='contagem_dom')
dom_count_df.head()

Unnamed: 0,processo,despacho,contagem_dom
0,1020.2022/0022993-2,deferido,36
1,1020.2022/0012930-0,deferido,24
2,1020.2022/0002755-8,indeferido,21
3,1020.2022/0023390-5,indeferido,14
4,1020.2022/0009042-0,deferido,10


In [21]:
# Number of dispatched rows per (process, dispatch label) pair, most frequent first.
dispatched_count = dispatched.value_counts(subset=['processo', 'despacho'])
dispatched_count.head()

processo             despacho           
1020.2021/0001523-0  Despacho indeferido    4
1020.2023/0007094-3  Despacho deferido      4
1020.2021/0015180-0  Despacho indeferido    4
1020.2022/0020300-3  Despacho deferido      4
1020.2022/0020297-0  Despacho deferido      4
Name: count, dtype: int64

In [22]:
# Flatten the counts into a frame with columns
# processo / despacho / contagem_dispatched.
dispatched_count_df = dispatched_count.reset_index(name='contagem_dispatched')
dispatched_count_df.head()

Unnamed: 0,processo,despacho,contagem_dispatched
0,1020.2021/0001523-0,Despacho indeferido,4
1,1020.2023/0007094-3,Despacho deferido,4
2,1020.2021/0015180-0,Despacho indeferido,4
3,1020.2022/0020300-3,Despacho deferido,4
4,1020.2022/0020297-0,Despacho deferido,4


### Dispatched para DOM

In [23]:
def comparador_despachos(row, tipo, dados, contra_tipo='dispatched'):
    """Compare a row's dispatch count with the count stored in another source.

    Parameters
    ----------
    row : mapping-like
        Must provide ``'processo'`` and ``'contagem_<contra_tipo>'``.
    tipo : str
        Suffix of the count column looked up in ``dados``
        (``'contagem_<tipo>'``).
    dados : pandas.DataFrame
        Lookup table indexed by process number, holding ``'contagem_<tipo>'``.
    contra_tipo : str, default 'dispatched'
        Suffix of the count column read from ``row``.  Forced to ``'dom'``
        whenever ``tipo`` is ``'dispatched'``.

    Returns
    -------
    'inexistente' when the process is not in ``dados.index``; 'correto' when
    both counts are equal; otherwise the count found in ``dados``.
    """
    # A lookup against the dispatched table always reads the row's DOM count.
    contra_tipo = 'dom' if tipo == 'dispatched' else contra_tipo

    processo = row['processo']
    contagem_linha = row[f'contagem_{contra_tipo}']

    # Guard clause: nothing to compare when the other source lacks the process.
    if processo not in dados.index:
        return 'inexistente'

    contagem_dados = dados.loc[processo, f'contagem_{tipo}']
    return 'correto' if contagem_dados == contagem_linha else contagem_dados

In [24]:
# Gazette counts split by outcome and indexed by process number, so they can
# serve as lookup tables.
dom_deferidos = dom_count_df[dom_count_df['despacho'] == 'deferido'].set_index('processo')
dom_indeferidos = dom_count_df[dom_count_df['despacho'] == 'indeferido'].set_index('processo')

dom_indeferidos

Unnamed: 0_level_0,despacho,contagem_dom
processo,Unnamed: 1_level_1,Unnamed: 2_level_1
1020.2022/0002755-8,indeferido,21
1020.2022/0023390-5,indeferido,14
1020.2021/0018930-0,indeferido,8
1020.2022/0006954-4,indeferido,5
1020.2020/0014589-1,indeferido,5
...,...,...
1020.2022/0001012-4,indeferido,1
1020.2022/0001022-1,indeferido,1
1020.2022/0001023-0,indeferido,1
1020.2022/0001027-2,indeferido,1


In [25]:
# Take explicit, independent copies of each outcome subset before adding a
# column to them (instead of relying on a `[:]` slice of the query result).
dispatched_count_deferidos = dispatched_count_df.query("despacho == 'Despacho deferido'").copy()
dispatched_count_indeferidos = dispatched_count_df.query("despacho == 'Despacho indeferido'").copy()

# For every dispatched (process, outcome) count, look the process up in the
# matching gazette table.  'contagem_dom' ends up holding 'correto' when the
# counts agree, the gazette count when they differ, or 'inexistente' when the
# process is absent from the gazette table.
dispatched_count_deferidos['contagem_dom'] = dispatched_count_deferidos.apply(
    comparador_despachos, axis=1, args=('dom', dom_deferidos)
)
dispatched_count_indeferidos['contagem_dom'] = dispatched_count_indeferidos.apply(
    comparador_despachos, axis=1, args=('dom', dom_indeferidos)
)

dispatched_count_deferidos.head(20)

Unnamed: 0,processo,despacho,contagem_dispatched,contagem_dom
1,1020.2023/0007094-3,Despacho deferido,4,correto
3,1020.2022/0020300-3,Despacho deferido,4,correto
4,1020.2022/0020297-0,Despacho deferido,4,correto
5,1020.2023/0007666-6,Despacho deferido,3,correto
6,1020.2023/0007651-8,Despacho deferido,3,correto
7,1020.2022/0020307-0,Despacho deferido,3,correto
12,1020.2022/0020826-9,Despacho deferido,3,correto
14,1020.2022/0016144-0,Despacho deferido,3,4
15,1020.2021/0001261-3,Despacho deferido,3,correto
20,1020.2022/0020259-7,Despacho deferido,3,correto


### Dom para dispatched

In [26]:
# Dispatched counts split by outcome and indexed by process number, so they
# can serve as lookup tables.
dispatched_deferidos = dispatched_count_df[
    dispatched_count_df['despacho'] == 'Despacho deferido'
].set_index('processo')
dispatched_indeferidos = dispatched_count_df[
    dispatched_count_df['despacho'] == 'Despacho indeferido'
].set_index('processo')

dispatched_indeferidos

Unnamed: 0_level_0,despacho,contagem_dispatched
processo,Unnamed: 1_level_1,Unnamed: 2_level_1
1020.2021/0001523-0,Despacho indeferido,4
1020.2021/0015180-0,Despacho indeferido,4
1020.2020/0014890-4,Despacho indeferido,3
1020.2023/0004583-3,Despacho indeferido,3
1020.2020/0015637-0,Despacho indeferido,3
...,...,...
1020.2022/0001181-3,Despacho indeferido,1
1020.2022/0001182-1,Despacho indeferido,1
1020.2022/0001184-8,Despacho indeferido,1
1020.2022/0001190-2,Despacho indeferido,1


In [27]:
# Take explicit, independent copies of each outcome subset before adding a
# column to them (instead of relying on a `[:]` slice of the query result).
dom_count_deferidos = dom_count_df.query("despacho == 'deferido'").copy()
dom_count_indeferidos = dom_count_df.query("despacho == 'indeferido'").copy()

# For every gazette (process, outcome) count, look the process up in the
# matching dispatched table.  'contagem_dispatched' ends up holding 'correto'
# when the counts agree, the dispatched count when they differ, or
# 'inexistente' when the process is absent from the dispatched table.
dom_count_deferidos['contagem_dispatched'] = dom_count_deferidos.apply(
    comparador_despachos, axis=1, args=('dispatched', dispatched_deferidos)
)
dom_count_indeferidos['contagem_dispatched'] = dom_count_indeferidos.apply(
    comparador_despachos, axis=1, args=('dispatched', dispatched_indeferidos)
)

dom_count_deferidos.head(20)

Unnamed: 0,processo,despacho,contagem_dom,contagem_dispatched
0,1020.2022/0022993-2,deferido,36,1
1,1020.2022/0012930-0,deferido,24,2
4,1020.2022/0009042-0,deferido,10,1
6,1020.2021/0005918-0,deferido,7,1
7,1020.2020/0015162-0,deferido,6,1
8,1020.2021/0018962-9,deferido,6,3
9,1020.2023/0004409-8,deferido,5,1
11,1020.2021/0009149-1,deferido,5,2
12,1020.2021/0001810-7,deferido,5,3
13,1020.2023/0014777-6,deferido,5,2


In [28]:
# Export 1: dispatched-side counts with their gazette comparison column.
concat_container = [dispatched_count_deferidos, dispatched_count_indeferidos]
df_final = pd.concat(objs=concat_container)
df_final.to_csv('dispatched_x_dom.csv', index=False, sep=';')

# Export 2: gazette-side counts with their dispatched comparison column.
# Both names are rebound here, so after this cell they refer to this export.
concat_container = [dom_count_deferidos, dom_count_indeferidos]
df_final = pd.concat(objs=concat_container)
df_final.to_csv('dom_x_dispatched.csv', index=False, sep=';')