## Dummy Data for POC Sefaz RJ
- Developers: Lucas Moutinho, Victor Ciurlini
- Code developed by team Ninjas Vert: Soluções em TIC

<br>
<img src="image/sefaz.jpg" width="400" height="200">

### Create tables

In [16]:
import pandas as pd
import random
import string
import datetime
import matplotlib.pyplot as plt

In [28]:
data_tables = ["NOTA_FISCAL", "PRODUTO_SERVICO", "SITUACAO_NF", "PRODUTO", "IS_VW_ESTAB_EMPRESA", "IS_VW_ESTAB_ATV_ECO"]
df = {}
data_length = 300
for x in data_tables:
    data = {"id": [x for x in range(data_length)]}
    df["DATA_{0}".format(x)] = pd.DataFrame(data)
print(df)

{'DATA_NOTA_FISCAL':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns], 'DATA_PRODUTO_SERVICO':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns], 'DATA_SITUACAO_NF':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns], 'DATA_PRODUTO':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns], 'DATA_IS_VW_ESTAB_EMPRESA':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns], 'DATA_IS_VW_ESTAB_ATV_ECO':       id
0      0
1      1
2      2
3      3
4      4
..   ...
295  295
296  296
297  297
298  298
299  299

[300 rows x 1 columns]}


### Auxiliary functions

In [80]:
# Returns a list of randomized varchars with the specified data_lenth. 
# If column_length is 0 or negative a random size list is returned, with
# a max length determined with the variable max_random_length
def generate_varchar_column(data_length, column_length = 0, max_random_length = 10):
    varchar_list = []
    if column_length > 0: # fixed size
        for _ in range(data_length):
            varchar_list.append(''.join(random.choices(string.ascii_uppercase + string.digits, k = column_length)))
    else: # random size
        for _ in range(data_length):
            varchar_list.append(''.join(random.choices(string.ascii_uppercase + string.digits, k = random.randint(1,max_random_length))))
    return varchar_list

# Returns a list of randomized numbers with the specified data_lenth.
# If column_length is 0 or negative a random size list is returned, with
# a max length determined with the variable max_random_length
def generate_number_column(data_length, column_length = 0, max_random_length = 10):
    number_list = []
    if column_length > 0: # fixed size
        for _ in range(data_length):
            number_list.append(int(''.join(random.choices(string.digits, k = column_length))))
    else: # random size
        for _ in range(data_length):
            number_list.append(int(''.join(random.choices(string.digits, k = random.randint(1,max_random_length)))))
    return number_list

# Returns a list of single randomized numbers within a certain range with the specified data_lenth.
# The min value of the range is passed in min_range and the max at max_range
def generate_number_in_range_column(data_length, min_range = 1, max_range = 9):
    number_in_range_list = []
    for _ in range(data_length):
        number_in_range_list.append(random.randint(min_range,max_range))
    return number_in_range_list

# Returns a list of randomized dates within a certain year range with the specified data_lenth.
# The min value of the year range is passed in min_range and the max at max_range
def generate_date_column(data_length, min_range = 1990, max_range = 2020):
    date_list = []
    for _ in range(data_length):
        date = datetime.datetime.strptime('{} {}'.format(random.randint(1, 366), random.randint(min_range,max_range)), '%j %Y')
        date_list.append(date)
    return date_list

### Dummy data for SITUACAO_NF

In [109]:
df["DATA_SITUACAO_NF"]["CO_CHAVE_ACESSO"] = generate_varchar_column(data_length,61)
df["DATA_SITUACAO_NF"]["DT_STATUS"] = generate_date_column(data_length)
df["DATA_SITUACAO_NF"]["SITUACAO_NF_CO_SITUACAO"] = generate_varchar_column(data_length)
df["DATA_SITUACAO_NF"]["TP_ORIGEM_ATUALIZACAO"] = generate_number_in_range_column(data_length,max_range=3)
df["DATA_SITUACAO_NF"]["CO_CHAVE_ACESSO_CANCELAMENTO"] = generate_varchar_column(data_length,61)
print(df["DATA_SITUACAO_NF"].head())

   id                                    CO_CHAVE_ACESSO  DT_STATUS  \
0   0  IKQ6OGEDKS1S7M8END02VE0D6YJYT7MDF3JWNMZ9NSG2HO... 2003-03-26   
1   1  M016LQ87X0NMN8KOWWKVGD3HZ2GA2XMY0QDRCSJ1G1HTFK... 2010-10-24   
2   2  W7FSZQRAYTAHKPRUL6LTGT5SIII3ECYUQ2K9KJ0SYRYU04... 2010-08-15   
3   3  R18KVRU1K8WRH0WE213P2QXN8MATZHWNRFMH4Z0HIUQC69... 2020-07-24   
4   4  I13ILU3BXB9AH82T70T33LC3IWWZUS9ULL7QPB45Q30113... 2018-11-27   

  SITUACAO_NF_CO_SITUACAO  TP_ORIGEM_ATUALIZACAO  \
0                      5S                      3   
1                     AY7                      3   
2                   Y99X6                      1   
3                       B                      3   
4                       K                      1   

                        CO_CHAVE_ACESSO_CANCELAMENTO  
0  5QAT5LZ436AI7CZU136QCLWODOSCMPEQOU8XHX3UR235PG...  
1  6SCD31XD1AN1TBFYMILPNRA9L2BQL4AOGCOOI5QJ31RHOG...  
2  QOKKSLZZA242JYXDJ9PKWULUAFB9TVTTNRYS3UV7TEM3ZG...  
3  EVXGRZ3LFALFP78T7MGJ0UU1RRBP2VM9WI2A3

### Dummy data for IS_VW_ESTAB_EMPRESA

In [110]:
df["DATA_IS_VW_ESTAB_EMPRESA"]["SQ_ESTABELECIMENTO"] = generate_number_column(data_length,9)
df["DATA_IS_VW_ESTAB_EMPRESA"]["SQ_INSCRICAO_ESTABELECIMENTO"] = generate_number_column(data_length,9)
df["DATA_IS_VW_ESTAB_EMPRESA"]["NU_INSCRICAO_ESTADUAL"] = generate_number_column(data_length,8)
df["DATA_IS_VW_ESTAB_EMPRESA"]["DT_CONCESSAO_INSCRICAO"] = generate_date_column(data_length)
df["DATA_IS_VW_ESTAB_EMPRESA"]["SQ_ATIVIDADE_ECONOMICA"] = generate_number_column(data_length,9)
df["DATA_IS_VW_ESTAB_EMPRESA"]["CO_ATIVIDADE_ECONOMICA"] = generate_varchar_column(data_length,7)
df["DATA_IS_VW_ESTAB_EMPRESA"]["NO_ATIVIDADE_ECONOMICA"] = generate_varchar_column(data_length,200)
df["DATA_IS_VW_ESTAB_EMPRESA"]["SQ_VERSAO_TABELA_ATIVIDADES"] = generate_number_column(data_length,9)
df["DATA_IS_VW_ESTAB_EMPRESA"]["SQ_NIVEL_ATIVIDADE_ECONOMICA"] = generate_number_column(data_length,9)
df["DATA_IS_VW_ESTAB_EMPRESA"]["IN_PRINCIPAL"] = generate_number_column(data_length,1)
df["DATA_IS_VW_ESTAB_EMPRESA"]["DT_INICIO_EXERCICIO"] = generate_date_column(data_length)
df["DATA_IS_VW_ESTAB_EMPRESA"]["NU_ORDEM"] = generate_number_column(data_length)
print(df["DATA_IS_VW_ESTAB_EMPRESA"].head())

   id  SQ_ESTABELECIMENTO  SQ_INSCRICAO_ESTABELECIMENTO  \
0   0           220539186                     900125937   
1   1           931363086                      49236722   
2   2           679688403                     525610878   
3   3           818068752                     220418044   
4   4           909322423                     743296735   

   NU_INSCRICAO_ESTADUAL DT_CONCESSAO_INSCRICAO  SQ_ATIVIDADE_ECONOMICA  \
0               45458602             2006-11-11               768113364   
1               98505540             2016-01-06               500113655   
2               59268353             2017-08-20               569101680   
3               22836470             1992-07-31                15688001   
4               16163408             1991-01-25               582296427   

  CO_ATIVIDADE_ECONOMICA                             NO_ATIVIDADE_ECONOMICA  \
0                IL6G4Z7  KOZIFN1Y4RQ1HFECY6OF4490MKVEN1BX34UQ9AZJYB168J...   
1                QCXGIE7  JR6OVO6975