<br>

# TJSP

In [2]:
!pip3 install tabula

Collecting tabula
  Downloading tabula-1.0.5.tar.gz (9.5 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: tabula
  Building wheel for tabula (setup.py): started
  Building wheel for tabula (setup.py): finished with status 'done'
  Created wheel for tabula: filename=tabula-1.0.5-py3-none-any.whl size=10682 sha256=7be082f5520ad312e06d9e54d6ff0cffb4acf7e892c428f1d2fa7d75c7df7409
  Stored in directory: c:\users\michel\appdata\local\pip\cache\wheels\02\36\f9\1af7331f3459ce65735775e24bfe68e3ea9cac5e5b23b0b42c
Successfully built tabula
Installing collected packages: tabula
Successfully installed tabula-1.0.5


  DEPRECATION: Building 'tabula' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'tabula'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [3]:
import sys
import tabula
import requests
import pandas as pd
from io import BytesIO

In [4]:
from pathlib import Path

In [7]:
# Paths: the project root is the parent of the notebook's working directory;
# the package lives in <root>/tjsp and its data files in <root>/tjsp/data.
project_path = Path('.').resolve().parent
package_path = project_path.joinpath('tjsp')
data_path = package_path.joinpath('data')

# Create the data folder on first run (no error if it is already there)
data_path.mkdir(exist_ok=True)

# Make the project root importable so that `tjsp` can be imported below
sys.path.append(str(project_path))

In [8]:
from tjsp.paths import *

In [9]:
def get_data(
    url='https://www.tjsp.jus.br/Download/Tabelas/TabelaDebitosJudiciais.pdf',
    timeout=60,
):
    """Download the TJSP "Tabela de Debitos Judiciais" PDF.

    Parameters
    ----------
    url : str, optional
        Address of the PDF. Defaults to the table published on the TJSP site.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        `requests` can block indefinitely on a stalled connection.

    Returns
    -------
    requests.Response
        The HTTP response; the PDF bytes are available as ``.content``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so an error page is
        never mistaken for the PDF by the callers.
    """
    r = requests.get(url, allow_redirects=True, timeout=timeout)
    r.raise_for_status()
    return r

In [10]:
def save_pdf(input_path):
    """Download the TJSP table and store it as a PDF file.

    Parameters
    ----------
    input_path : pathlib.Path
        Folder in which ``tabela_debitos_judiciais.pdf`` is written
        (it is combined with the file name using the ``/`` operator).

    Returns
    -------
    int
        Always ``0``.
    """
    # Requests
    r = get_data()

    # Save PDF file; the context manager guarantees the handle is flushed and
    # closed even if the write fails.
    with open(input_path / 'tabela_debitos_judiciais.pdf', 'wb') as pdf_file:
        pdf_file.write(r.content)

    return 0

In [11]:
# Month abbreviations (Portuguese) used as row labels in the table -> month number
_MONTH_NUMBERS = {
    'JAN': 1,
    'FEV': 2,
    'MAR': 3,
    'ABR': 4,
    'MAI': 5,
    'JUN': 6,
    'JUL': 7,
    'AGO': 8,
    'SET': 9,
    'OUT': 10,
    'NOV': 11,
    'DEZ': 12,
}


def _tidy_tables(dfs):
    """Turn the per-page wide tables (months x years) into one long, typed table."""
    # One long frame per page: (mes, ano, taxa). Non-mutating, so the frames
    # handed in by the caller are left untouched.
    long_frames = [
        page.rename(columns={'Unnamed: 0': 'mes'})
        .drop(columns='Unnamed: 1', errors='ignore')
        .melt(id_vars='mes', var_name='ano', value_name='taxa')
        for page in dfs
    ]
    if not long_frames:
        raise ValueError('No tables were extracted from the PDF.')

    # Single concat instead of growing a frame inside the loop; cells that are
    # empty in the source are dropped.
    df = pd.concat(long_frames, ignore_index=True).dropna(subset=['taxa'])

    # Month: label -> number, failing loudly on anything unexpected
    labels = df['mes'].astype(str).str.strip().str.upper()
    months = labels.map(_MONTH_NUMBERS)
    if months.isna().any():
        unknown = sorted(set(labels[months.isna()]))
        raise ValueError(f'Unexpected month labels: {unknown}')
    df['mes'] = months.astype(int)

    # Year: column headers may contain stray spaces (e.g. "1 964")
    df['ano'] = df['ano'].astype(str).str.replace(' ', '', regex=False).astype(int)

    # Rate: keep the original text, then convert "1.234,56" -> 1234.56.
    # The replacements MUST be literal (regex=False): as a regular expression
    # '.' matches every character, which blanks every value on pandas >= 2.0.
    df['taxa_string'] = df['taxa']
    df['taxa'] = (
        df['taxa']
        .str.strip()
        .str.replace('-', '', regex=False)   # "-" marks a month without a value
        .str.replace('.', '', regex=False)   # thousands separator
        .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
    )
    # .copy() so the assignments below act on an independent frame
    df = df[df['taxa'] != ''].copy()
    df['taxa'] = df['taxa'].astype(float)

    # Dates: first day of each (year, month)
    date_parts = pd.DataFrame(
        {'year': df['ano'], 'month': df['mes'], 'day': 1},
        index=df.index,
    )
    df['data'] = pd.to_datetime(date_parts)
    df['data_ref'] = df['data'].dt.strftime('%Y-%m')

    # Sort chronologically and fix the column order
    df = df.sort_values('data', kind='stable')
    df = df.reindex(columns=['data', 'data_ref', 'ano', 'mes', 'taxa_string', 'taxa'])
    return df.reset_index(drop=True)


def get_table(dfs=None):
    """Build the tidy TJSP "Tabela de Debitos Judiciais" table.

    Parameters
    ----------
    dfs : list of pandas.DataFrame, optional
        Tables already extracted from the PDF, one per page, with the month
        labels in a column named ``'Unnamed: 0'`` (or ``'mes'``) and one column
        per year. When omitted, the PDF is downloaded with ``get_data()`` and
        parsed with ``tabula.read_pdf``.

    Returns
    -------
    pandas.DataFrame
        One row per month that has a value, sorted by date, with the columns
        ``data`` (first day of the month), ``data_ref`` (``'YYYY-MM'``),
        ``ano``, ``mes``, ``taxa_string`` (text as found in the PDF) and
        ``taxa`` (float).

    Raises
    ------
    ValueError
        If no table is available or a row label is not a known month.
    """
    if dfs is None:
        # Requests
        r = get_data()

        # Read PDF
        dfs = tabula.read_pdf(BytesIO(r.content), pages='all')

    return _tidy_tables(dfs)

In [12]:
# Build the tidy table from the published PDF
df = get_table()

# Write it next to the package as "tabela_debitos_judiciais.csv"
# (no index column; floats use a decimal comma)
csv_path = data_path / 'tabela_debitos_judiciais.csv'
df.to_csv(csv_path, index=False, decimal=',')

# Show the most recent rows
df.tail(3)

AttributeError: module 'tabula' has no attribute 'read_pdf'

<br>

# Export

In [9]:
import os
from traitlets.config import Config
from nbconvert import PythonExporter
from nbconvert.preprocessors import TagRemovePreprocessor

In [10]:
# Notebook to convert, and the Python module it is exported to
input_filepath = project_path.joinpath('scripts', 'tjsp_update.ipynb')
output_filepath = project_path.joinpath('tjsp', 'tjsp_update.py')

In [11]:
# Exporter configuration: keep code cells only (no markdown, raw cells, input
# prompts or outputs) and honour the "remove_cell" / "remove_output" cell tags.
c = Config()
c.TagRemovePreprocessor.enabled=True
c.ClearOutputPreprocessor.enabled=True
c.TemplateExporter.exclude_markdown=True
c.TemplateExporter.exclude_code_cell=False
c.TemplateExporter.exclude_input_prompt=True 
c.TemplateExporter.exclude_output=True
c.TemplateExporter.exclude_raw=True
c.TagRemovePreprocessor.remove_cell_tags = ('remove_cell',)
c.TagRemovePreprocessor.remove_input_tags = ('remove_cell',)
c.TagRemovePreprocessor.remove_all_outputs_tags = ('remove_output',)
# NOTE(review): this top-level key is probably not read by nbconvert (the
# exporter-scoped setting on the next line is the one that applies) - confirm
# before removing it.
c.preprocessors = ['TagRemovePreprocessor']
c.PythonExporter.preprocessors = ['nbconvert.preprocessors.TagRemovePreprocessor']

# Create the exporter and register the tag-removal preprocessor on it
py_exporter = PythonExporter(config=c)
py_exporter.register_preprocessor(TagRemovePreprocessor(config=c), True)

# Convert the notebook with the exporter configured above (previously a second,
# fresh exporter was created here and `py_exporter` was never used).
# Returns a tuple: the generated Python source and the resources/metadata.
body, metadata = py_exporter.from_filename(input_filepath)

# Write the generated Python source to the output .py file
with open(output_filepath,  'w', encoding='utf-8') as f:
    f.write(body)