In [351]:
import pandas as pd
import math
# NOTE(review): this expression only reads the option and discards the value;
# it does not change any display setting. Assign a value if that was the intent.
pd.options.display.max_rows

class Forbidden:
    """Table of (class code, subject code) pairs flagged as forbidden.

    ``rules`` is a DataFrame indexed by class code with one column per subject
    code. A cell equal to 1 marks the pair as forbidden; 0 means no rule.
    """

    def __init__(self):
        """Load the class and subject tables and start with an all-zero rule table."""
        # NOTE(review): pd.read_pickle unpickles arbitrary objects; only load
        # files that come from a trusted source.
        classes = pd.read_pickle('classes.pkl')
        subjects = pd.read_pickle('subjects.pkl')

        # add_rule expands a code through its 'todos_filhos' entry.
        # Presumably that entry lists the code together with all of its
        # descendants -- verify against the pickled data.
        self.class_sons = classes[['codigo', 'todos_filhos']]
        self.subject_sons = subjects[['codigo', 'todos_filhos']]

        # Build the zero-filled integer matrix directly instead of creating an
        # empty object-dtype frame and calling fillna(0) on it.
        self.rules = pd.DataFrame(
            0,
            index=pd.Index(classes['codigo'], name='class_cod'),
            columns=pd.Index(subjects['codigo'], name='subject_cod'),
        )

    @staticmethod
    def _sons_of(table, code):
        """Return the 'todos_filhos' entry of the first row whose 'codigo' equals ``code``."""
        matches = table.loc[table['codigo'] == code, 'todos_filhos']
        if matches.empty:
            # Same exception type the bare ``.iloc[0]`` raised, with a usable message.
            raise IndexError('code {!r} not found in the lookup table'.format(code))
        return matches.iloc[0]

    def add_rule(self, cod_class, cod_subject):
        """Mark every pair from the two codes' 'todos_filhos' lists as forbidden.

        Args:
            cod_class: class code whose 'todos_filhos' entry selects the rows.
            cod_subject: subject code whose 'todos_filhos' entry selects the columns.

        Raises:
            IndexError: if either code is missing from its lookup table.
        """
        class_codes = self._sons_of(self.class_sons, cod_class)
        subject_codes = self._sons_of(self.subject_sons, cod_subject)
        self.rules.loc[class_codes, subject_codes] = 1

    def is_forbidden(self, cod_class, cod_subject):
        """Return True when the pair has been flagged by ``add_rule``.

        A code that is absent from the rule table cannot have a rule, so such
        pairs are reported as not forbidden instead of raising ``KeyError``.
        Assumes codes are unique in the rule table's index and columns.
        """
        if cod_class not in self.rules.index or cod_subject not in self.rules.columns:
            return False
        return bool(self.rules.loc[cod_class, cod_subject] == 1)

class Frequency:
    
    def __init__(self):
        """Load the pickled tables and reset all navigation state.

        Reads 'subject_basic.pkl', 'classes.pkl' and 'subjects.pkl', creates
        one filter slot per class level (0-4) and subject level (0-5), all
        unset, starts both grouping levels at 0 and builds a ``Forbidden``
        rule table.
        """
        self.subject_basic = pd.read_pickle('subject_basic.pkl')
        self.classes = pd.read_pickle('classes.pkl')
        self.subjects = pd.read_pickle('subjects.pkl')

        # One slot per tree level; None means "no filter at this level".
        level_filters = {}
        for depth in range(5):
            level_filters['class_level_' + str(depth)] = None
        for depth in range(6):
            level_filters['subject_level_' + str(depth)] = None
        self.levels = level_filters

        # Tree levels at which classes and subjects are currently grouped.
        self.class_level = 0
        self.subject_level = 0

        self.class_subject_freq = {}
        self.class_subject_translation = None
        self.forbidden = Forbidden()
        
    def translate_class_subject_freq(self, class_codes=[]):
        class_subject_translation = {}
        multiple_class_names = 0
        multiple_subject_names = 0

        for class_code in self.class_subject_freq:
            if len(class_codes) > 0 and class_code not in class_codes:
                continue

            class_series = self.classes.loc[self.classes['codigo'] == class_code]
            class_name = class_code if class_series.size == 0 else class_series['descricao'].iloc[0]
            class_name += ' (code: {})'.format(str(class_code))
            class_subject_translation[class_name] = {}

            for subject_code in self.class_subject_freq[class_code]:
                subject_series = self.subjects.loc[self.subjects['codigo'] == subject_code]

                subject_name = subject_code
                if subject_series.size > 0:
                    subject_name = subject_series['descricao'].iloc[0] + ' (code: {})'.format(subject_code)

                    parent_name = subject_series['cod_pai'].iloc[0]
                    if math.isnan(parent_name):
                        parent_name = None
                    else:
                        parent_name = self.subjects.loc[self.subjects['codigo'] == parent_name]['descricao'].iloc[0]

                    if parent_name:
                        subject_name = '{} >> {} (code: {})'.format(parent_name, subject_name, subject_code)
#                         subject_name = '{} >> {}'.format(parent_name, subject_name)

                    if self.forbidden.is_forbidden(cod_class=class_code, cod_subject=subject_code):
                        subject_name += '🚨'
    
                class_subject_translation[class_name][subject_name] = self.class_subject_freq[class_code][subject_code]

        return class_subject_translation

    def get_level_keys (levels, deepen_class=False, deepen_subject=0):
        l = ([key for key in levels.keys() if levels[key] != None])

        if 'class_level_0' not in l:
            l = l + ['class_level_0']

        if 'subject_level_0' not in l:
            l = l + ['subject_level_0']

        if deepen_class:
            class_levels = [int(c[-1:]) for c in l if c.startswith('class')]
            if len(class_levels) > 0:
                max_class_level = max(class_levels)
                if max_class_level < 4:
                    l = l + ['class_level_' + str(max_class_level+1)]

        if deepen_subject:
            subject_levels = [int(s[-1:]) for s in l if s.startswith('subject')]
            if len(subject_levels) > 0:
                max_subject_level = max(subject_levels)
                if max_subject_level < 5:
                    l = l + ['subject_level_' + str(max_subject_level+1)]

        return l
    
    
    def translate(self):
        class_subject_translation = self.translate_class_subject_freq([])

        for class_name in class_subject_translation:
            print('\n' + class_name)

            subjects_name = list(class_subject_translation[class_name].items())
            subjects_name.sort(key=lambda tup: tup[1])
            for subject_name, freq in subjects_name:
                print('\t{} - {}'.format(subject_name, freq))
                
                
    def filter_classes_subjects (self):
        df = self.subject_basic
        for level in self.levels:
            if self.levels[level] != None:
                df = df.loc[df[level] == self.levels[level]]

        return df
    
    def get_frequencies(self):
        self.class_subject_freq = self.filter_classes_subjects()
        self.class_subject_freq = self.get_class_subject_freq(self.class_subject_freq, 'cod', self.class_level, self.subject_level)

    def get_class_subject_freq(self, df, subject_col, class_level = None, subject_level = None):
        class_subject_freq = {}

        for index, row in df.iterrows():
            suit_class = row['classeProcessual']

            # If a level is given, get the class_parent in the given tree level (height)
            if class_level is not None:
                cur_classe = self.classes.loc[self.classes['codigo'] == suit_class]

                # Check if element exists in class/subject csv
                if cur_classe.size > 0:
                    parents = cur_classe.iloc[0]['cod_pais']

                    # If level is not available for that element, it is already a leaf.
                    if class_level < len(parents):
                        suit_class = parents[class_level]

            if suit_class not in class_subject_freq:
                class_subject_freq[suit_class] = {}

            subject_code = row[subject_col]

            # If a level is given, get the subject_parent in the given tree level (height)
            if subject_level is not None:
                cur_subject = self.subjects.loc[self.subjects['codigo'] == subject_code]

                # Check if element exists in class/subject csv
                if cur_subject.size > 0:
                    parents = cur_subject.iloc[0]['cod_pais']

                    # If level is not available for that element, it is already a leaf.
                    if subject_level < len(parents):
                        subject_code = parents[subject_level]

            if subject_code not in class_subject_freq[suit_class]:
                class_subject_freq[suit_class][subject_code] = 0
            class_subject_freq[suit_class][subject_code] += 1

        return class_subject_freq
        
    def open_class(self, c):
        if self.class_level > 0 and self.levels['class_level_' + str(self.class_level-1)] == None:
            self.levels['class_level_' + str(self.class_level-1)] = c
        elif self.class_level > 0 and self.levels['class_level_' + str(self.class_level-1)] != c:
            self.levels['class_level_' + str(self.class_level)] = c
        else:
            self.levels['class_level_' + str(self.class_level)] = c
    
    def open_subject(self, s):
        if self.subject_level > 0 and self.levels['subject_level_' + str(self.subject_level-1)] == None:
            self.levels['subject_level_' + str(self.subject_level-1)] = s
        else:
            self.levels['subject_level_' + str(self.subject_level)] = s
        
    def raise_subject_level(self):
        if self.levels['subject_level_' + str(self.subject_level)] != None and self.subject_level < 5:
            self.subject_level+=1
    
    def raise_class_level(self):
        if self.levels['class_level_' + str(self.class_level)] != None and self.class_level <4:
            self.class_level+=1

    def lower_subject_level(self):
        if self.levels['subject_level_' + str(self.subject_level)] != None and self.subject_level > 0:
            self.subject_level-=1
    
    def lower_class_level(self):
        if self.levels['class_level_' + str(self.class_level)] != None and self.class_level > 0:
            self.class_level-=1

In [357]:
# Build the explorer: loads the pickled tables and starts with no filters and no rules.
freq = Frequency()

In [358]:
# (class code, subject code) pairs to flag as forbidden. add_rule expands each
# pair through the 'todos_filhos' lists of both codes.
forbidden_pairs = [
    (2, 287),
    (2, 1209),
    (268, 899),
    (268, 12734),
    (385, 14),
    (268, 195),
]
for rule_class, rule_subject in forbidden_pairs:
    freq.forbidden.add_rule(cod_class=rule_class, cod_subject=rule_subject)

In [341]:
# Count rows per (class, subject) pair for the current filters and grouping levels.
freq.get_frequencies()

In [342]:
# Print the counts with readable labels, least frequent subject first within each class.
freq.translate()


PROCESSO CÍVEL E DO TRABALHO (code: 2.0)
	DIREITO PENAL (code: 287.0)🚨 - 1
	DIREITO PROCESSUAL PENAL (code: 1209.0)🚨 - 1
	DIREITO INTERNACIONAL (code: 6191.0) - 2
	REGISTROS PÚBLICOS (code: 7724.0) - 8
	DIREITO PROCESSUAL CIVIL E DO TRABALHO (code: 8826.0) - 19
	DIREITO DO CONSUMIDOR (code: 1156.0) - 504
	DIREITO ASSISTENCIAL (code: 12734.0) - 733
	0.0 - 1363
	DIREITO CIVIL (code: 899.0) - 2055
	DIREITO ADMINISTRATIVO E OUTRAS MATÉRIAS DE DIREITO PÚBLICO (code: 9985.0) - 4331
	DIREITO PREVIDENCIÁRIO (code: 195.0) - 5271
	DIREITO TRIBUTÁRIO (code: 14.0) - 12128

PROCESSO CRIMINAL (code: 268.0)
	DIREITO CIVIL (code: 899.0)🚨 - 1
	DIREITO ASSISTENCIAL (code: 12734.0)🚨 - 1
	DIREITO ADMINISTRATIVO E OUTRAS MATÉRIAS DE DIREITO PÚBLICO (code: 9985.0) - 4
	DIREITO PREVIDENCIÁRIO (code: 195.0) - 6
	DIREITO PROCESSUAL PENAL (code: 1209.0) - 227
	0.0 - 979
	DIREITO PENAL (code: 287.0) - 6715

EXECUÇÃO PENAL E DE MEDIDAS ALTERNATIVAS (code: 385.0)
	DIREITO TRIBUTÁRIO (code: 14.0)🚨 - 1
	0.0 - 5
	DI

In [362]:
freq = Frequency()

# (class code, subject code) pairs to flag as forbidden. add_rule expands each
# pair through the 'todos_filhos' lists of both codes.
forbidden_pairs = [
    (2, 287),
    (2, 1209),
    (268, 899),
    (268, 12734),
    (385, 14),
    (268, 195),
]
for rule_class, rule_subject in forbidden_pairs:
    freq.forbidden.add_rule(cod_class=rule_class, cod_subject=rule_subject)

# Filter on class 268 and subject 195 at level 0, then group subjects one
# level deeper before recounting and printing.
freq.open_class(268)
freq.open_subject(195)
freq.raise_subject_level()
# freq.raise_class_level()
freq.get_frequencies()
freq.translate()


PROCESSO CRIMINAL (code: 268)
	DIREITO PREVIDENCIÁRIO (code: 195.0)🚨 - 6


In [287]:
# Group classes one level deeper (only takes effect when the current class
# level has a selection), then recount and print.
freq.raise_class_level()
freq.get_frequencies()
freq.translate()


Processo de Conhecimento (code: 1106.0)
	DIREITO PROCESSUAL PENAL >> Ação Penal (code: 4263.0) (code: 4263.0) - 1


In [288]:
# Show the rows of subject_basic that match every currently selected level filter.
freq.filter_classes_subjects()

Unnamed: 0,millisInsercao,siglaTribunal,grau,numero,classeProcessual,nomeOrgao,codigoOrgao,instancia,codigoPaiNacional,cod,...,subject_level_1,subject_level_2,subject_level_3,subject_level_4,subject_level_5,class_level_0,class_level_1,class_level_2,class_level_3,class_level_4
17711,1570027385256,TRF1,G2,23998520104010000,138,Gab. 21 - DESEMBARGADORA FEDERAL ÂNGELA CATÃO,30,ESP,10603.0,10603.0,...,4263.0,4271.0,,,,2.0,1106.0,1107.0,26.0,62.0


In [282]:
# Inspect the per-level filter values; None means that level is not filtered on.
freq.levels

{'class_level_0': None,
 'class_level_1': None,
 'class_level_2': None,
 'class_level_3': None,
 'class_level_4': None,
 'subject_level_0': None,
 'subject_level_1': None,
 'subject_level_2': None,
 'subject_level_3': None,
 'subject_level_4': None,
 'subject_level_5': None}

In [283]:
# Tree levels at which classes and subjects are currently grouped.
print(freq.class_level)
print(freq.subject_level)

0
0


In [284]:
# Raw result of the last get_frequencies(): {class code: {subject code: row count}}.
freq.class_subject_freq

{2.0: {195.0: 5271,
  899.0: 2055,
  1156.0: 504,
  9985.0: 4331,
  12734.0: 733,
  14.0: 12128,
  8826.0: 19,
  7724.0: 8,
  6191.0: 2,
  287.0: 1,
  1209.0: 1,
  0.0: 1363},
 268.0: {287.0: 6715,
  1209.0: 227,
  0.0: 979,
  899.0: 1,
  9985.0: 4,
  195.0: 6,
  12734.0: 1},
 385.0: {287.0: 818, 1209.0: 53, 0.0: 5, 14.0: 1}}

In [278]:
# Show the rows of subject_basic that match every currently selected level filter.
freq.filter_classes_subjects()

Unnamed: 0,millisInsercao,siglaTribunal,grau,numero,classeProcessual,nomeOrgao,codigoOrgao,instancia,codigoPaiNacional,cod,...,subject_level_1,subject_level_2,subject_level_3,subject_level_4,subject_level_5,class_level_0,class_level_1,class_level_2,class_level_3,class_level_4
0,1569995389625,TRF1,G1,00013178920114013813,436,3ª Vara JEF - Governador Valadares,18388,ORIG,6095.0,6095.0,...,6094.0,,,,,2.0,1106.0,1107.0,,
2,1569970199716,TRF1,G1,00002487020164013805,156,1ª São Sebastião do Paraíso,12406,ORIG,6104.0,6104.0,...,6094.0,,,,,2.0,1106.0,155.0,,
11,1565809357322,TRF1,G1,00005834420104013306,436,JEF ADJ - PAULO AFONSO,0,ORIG,6098.0,6098.0,...,6094.0,6096.0,,,,2.0,1106.0,1107.0,,
12,1565809357322,TRF1,G1,00005834420104013306,436,JEF ADJ - PAULO AFONSO,0,ORIG,6098.0,6098.0,...,6094.0,6096.0,,,,2.0,1106.0,1107.0,,
13,1569988224491,TRF1,G1,00005834420104013306,436,Paulo Afonso,12368,ORIG,6098.0,6098.0,...,6094.0,6096.0,,,,2.0,1106.0,1107.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48917,1569986027302,TRF1,G1,00066724720104013803,460,4ª Vara JEF Cível e Criminal - Uberlândia,17709,ORIG,6122.0,6122.0,...,6119.0,6120.0,,,,2.0,197.0,,,
48920,1569964757337,TRF1,G1,00816202720144013800,7,15ª - Belo Horizonte,17668,ORIG,6138.0,6138.0,...,6119.0,,,,,2.0,1106.0,1107.0,,
48922,1569966652070,TRF1,G1,00002512520154013202,436,Tefé,16403,ORIG,6101.0,6101.0,...,6094.0,,,,,2.0,1106.0,1107.0,,
48925,1569961358364,TRF1,G1,00010687520144013703,436,Bacabal,13762,ORIG,6101.0,6101.0,...,6094.0,,,,,2.0,1106.0,1107.0,,
