In [1]:
import pandas as pd
import numpy as np
import re 
from codigo_limpo import df_tidy_simp

In [2]:
# Load the 'Dashboard' and 'Classificação' sheets of the study workbook, in that order.
# header=1 makes pandas take the column names from the second spreadsheet row.
df_dash, df_class = (
    pd.read_excel('Estudo_de_Garantias_v3.xlsx', sheet_name=sheet, header=1)
    for sheet in ('Dashboard', 'Classificação')
)
# Work on a copy so the imported df_tidy_simp frame is never mutated by later cells.
df_simp = df_tidy_simp.copy()

In [3]:
# Lookup table {(Código, Subclasse): Nota} built from the classification sheet.
# Rows whose 'Subclasse' is missing are left out of the table.
class_map = (
    df_class.loc[df_class['Subclasse'].notna()]
    .set_index(['Código', 'Subclasse'])
    .loc[:, 'Nota']
    .to_dict()
)

In [4]:
# Split 'Garantia' at its first space: the leading token becomes 'Código' and the
# remainder becomes 'Subclasse' (a value without a space leaves 'Subclasse' missing).
df_simp[['Código', 'Subclasse']] = df_simp['Garantia'].str.split(pat=' ', n=1, expand=True)
# Preview the derived columns next to 'Nota', which must already exist in df_simp.
df_simp.loc[:, ['Código', 'Subclasse', 'Nota']].head(10)

Unnamed: 0,Código,Subclasse,Nota
0,AF,SPE,2.0
1,AF,Terreno e SPE + CF,3.0
2,AF,SPE + Fiança,2.0
3,AF,Imóvel + Aval,3.0
4,AF,SPE,2.0
5,clean,,0.0
6,AF,Imóvel + CF + Coobrigação + Aval,3.0
7,AF,Imóvel e Terreno + CF + Aval,3.0
8,AF,Imóvel e SPE + CF + Fiança,3.0
9,AF,Terreno,3.0


In [5]:
def split_subclasses(sub):
    """Break a subclass description into its individual components.

    The text is split on ``+``, ``#`` and the word `` e `` (surrounded by
    spaces); whitespace around each separator is discarded and empty pieces
    are dropped.

    Parameters
    ----------
    sub : str or missing value
        Subclass text such as ``"Imóvel e Terreno + CF"``.

    Returns
    -------
    list of str or str
        The list of non-empty components, or the sentinel string
        ``"sem subclasse"`` when ``sub`` is missing (``pd.isna``).
    """
    if pd.isna(sub):
        return "sem subclasse"

    tokens = []
    for piece in re.split(r'\s*(?:\+|#| e )\s*', sub):
        cleaned = piece.strip()
        if cleaned:
            tokens.append(cleaned)
    return tokens

In [6]:
def upperLetter(sub):
    """Return ``sub`` with its first character upper-cased.

    Parameters
    ----------
    sub : object
        Value to normalize. Only ``str`` values are transformed.

    Returns
    -------
    object
        For a non-empty string whose first character is not upper-case, a new
        string with that character upper-cased and the rest unchanged. Every
        other input (empty string, already-capitalized string, ``None``, NaN,
        numbers, lists, ...) is returned as is.
    """
    # A str can never be NaN, so the type check alone covers missing values.
    # It also avoids calling pd.isna on array-likes, whose element-wise result
    # has no single truth value and would raise ValueError.
    if not isinstance(sub, str):
        return sub
    if sub and not sub[0].isupper():
        return sub[0].upper() + sub[1:]
    return sub


In [7]:
def select_best_note(code, parts, mapping=None):
    """Pick the highest note available for a guarantee code and its subclass parts.

    Parameters
    ----------
    code : hashable
        Guarantee code; first element of the lookup key.
    parts : iterable
        Subclass components; each one is looked up as ``(code, part)``.
    mapping : dict, optional
        Lookup table ``{(code, subclass): note}``. Defaults to the
        notebook-level ``class_map``.

    Returns
    -------
    float
        1. The maximum non-missing note among the ``(code, part)`` matches.
        2. If no part yields a note, the maximum non-missing note registered
           for ``code`` under any subclass.
        3. ``np.nan`` when neither lookup finds a note.

    NOTE(review): step 2 assigns the best note of the code to unrecognized
    subclasses, which may overstate the note - confirm this is intended.
    """
    if mapping is None:
        # Default to the table built earlier in the notebook.
        mapping = class_map

    # pd.isna (rather than np.isnan) also treats None / non-float entries as
    # missing, matching the filter used by the fallback below.
    looked_up = (mapping.get((code, part), np.nan) for part in parts)
    valid_notes = [note for note in looked_up if not pd.isna(note)]
    if valid_notes:
        return np.nanmax(valid_notes)

    # Fallback: any note registered for this code, regardless of subclass.
    notes_for_code = [
        note
        for (key_code, _), note in mapping.items()
        if key_code == code and not pd.isna(note)
    ]
    if notes_for_code:
        return np.nanmax(notes_for_code)
    return np.nan


In [8]:
def get_best_note(row):
    """Compute the note for one row from its 'Código' and 'Subclasse' fields.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys ``'Código'`` and ``'Subclasse'`` (a DataFrame
        row when used with ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    The value returned by ``select_best_note`` for the row's code and its
    normalized subclass components.
    """
    code = row['Código']
    raw_subclass = row['Subclasse']

    # 1) Tokenize. split_subclasses signals a missing subclass with the
    #    sentinel string "sem subclasse" instead of a list.
    tokens = split_subclasses(raw_subclass)
    if tokens == "sem subclasse":
        tokens = []

    # 2) Normalize: capitalize the first letter of every component.
    normalized = list(map(upperLetter, tokens))

    # 3) Select the note.
    return select_best_note(code, normalized)

In [9]:
# Row-wise (axis=1) application of get_best_note; the result of each call is
# stored in the 'Nota_calculada' column of df_simp.
df_simp['Nota_calculada'] = df_simp.apply(get_best_note, axis=1)


In [10]:
def calculo_score(norm, nota_calculada, divisor=0.03):
    """Compute a score as ``sum(norm * nota_calculada) / divisor``.

    Parameters
    ----------
    norm, nota_calculada : array-like
        Operands multiplied element-wise; their product must expose ``.sum()``
        (pandas Series in this notebook's calls). With pandas Series, missing
        products are skipped by ``sum()``'s default ``skipna=True``, so rows
        without a note contribute nothing to the total.
    divisor : float, default 0.03
        Scaling constant the summed product is divided by. The default keeps
        the value previously hard-coded here.
        NOTE(review): the meaning of 0.03 is not documented in this notebook -
        confirm and record it.

    Returns
    -------
    float
        The scaled sum.

    Raises
    ------
    ValueError
        If ``divisor`` is zero.
    """
    if divisor == 0:
        raise ValueError("divisor must be non-zero")
    produto = norm * nota_calculada
    return produto.sum() / divisor

In [14]:
fundo_teste = 'VGIR11'   # or MXRF11, RBRY11, etc.

# Keep only the rows that belong to the fund under inspection.
df_debug = df_simp[df_simp['Fundo'] == fundo_teste]

# Show up to the first 50 assets with the columns relevant to the score.
# display() is needed because only the last expression of a cell is rendered
# automatically; it is available without an import inside IPython/Jupyter kernels.
display(df_debug[['Ativo', 'Garantia', 'Norm.', 'Nota_calculada', 'Nota']].head(50))

# Score of the selected fund, shown as the cell's output.
score_debug = calculo_score(df_debug['Norm.'], df_debug['Nota_calculada'])
score_debug

np.float64(80.93907184626035)

In [12]:
# One score per fund: calculo_score is applied to the 'Norm.' and
# 'Nota_calculada' columns of each 'Fundo' group (sort=False: group keys are not sorted).
scores = df_simp.groupby('Fundo', sort=False)[['Norm.', 'Nota_calculada']].apply(
    lambda grupo: calculo_score(grupo['Norm.'], grupo['Nota_calculada'])
)


In [13]:
# Print one "<fund>: <score>" line per entry of scores, with the score
# formatted to two decimal places.
for fundo, val in scores.items():
    print(f"{fundo}: {val:.2f}")

VGIR11: 80.94
MXRF11: 0.00
RBRY11: 94.61
KNCR11: 27.44
RBRR11: 88.00
CPTR11: 36.86
KNCA11: 45.55
RURA11: 47.95
KNSC11: 17.41
BODB11: 0.00
KNUQ11: 15.97
HABT11: 1.43
