In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Excel workbook holding the input table; the name is resolved relative to the
# kernel's working directory.
path = "base_final_emisiones_and_deciles_chile.xlsx"
# Read the workbook's default (first) sheet into a DataFrame.
data = pd.read_excel(io=path)

In [9]:
def filtrar_df(data, condicion, periodos=None):
    """
    Sum "typical" profits and pre-tax net income per sector and period for the
    rows selected by ``condicion``.

    Processing steps:
      1. Flag rows with ``issue_count > 0`` (column ``dummy_issue``) and keep only
         the sectors that contain at least one flagged row. The sector
         'Finance and Insurance' is always dropped.
      2. Take the year of ``Period_End_Date`` (column ``Año``) and label it with a
         period (column ``Period``); years outside every period get the label
         'Fuera de rango'.
      3. Build the ROA benchmark: median ROA per sector and year, then the mean of
         those yearly medians per sector and period (``avg_median_roa``).
      4. Typical profits = ``Total_Assets,_Reported`` * ``avg_median_roa``.
      5. Apply ``condicion`` and sum typical profits and
         ``Net_Income_Before_Taxes`` by sector and period.

    The benchmark (step 3) is computed before ``condicion`` is applied, so it is
    based on every row that survives step 1, not only on the selected rows.

    The input frame is not modified.

    Args:
        data (pd.DataFrame): Input table. Columns read: 'issue_count',
            'NAICS_Sector_Name', 'Period_End_Date', 'ROA',
            'Total_Assets,_Reported', 'Net_Income_Before_Taxes', plus whatever
            ``condicion`` refers to.
        condicion (str or callable): Row selector applied to the merged frame.
            Either a Python expression string that refers to the merged frame as
            ``merged_df`` (e.g. "merged_df['dummy_issue'] == 0"), or a callable
            that receives the merged frame and returns a boolean mask.
        periodos (iterable of (first_year, last_year, label), optional):
            Inclusive year ranges and their labels, checked in order. Defaults to
            2013-2019 and 2020-2023.

    Returns:
        pd.DataFrame: One row per ('NAICS_Sector_Name', 'Period') among the
        selected rows, with columns 'sum_tipical' (sum of typical profits) and
        'sum_net' (sum of 'Net_Income_Before_Taxes').
    """
    if periodos is None:
        periodos = ((2013, 2019, '2013-2019'), (2020, 2023, '2020-2023'))
    else:
        # Materialise once so a generator argument can be scanned repeatedly.
        periodos = tuple(periodos)

    def categorize_period(year):
        """Return the label of the first period containing ``year``."""
        for first_year, last_year, label in periodos:
            if first_year <= year <= last_year:
                return label
        # Also reached for NaN years (all comparisons are False).
        return 'Fuera de rango'

    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    data = data.assign(dummy_issue=np.where(data['issue_count'] > 0, 1, 0))

    # Keep only sectors where at least one row has an issue.
    data = data.groupby('NAICS_Sector_Name').filter(lambda x: (x['dummy_issue'] == 1).any())

    # Drop the 'Finance and Insurance' sector. The explicit copy makes the column
    # assignments below operate on an independent frame (no chained-assignment warning).
    data = data[data['NAICS_Sector_Name'] != 'Finance and Insurance'].copy()

    # Year of the reporting date and its period label.
    data['Año'] = pd.to_datetime(data['Period_End_Date']).dt.year
    data['Period'] = data['Año'].apply(categorize_period)

    # Median ROA per sector and year.
    grouped = data.groupby(['NAICS_Sector_Name', 'Año']).agg(
        median_roa=('ROA', 'median')
    ).reset_index()
    grouped['Period'] = grouped['Año'].apply(categorize_period)

    # Benchmark: mean of the yearly medians per sector and period.
    benchmark = grouped.groupby(['NAICS_Sector_Name', 'Period']).agg(
        avg_median_roa=('median_roa', 'mean')
    ).reset_index()

    # Attach the benchmark to every row and derive the typical profits.
    merged_df = pd.merge(data, benchmark, on=['NAICS_Sector_Name', 'Period'], how='inner')
    merged_df['tipical_profits'] = merged_df['Total_Assets,_Reported'] * merged_df['avg_median_roa']

    # Select the rows requested by the caller.
    if callable(condicion):
        mask = condicion(merged_df)
    else:
        # SECURITY: eval() executes arbitrary Python. Only pass expression strings
        # written by the notebook author, never text from an untrusted source.
        # The namespace is explicit so the expression's dependency on the name
        # `merged_df` (and the other intermediate frames) is visible here.
        mask = eval(
            condicion,
            globals(),
            {
                'merged_df': merged_df,
                'data': data,
                'grouped': grouped,
                'benchmark': benchmark,
            },
        )
    merged_df = merged_df.loc[mask]

    # Totals of typical profits and pre-tax net income per sector and period.
    surplus = merged_df.groupby(['NAICS_Sector_Name', 'Period']).agg(
        sum_tipical=('tipical_profits', 'sum'),
        sum_net=('Net_Income_Before_Taxes', 'sum')
    ).reset_index()
    return surplus
    

In [10]:
# Sector/period totals for the two groups of firms being compared:
# rows with decil < 10 (suffix `_x` after the merge) and rows with decil == 10 (suffix `_y`).
surplus1 = filtrar_df(data, "merged_df['decil'] < 10")
surplus2 = filtrar_df(data, "merged_df['decil'] == 10")

# Inner join: a sector/period pair is kept only when it is present in BOTH groups.
# Each input has one row per (sector, period), which `validate` enforces.
merge_surplus = pd.merge(
    surplus1,
    surplus2,
    on=['NAICS_Sector_Name', 'Period'],
    how='inner',
    validate='one_to_one',
)

# Because every (sector, period) pair occurs exactly once, the row-wise sum of the
# two groups already is the pair's total; no groupby/transform is required.
total_net = merge_surplus['sum_net_x'] + merge_surplus['sum_net_y']
total_tipical = merge_surplus['sum_tipical_x'] + merge_surplus['sum_tipical_y']

# Share of each group in the pair's total, expressed as a percentage.
# NOTE: net income can be negative, so net-income shares may fall outside 0-100.
merge_surplus['proporcion_net_x'] = merge_surplus['sum_net_x'] / total_net * 100
merge_surplus['proporcion_net_y'] = merge_surplus['sum_net_y'] / total_net * 100
merge_surplus['proporcion_tipical_x'] = merge_surplus['sum_tipical_x'] / total_tipical * 100
merge_surplus['proporcion_tipical_y'] = merge_surplus['sum_tipical_y'] / total_tipical * 100

# Names of the percentage columns reported in the table.
columnas_a_multiplicar = ['proporcion_net_x', 'proporcion_net_y', 'proporcion_tipical_x', 'proporcion_tipical_y']

# Keep only the keys and the percentage columns.
merge_surplus = merge_surplus.drop(columns=['sum_net_x', 'sum_net_y', 'sum_tipical_x', 'sum_tipical_y'])

# Sort by period, then sector, so the row order is deterministic.
merge_surplus = merge_surplus.sort_values(by=['Period', 'NAICS_Sector_Name'])

# escape=True: the column headers contain underscores, which are invalid in LaTeX
# text mode unless escaped.
latex_code = merge_surplus.to_latex(index=False, escape=True, float_format="%.2f")
print(latex_code)

\begin{tabular}{llrrrr}
\toprule
NAICS_Sector_Name & Period & proporcion_net_x & proporcion_net_y & proporcion_tipical_x & proporcion_tipical_y \\
\midrule
Information & 2013-2019 & 24.32 & 75.68 & 17.95 & 82.05 \\
Manufacturing & 2013-2019 & 58.20 & 41.80 & 54.91 & 45.09 \\
Mining, Quarrying, and Oil and Gas Extraction & 2013-2019 & 19.53 & 80.47 & 8.75 & 91.25 \\
Retail Trade & 2013-2019 & -2.92 & 102.92 & 11.27 & 88.73 \\
Transportation and Warehousing & 2013-2019 & 42.42 & 57.58 & 9.07 & 90.93 \\
Utilities & 2013-2019 & 25.22 & 74.78 & 20.73 & 79.27 \\
Information & 2020-2023 & 20.04 & 79.96 & 14.44 & 85.56 \\
Manufacturing & 2020-2023 & 31.95 & 68.05 & 50.70 & 49.30 \\
Mining, Quarrying, and Oil and Gas Extraction & 2020-2023 & 11.00 & 89.00 & 12.53 & 87.47 \\
Retail Trade & 2020-2023 & 11.65 & 88.35 & 10.33 & 89.67 \\
Transportation and Warehousing & 2020-2023 & 30.42 & 69.58 & 13.00 & 87.00 \\
Utilities & 2020-2023 & 14.29 & 85.71 & 21.42 & 78.58 \\
\bottomrule
\end{tabular}

