<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# FEC - Création du dataset "Trésorerie"

**Tags:** #fec #finance #snippet #operation #trésorerie #tft

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** Ce notebook permet de créer le dataframe pour visualiser l'évolution de la trésorerie avec les entrées et sorties de cash et la position à travers les mois.

## Input

### Import libraries

In [None]:
import naas_data_product

### Setup Variables

In [None]:
# Inputs
# Folder holding the input files read by get_last_df.
input_folder_path = "/home/ftp/FEC-engine/outputs/FEC/bdd_fin"

# Outputs
# Folder where the resulting dataset is written by save_df.
output_folder_path = "/home/ftp/FEC-engine/outputs/FEC/dataset_tresorerie"
# Prompt passed to get_ia_analysis together with each (ENTITY, PERIOD) dataset.
# Wording fixed: "12 mois" (months, not "moins") and "La trésorerie" (feminine
# noun), since the model is asked to start its answer with that exact phrase.
prompt_data = """
Voici le jeu de données sur l'évolution de la trésorerie d'une entreprise sur les 12 mois de l'année avec:
- la position de trésorerie en colonne VALUE
- le montant des encaissements en colonne CASH_IN
- le montant des décaissements en colonne CASH_OUT
Peux-tu me donner un bref aperçu factuel de trésorerie et identifier les variations les plus importantes (CASH_IN) & (CASH_OUT)?
Peux-tu commencer ton analyse par "La trésorerie de l'entreprise a ..."?
"""

## Model

### Récupération du dernier fichier input

In [None]:
# Load the input dataset from input_folder_path.
# NOTE(review): get_last_df is not defined in this notebook; presumably it is
# provided by the naas_data_product environment and returns the most recent
# file as a DataFrame — confirm.
df_input = get_last_df(input_folder_path)
print("✅ Row fetched:", len(df_input))
# Last expression of the cell: preview the first row in the notebook output.
df_input.head(1)

### Création du dataset "TRESORERIE"

In [None]:
def prep_data(df_init):
    """Build the monthly cash dataset for every (ENTITY, PERIOD) pair.

    Rows whose ``COMPTE_NUM`` starts with "5" are kept (presumably the
    cash/bank accounts of the chart of accounts — confirm with the data
    owner). ``VALUE`` is split into ``CASH_IN`` (positive amounts) and
    ``CASH_OUT`` (absolute value of negative amounts), then aggregated per
    month. Months without any movement between the first and the last
    movement of a pair are emitted with zeros so the series is continuous.

    Args:
        df_init: DataFrame with at least the columns ENTITY, PERIOD, DATE,
            COMPTE_NUM and VALUE. It is not modified.

    Returns:
        DataFrame with columns ENTITY, PERIOD, DATE (``"%Y-%m"`` string),
        VALUE, CASH_IN, CASH_OUT and VALUE_LINE (running total of VALUE
        within each pair). An empty DataFrame when no row matches the filter.
    """
    to_group = ["ENTITY", "PERIOD", "DATE"]
    to_agg = {"VALUE": "sum", "CASH_IN": "sum", "CASH_OUT": "sum"}

    # Keep accounts starting with "5"; casting to str and na=False make the
    # mask safe for missing or non-string account numbers.
    is_class_5 = df_init["COMPTE_NUM"].astype(str).str.startswith("5", na=False)
    # Explicit copy: the columns added below must not target a view.
    df = df_init[is_class_5].copy()
    if df.empty:
        return pd.DataFrame()

    # Parse dates before grouping/sorting so ordering is chronological rather
    # than lexicographic; drop any time-of-day so daily reindexing matches.
    df["DATE"] = pd.to_datetime(df["DATE"]).dt.normalize()

    # Cash in / cash out: NaN where the sign does not match (ignored by sum).
    df["CASH_IN"] = df["VALUE"].where(df["VALUE"] > 0)
    df["CASH_OUT"] = df["VALUE"].where(df["VALUE"] < 0)

    # Daily totals, ordered by entity, most recent period first, then date.
    df = (
        df.groupby(to_group, as_index=False)
        .agg(to_agg)
        .sort_values(by=to_group, ascending=[True, False, True])
        .fillna(0)
    )
    df["CASH_OUT"] = df["CASH_OUT"].abs()

    frames = []
    # sort=False keeps the order established by sort_values above.
    for (entity, period), tmp_df in df.groupby(["ENTITY", "PERIOD"], sort=False):
        # Daily calendar from the 1st of the first month to the last movement.
        first_day = tmp_df["DATE"].min()
        start = pd.Timestamp(year=first_day.year, month=first_day.month, day=1)
        idx = pd.date_range(start, tmp_df["DATE"].max(), freq="D")

        # Days without movement get zeros so that empty months still show up.
        daily = tmp_df.set_index("DATE")[list(to_agg)].reindex(idx, fill_value=0)

        # Roll the daily calendar up to one row per month.
        monthly = (
            daily.assign(
                ENTITY=entity,
                PERIOD=period,
                DATE=daily.index.strftime("%Y-%m"),
            )
            .groupby(to_group, as_index=False)
            .agg(to_agg)
        )
        monthly["VALUE_LINE"] = monthly["VALUE"].cumsum()
        frames.append(monthly)

    # Single concat instead of growing a DataFrame inside the loop.
    return pd.concat(frames).reset_index(drop=True)

# Build the monthly cash dataset from the loaded input.
data_prep = prep_data(df_input)
print("Row fetched:", len(data_prep))
# Last expression of the cell: preview the first 12 rows in the notebook output.
data_prep.head(12)

In [None]:
def create_data_treso(df_init):
    """Add the AI analysis and the display columns to the cash dataset.

    For every (ENTITY, PERIOD) pair, the matching rows are passed to
    ``get_ia_analysis`` together with the module-level ``prompt_data`` and
    the result is stored in ``AI_ANALYSIS`` on each of those rows.
    Display columns are then derived: ``LABEL`` (month number from DATE) and
    ``*_D`` versions of VALUE, CASH_IN, CASH_OUT and VALUE_LINE formatted in
    thousands as ``"1 234.5 k€"``.

    Args:
        df_init: DataFrame with the columns ENTITY, PERIOD, DATE (``"%Y-%m"``),
            VALUE, CASH_IN, CASH_OUT and VALUE_LINE. It is not modified.

    Returns:
        A new DataFrame with a fresh index holding the input columns plus
        AI_ANALYSIS, LABEL, VALUE_D, CASH_IN_D, CASH_OUT_D and VALUE_LINE_D.
        An empty input is returned as an empty copy without calling the AI.
    """
    df = df_init.copy()

    # Nothing to analyse: avoid KeyError on a column-less empty frame.
    if df.empty:
        return df.reset_index(drop=True)

    frames = []
    filters = df[["ENTITY", "PERIOD"]].drop_duplicates(ignore_index=True)
    for row in filters.itertuples():
        entity = row.ENTITY
        period = row.PERIOD
        # Explicit copy so the new column is not written on a view of df.
        tmp_df = df[(df["ENTITY"] == entity) & (df["PERIOD"] == period)].copy()

        # AI analysis (one call per entity/period)
        print(f"➡️ Entity: {entity}, Periode: {period}")
        tmp_df["AI_ANALYSIS"] = get_ia_analysis(tmp_df, prompt_data)
        frames.append(tmp_df)

    # Single concat instead of growing a DataFrame inside the loop.
    df_output = pd.concat(frames)

    # Columns to be displayed
    df_output["LABEL"] = pd.to_datetime(df_output["DATE"], format="%Y-%m").dt.strftime("%m")
    for column in ("VALUE", "CASH_IN", "CASH_OUT", "VALUE_LINE"):
        # Thousands with one decimal; the "," thousands separator becomes a space.
        df_output[f"{column}_D"] = (
            (df_output[column] / 1000).map("{:,.1f} k€".format).str.replace(",", " ")
        )
    return df_output.reset_index(drop=True)

# Enrich the monthly dataset with the AI analysis and the display columns.
df_tresorerie = create_data_treso(data_prep)
print("Row fetched:", len(df_tresorerie))
# Last expression of the cell: preview the first 12 rows in the notebook output.
df_tresorerie.head(12)

## Output

### Sauvegarde des fichiers en csv

In [None]:
# Persist the final dataset under output_folder_path.
# NOTE(review): save_df is not defined in this notebook; file name and format
# are decided by that helper (the section title suggests CSV) — confirm.
save_df(df_tresorerie, output_folder_path)