In [1]:
!pip install flatten_dict

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import requests
import xml.etree.ElementTree as ET
import pandas as pd
from flatten_dict import flatten

def parse_xml(url):
    response = requests.get(url)
    data = response.text
    
    # Parse XML into ElementTree object
    root = ET.fromstring(data)
    
    # Convert ElementTree object into nested dictionary
    xml_dict = element_to_dict(root)
    
    # Flatten the nested dictionary
    flat_dict = flatten(xml_dict, reducer='underscore')
    
    df = pd.DataFrame.from_dict(flat_dict, orient='index').transpose()
    return df


def parse_xml_from_text(input_text):   
    # Flatten the nested dictionary
    flat_dict = flatten(input_text, reducer='underscore')
    
    df = pd.DataFrame.from_dict(flat_dict, orient='index').transpose()
    return df

# Helper function to convert ElementTree object to nested dictionary
def element_to_dict(element):
    result = {}
    
    # Store tag name as key
    result[element.tag] = {}
    
    # Store attributes as sub-dictionary if present
    if element.attrib:
        result[element.tag]['attributes'] = element.attrib
    
    # Store text content if present
    if element.text and element.text.strip():
        result[element.tag]['text'] = element.text.strip()
    
    # Process child elements recursively
    for child in element:
        child_dict = element_to_dict(child)
        if child.tag in result[element.tag]:
            # Handle multiple elements with the same tag name
            if isinstance(result[element.tag][child.tag], list):
                result[element.tag][child.tag].append(child_dict[child.tag])
            else:
                result[element.tag][child.tag] = [result[element.tag][child.tag], child_dict[child.tag]]
        else:
            result[element.tag].update(child_dict)
    
    return result

url = "https://raw.githubusercontent.com/louispaulet/hatvp_viz/main/datasets/base/xml_batches/declarations_hatvp_batch_1.xml"
df = parse_xml(url)
df

Unnamed: 0,declarations_declaration
0,"{'dateDepot': {'text': '11/07/2022 15:40:13'},..."
1,"{'dateDepot': {'text': '27/11/2022 18:18:23'},..."
2,"{'dateDepot': {'text': '19/08/2022 10:08:23'},..."
3,"{'dateDepot': {'text': '04/10/2022 17:22:07'},..."
4,"{'dateDepot': {'text': '03/09/2021 10:41:48'},..."
...,...
95,"{'dateDepot': {'text': '30/07/2021 10:49:14'},..."
96,"{'dateDepot': {'text': '26/04/2022 15:22:12'},..."
97,"{'dateDepot': {'text': '23/03/2021 10:55:37'},..."
98,"{'dateDepot': {'text': '10/09/2020 12:31:53'},..."


In [3]:
def explode_and_concat(df, column):
    # Create a temporary DataFrame that contains the column to be exploded along with the other columns in the original DataFrame
    temp_df = df.copy()
    
    # Explode the column containing lists
    temp_df = temp_df.explode(column)
    
    
    # Normalize the DataFrame
    df_to_normalize = pd.json_normalize(temp_df[column])
    df_to_normalize = df_to_normalize.add_prefix(column + '_')
    
    # Merge the original DataFrame with the normalized DataFrame
    result = pd.concat([temp_df.reset_index(drop=True), df_to_normalize.reset_index(drop=True)], axis=1)
    
    # Drop the original column as its content has been normalized and added as new columns
    result = result.drop(columns=[column])
    
    return result

def not_explode_and_concat(df, column):
    # Create a temporary DataFrame that contains the column to be exploded along with the other columns in the original DataFrame
    temp_df = df.copy()
    
    # Explode the column containing lists
    # temp_df = temp_df.explode(column)
    
    
    # Normalize the DataFrame
    df_to_normalize = pd.json_normalize(temp_df[column])
    df_to_normalize = df_to_normalize.add_prefix(column + '_')
    
    # Merge the original DataFrame with the normalized DataFrame
    result = pd.concat([temp_df.reset_index(drop=True), df_to_normalize.reset_index(drop=True)], axis=1)
    
    # Drop the original column as its content has been normalized and added as new columns
    result = result.drop(columns=[column])
    
    return result

In [41]:
columns_to_explode = [
    # 'declaration_mandatElectifDto_items_items',
    # 'declaration_mandatElectifDto_items_items_remuneration.montant.montant',
    # 'declaration_participationDirigeantDto_items_items',
    'declarations_declaration',
    # 'declaration_participationFinanciereDto_items_items',
    # 'declaration_activCollaborateursDto_items_items',
    # 'declaration_participationDirigeantDto_items_items_remuneration.montant.montant'
]

exploded_df = df

for column in columns_to_explode:
    exploded_df = not_explode_and_concat(exploded_df, column)


In [42]:
exploded_df["declarations_declaration_participationFinanciereDto.items.items"]

0     [{'motif': {'id': {'text': 'CREATION'}, 'label...
1     [{'motif': {'id': {'text': 'CREATION'}, 'label...
2     [{'motif': {'id': {'text': 'CREATION'}, 'label...
3     [{'motif': {'id': {'text': 'CREATION'}, 'label...
4                                                   NaN
                            ...                        
95                                                  NaN
96                                                  NaN
97    [{'motif': {'id': {'text': 'CREATION'}, 'label...
98                                                  NaN
99                                                  NaN
Name: declarations_declaration_participationFinanciereDto.items.items, Length: 100, dtype: object

In [43]:
exploded_df.head(1).T

Unnamed: 0,0
declarations_declaration_dateDepot.text,11/07/2022 15:40:13
declarations_declaration_uuid.text,4344aaa1-874d-4e6d-9b1a-45f7725b710c
declarations_declaration_origine.text,ADEL
declarations_declaration_complete.text,true
declarations_declaration_attachedFiles.attachedFiles.fileName.text,VUE_PDF_DU_RECEPISSE_DU_DEPOT_XML
...,...
declarations_declaration_fonctionBenevoleDto.items.items.conservee.text,
declarations_declaration_mandatElectifDto.items.items.remuneration.montant.montant.annee.text,
declarations_declaration_mandatElectifDto.items.items.remuneration.montant.montant.montant.text,
declarations_declaration_participationDirigeantDto.items.items.commentaire.text,


In [44]:
exploded_df = exploded_df[["declarations_declaration_uuid.text",
"declarations_declaration_dateDepot.text",
"declarations_declaration_general.declarant.nom.text",
"declarations_declaration_general.declarant.prenom.text",
"declarations_declaration_general.declarant.dateNaissance.text",
"declarations_declaration_general.qualiteDeclarantForPDF.text",
"declarations_declaration_participationFinanciereDto.items.items"]]

In [45]:
columns_to_explode = [
    # 'declaration_mandatElectifDto_items_items',
    # 'declaration_mandatElectifDto_items_items_remuneration.montant.montant',
    # 'declaration_participationDirigeantDto_items_items',
    # 'declarations_declaration',
    # 'declaration_participationFinanciereDto_items_items',
    # 'declaration_activCollaborateursDto_items_items',
    # 'declaration_participationDirigeantDto_items_items_remuneration.montant.montant'

    "declarations_declaration_participationFinanciereDto.items.items"
]

for column in columns_to_explode:
    exploded_df = explode_and_concat(exploded_df, column)


In [46]:
exploded_df

Unnamed: 0,declarations_declaration_uuid.text,declarations_declaration_dateDepot.text,declarations_declaration_general.declarant.nom.text,declarations_declaration_general.declarant.prenom.text,declarations_declaration_general.declarant.dateNaissance.text,declarations_declaration_general.qualiteDeclarantForPDF.text,declarations_declaration_participationFinanciereDto.items.items_motif.id.text,declarations_declaration_participationFinanciereDto.items.items_commentaire.text,declarations_declaration_participationFinanciereDto.items.items_nomSociete.text,declarations_declaration_participationFinanciereDto.items.items_evaluation.text,declarations_declaration_participationFinanciereDto.items.items_remuneration.text,declarations_declaration_participationFinanciereDto.items.items_nombreParts.text,declarations_declaration_participationFinanciereDto.items.items_actiConseil.text,declarations_declaration_participationFinanciereDto.items.items_capitalDetenu.text
0,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
1,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],CREDIT AGRICOLE SA,2910,néant,341,Non,
2,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],AIRBUS,1929,NEANT,20,Non,
3,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],L'OREAL,6552,NEANT,20,Non,
4,fa8d18ec-0db9-4a39-b1f4-caba0c31329b,27/11/2022 18:18:23,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],33027,Néant,1500,,81
170,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],0,Néant,99,,100
171,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,,SCI\n [Données non publiées],0,Néant,5200,,81
172,86dd9c5d-41ec-44aa-b664-f8d8ba099348,10/09/2020 12:31:53,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,


In [47]:
exploded_df.to_csv('first_small_finance_extract.csv')

In [48]:
exploded_df.describe()

Unnamed: 0,declarations_declaration_uuid.text,declarations_declaration_dateDepot.text,declarations_declaration_general.declarant.nom.text,declarations_declaration_general.declarant.prenom.text,declarations_declaration_general.declarant.dateNaissance.text,declarations_declaration_general.qualiteDeclarantForPDF.text,declarations_declaration_participationFinanciereDto.items.items_motif.id.text,declarations_declaration_participationFinanciereDto.items.items_commentaire.text,declarations_declaration_participationFinanciereDto.items.items_nomSociete.text,declarations_declaration_participationFinanciereDto.items.items_evaluation.text,declarations_declaration_participationFinanciereDto.items.items_remuneration.text,declarations_declaration_participationFinanciereDto.items.items_nombreParts.text,declarations_declaration_participationFinanciereDto.items.items_actiConseil.text,declarations_declaration_participationFinanciereDto.items.items_capitalDetenu.text
count,174,174,174,174,174,169,94,59,94,94,94,94,34,49
unique,98,98,53,54,55,78,1,23,46,71,27,48,1,18
top,4794faad-da62-40f8-a76d-8e05539adeb8,02/02/2022 22:05:45,ALAIN,Valérie,28/02/1961,Elu départemental/Orne (61)/Vice-président en ...,CREATION,Donation \n [Données non publiées],SCI \n [Données non publiées],500,Néant,100,Non,0
freq,22,22,28,28,28,22,94,12,15,7,22,16,34,6


In [50]:
import pandas as pd

strings_to_remove = [
    'declarations_declaration_general.',
    'declarations_declaration_participationFinanciereDto.items.items_',
    '.text'
]

# Assuming you have a dataframe named 'exploded_df'

# Create a new list to store the modified column names
modified_columns = []

# Iterate over each column name
for col in exploded_df.columns:
    modified_col = col
    # Iterate over each string to remove
    for string in strings_to_remove:
        modified_col = modified_col.replace(string, '')

    modified_col = modified_col.replace('.', '_')
    # Add the modified column name to the list
    modified_columns.append(modified_col)

# Assign the modified column names back to the dataframe
exploded_df.columns = modified_columns

In [52]:
exploded_df

Unnamed: 0,declarations_declaration_uuid,declarations_declaration_dateDepot,declarant_nom,declarant_prenom,declarant_dateNaissance,qualiteDeclarantForPDF,motif_id,commentaire,nomSociete,evaluation,remuneration,nombreParts,actiConseil,capitalDetenu
0,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
1,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],CREDIT AGRICOLE SA,2910,néant,341,Non,
2,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],AIRBUS,1929,NEANT,20,Non,
3,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],L'OREAL,6552,NEANT,20,Non,
4,fa8d18ec-0db9-4a39-b1f4-caba0c31329b,27/11/2022 18:18:23,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],33027,Néant,1500,,81
170,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],0,Néant,99,,100
171,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,,SCI\n [Données non publiées],0,Néant,5200,,81
172,86dd9c5d-41ec-44aa-b664-f8d8ba099348,10/09/2020 12:31:53,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,


## ultra complete package

In [11]:
def get_stocks_for_url(url):
  df = parse_xml(url)
  df
  columns_to_explode = [
      'declarations_declaration',
  ]

  exploded_df = df

  for column in columns_to_explode:
      exploded_df = not_explode_and_concat(exploded_df, column)

  exploded_df = exploded_df[["declarations_declaration_uuid.text",
  "declarations_declaration_dateDepot.text",
  "declarations_declaration_general.declarant.nom.text",
  "declarations_declaration_general.declarant.prenom.text",
  "declarations_declaration_general.declarant.dateNaissance.text",
  "declarations_declaration_general.qualiteDeclarantForPDF.text",
  "declarations_declaration_participationFinanciereDto.items.items"]]
  columns_to_explode = [
      "declarations_declaration_participationFinanciereDto.items.items"
  ]

  for column in columns_to_explode:
      exploded_df = explode_and_concat(exploded_df, column)
  strings_to_remove = [
      'declarations_declaration_general.',
      'declarations_declaration_participationFinanciereDto.items.items_',
      '.text'
  ]

  # Assuming you have a dataframe named 'exploded_df'

  # Create a new list to store the modified column names
  modified_columns = []

  # Iterate over each column name
  for col in exploded_df.columns:
      modified_col = col
      # Iterate over each string to remove
      for string in strings_to_remove:
          modified_col = modified_col.replace(string, '')

      modified_col = modified_col.replace('.', '_')
      # Add the modified column name to the list
      modified_columns.append(modified_col)

  # Assign the modified column names back to the dataframe
  exploded_df.columns = modified_columns

  return exploded_df

In [12]:
url = "https://raw.githubusercontent.com/louispaulet/hatvp_viz/main/datasets/base/xml_batches/declarations_hatvp_batch_1.xml"
get_stocks_for_url(url)

Unnamed: 0,declarations_declaration_uuid,declarations_declaration_dateDepot,declarant_nom,declarant_prenom,declarant_dateNaissance,qualiteDeclarantForPDF,motif_id,commentaire,nomSociete,evaluation,remuneration,nombreParts,actiConseil,capitalDetenu
0,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
1,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],CREDIT AGRICOLE SA,2910,néant,341,Non,
2,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],AIRBUS,1929,NEANT,20,Non,
3,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],L'OREAL,6552,NEANT,20,Non,
4,fa8d18ec-0db9-4a39-b1f4-caba0c31329b,27/11/2022 18:18:23,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],33027,Néant,1500,,81
170,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,Titre détenu en usufruit,SCI\n [Données non publiées],0,Néant,99,,100
171,29354efc-115c-4505-930a-8e4f4d620f85,23/03/2021 10:55:37,ALEDO,Marcel,11/07/1948,Maire ou adjoint municipal/Clermont-Ferrand (63),CREATION,,SCI\n [Données non publiées],0,Néant,5200,,81
172,86dd9c5d-41ec-44aa-b664-f8d8ba099348,10/09/2020 12:31:53,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,


In [13]:
url = "https://raw.githubusercontent.com/louispaulet/hatvp_viz/main/datasets/base/xml_batches/declarations_hatvp_batch_2.xml"
get_stocks_for_url(url)

Unnamed: 0,declarations_declaration_uuid,declarations_declaration_dateDepot,declarant_nom,declarant_prenom,declarant_dateNaissance,qualiteDeclarantForPDF,motif_id,nomSociete,evaluation,remuneration,capitalDetenu,nombreParts,actiConseil,commentaire
0,7789d319-d572-43cd-8534-20568f036525,26/09/2021 20:40:19,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,
1,2d0e6f00-f37f-4dcd-8b19-75ce0b9348e4,26/09/2021 21:01:41,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,
2,868e37c7-94e2-463c-bd9f-d6acb280da1e,25/01/2022 16:28:30,ALEMAGNA,Claude,21/09/1957,Membre d’EPCI/Dracénie Provence Verdon agglomé...,,,,,,,,
3,346c159c-450a-4497-91e1-3573c67f5d17,23/06/2021 22:04:05,ALEMANI,roger,29/06/1957,Membre d’EPCI/CAE,,,,,,,,
4,35ac5aa0-03b8-4d3b-a12f-d0451dedcd05,07/09/2021 07:27:30,Alemany,Jérôme,19/11/1973,Elu départemental/Loire-Atlantique (44)/Vice-p...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,96990322-f9e6-4fff-84ab-365a71275b68,28/09/2021 12:20:00,AMSLER,Jean-Daniel,03/04/1946,Elu départemental/Val-de-Marne (94)/VICE PRESI...,CREATION,TOTAL ENERGIES SE,2224,159.44,,61,,PEA
144,96990322-f9e6-4fff-84ab-365a71275b68,28/09/2021 12:20:00,AMSLER,Jean-Daniel,03/04/1946,Elu départemental/Val-de-Marne (94)/VICE PRESI...,CREATION,ORANGE,157,9.80,,16,,PTF Titres
145,96990322-f9e6-4fff-84ab-365a71275b68,28/09/2021 12:20:00,AMSLER,Jean-Daniel,03/04/1946,Elu départemental/Val-de-Marne (94)/VICE PRESI...,CREATION,ACCIMMO PIERRE SCPI,28000,850,,150,,Ptf titres BNP paribas
146,96990322-f9e6-4fff-84ab-365a71275b68,28/09/2021 12:20:00,AMSLER,Jean-Daniel,03/04/1946,Elu départemental/Val-de-Marne (94)/VICE PRESI...,CREATION,SOCIETE CENTRALE PREVOIR,690000,26000,,7500,,7000 TITRES DETENUS EN USUFRUIT et 350 en plei...


## handling the complete 100 batches

In [17]:
from tqdm.auto import tqdm

df_list = []

# actually 101 batches
for i in tqdm(range(101)):

  url = f"https://raw.githubusercontent.com/louispaulet/hatvp_viz/main/datasets/base/xml_batches/declarations_hatvp_batch_{i+1}.xml"
  df_list.append(get_stocks_for_url(url))

  0%|          | 0/101 [00:00<?, ?it/s]

In [18]:
# from df list to complete df
complete_df = pd.concat(df_list)

Unnamed: 0,declarations_declaration_uuid,declarations_declaration_dateDepot,declarant_nom,declarant_prenom,declarant_dateNaissance,qualiteDeclarantForPDF,motif_id,commentaire,nomSociete,evaluation,remuneration,nombreParts,actiConseil,capitalDetenu,nomOrganisationConseil,motif_label
0,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,,,
1,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],CREDIT AGRICOLE SA,2910,néant,341,Non,,,
2,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],AIRBUS,1929,NEANT,20,Non,,,
3,4344aaa1-874d-4e6d-9b1a-45f7725b710c,11/07/2022 15:40:13,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],L'OREAL,6552,NEANT,20,Non,,,
4,fa8d18ec-0db9-4a39-b1f4-caba0c31329b,27/11/2022 18:18:23,ABAD,DAMIEN,05/04/1980,Député/Ain(01),CREATION,[Données non publiées],ORANGE,877,néant,83,Non,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,19140875-1488-43e7-95a7-63d0b7212a19,25/06/2021 13:29:08,zuili,nicolas,08/03/1965,Maire ou adjoint municipal/Rouen (76),CREATION,COMPTE TITRES CDN,BNP PARIBAS,10820,0,200,,,,
103,19140875-1488-43e7-95a7-63d0b7212a19,25/06/2021 13:29:08,zuili,nicolas,08/03/1965,Maire ou adjoint municipal/Rouen (76),CREATION,COMPTE TITRE CDN,RENAULT,7073,0,200,,,,
104,19140875-1488-43e7-95a7-63d0b7212a19,25/06/2021 13:29:08,zuili,nicolas,08/03/1965,Maire ou adjoint municipal/Rouen (76),CREATION,COMPTE TITRES CDN,SAINT GOBAIN,11238,0,200,,,,
105,19140875-1488-43e7-95a7-63d0b7212a19,25/06/2021 13:29:08,zuili,nicolas,08/03/1965,Maire ou adjoint municipal/Rouen (76),CREATION,COMPTE TITRES CDN,UNIBAIL RODAMCO,7991,0,100,,,,


In [None]:
complete_df

In [23]:
declaration_nb = len(complete_df['declarations_declaration_uuid'].unique())
print(f'Number of declarations in this database: {declaration_nb}')

Number of declarations in this database: 10050


In [24]:
complete_df.to_csv("/content/drive/MyDrive/AI/HATVP/datasets/hatvp_stocks.csv")