In [81]:
import os
import datetime
import dateutil
import tarfile
import pandas as pd
import pandas.io.sql as sqlio
import requests
import re
import glob
import numpy as np

%matplotlib inline
from matplotlib import pylab as plt

In [82]:
!mkdir -p bourso/
!rm -rf bourso/20*
stream = requests.get('https://www.lrde.epita.fr/~ricou/pybd/projet/bourso.tgz', stream=True)
tarfile.open(fileobj=stream.raw, mode='r|gz').extractall('bourso/') # try 'r:gz' if there is an error

In [83]:
def clean_last(df):
    """last is of object type and sometimes ends with (c) or (s)"""
    return [
        float(re.split("\\(.\\)$", str(x))[0].replace(" ", "").replace(",", "."))
        for x in df["last"]
    ]


def read_bourso_year(year):
    compA = pd.concat(
        {
            dateutil.parser.parse(
                f.split("compA ")[1].split(".bz2")[0]
            ): pd.read_pickle(f)
            for f in glob.glob("bourso/" + year + "/compA*")
        }
    )
    compB = pd.concat(
        {
            dateutil.parser.parse(
                f.split("compB ")[1].split(".bz2")[0]
            ): pd.read_pickle(f)
            for f in glob.glob("bourso/" + year + "/compB*")
        }
    )
    merge = pd.concat([compA, compB])
    merge["last"] = clean_last(merge)
    merge.reset_index(level=1, drop=True, inplace=True)
    merge.rename_axis("date", axis=0, inplace=True)
    # dropping duplicates only checks columns
    merge = merge.reset_index().drop_duplicates().set_index("date")
    merge.set_index("symbol", append=True, inplace=True)
    merge = merge.swaplevel(0, 1).sort_index()

    # delta indicates the volume(number of stock sold) per entry instead of volume which is cumulative per day
    merge["delta"] = np.zeros(len(merge))
    for stock in merge.index.levels[0]:
        merge.loc[(stock, slice(None)), "delta"] = merge.loc[
            (stock, slice(None)), "volume"
        ].diff()

    # filling holes from start of day data and missing data
    merge.loc[merge.delta < 0, "delta"] = 0
    merge.delta = merge.delta.fillna(0)

    # sets the delta of the first entry to its volume instead of 0
    # we should do this but it fails on 2020 and 2023 so it's disabled for now
    # merge.loc[merge.groupby('symbol').head(1).index, 'delta'] = merge.groupby('symbol')['volume'].transform('first')

    return merge


# test = read_bourso_year('2020')
# test.head()

In [84]:
def convert_bourso_daily(df):
    df["turnover"] = df["last"] * df["delta"]

    dates = df.index.get_level_values("date").normalize()
    dates.name = "date"

    df_daily = df.groupby(["symbol", dates]).agg(
        {
            "last": "last",  # Last entry of the day
            "volume": "max",  # Maximum volume of the day
            "name": "first",  # First name entry of the day
            "turnover": "sum",  # Sum of all turnovers in that day
        }
    )
    return df_daily


# tmp = convert_bourso_daily(test)
# tmp

# Euronext

In [85]:
!mkdir -p euronext/
!rm -rf euronext/
stream = requests.get('https://www.lrde.epita.fr/~ricou/pybd/projet/euronext.tgz', stream=True)
tarfile.open(fileobj=stream.raw, mode='r|gz').extractall('euronext/') # try 'r:gz' if there is an error

In [86]:
# starting at some point in 2022 the column names were changed
import dateutil.parser


rename_dict = {
    "Open Price": "Open",
    "High Price": "High",
    "low Price": "Low",
    "last Price": "Last",
    "last Trade MIC Time": "Last Date/Time",
    "Currency": "Trading Currency",
}

# note that some entries in open, high, low, last are just set to -
# ok apparently the currency can be set as 0


def read_euronext_file(path):
    if path.endswith(".csv"):
        return pd.read_csv(path, delimiter="\t")
    return pd.read_excel(path)


def regularize_data_to_numbers(df):
    """last is of object type and sometimes ends with (c) or (s)"""
    df[["Volume", "Turnover"]] = df[["Volume", "Turnover"]].replace("-", 0).fillna(0)
    df["Last"] = [round(float(x), 2) for x in df["Last"]]
    df["Volume"] = [int(x) for x in df["Volume"]]
    df["Turnover"] = [round(float(x), 2) for x in df["Turnover"]]
    return df


def regularize_euronext_empty_columns_fill(df):
    df.rename(columns=rename_dict, inplace=True)
    if "Closing Price" in df.columns:
        df["Last"] = df["Last"].fillna(df["Closing Price"]).fillna(0)
        df.drop(columns=["Closing Price"], inplace=True)
    if "Closing Price DateTime" in df.columns:
        df["Last Date/Time"] = (
            df["Last Date/Time"].fillna(df["Closing Price DateTime"]).fillna(0)
        )
        df.drop(columns=["Closing Price DateTime"], inplace=True)
    return df


def regularize_data_to_string(df, columns):
    for col in columns:
        df[col] = df[col].astype(str).str.strip().str.lower()
    return df


def convert_foreign_currencies_to_eur(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    # Normalize currency column
    df["Trading Currency"] = df["Trading Currency"].astype(str).str.strip().str.upper()

    # USD case
    usd_mask = df["Trading Currency"] == "USD"
    if usd_mask.any():
        print(f"🔁 Converting {usd_mask.sum()} rows from USD to EUR at rate 0.91")
        df.loc[usd_mask, "Last"] = df.loc[usd_mask, "Last"] * 0.91

    # GBP case
    gbp_mask = df["Trading Currency"] == "GBP"
    if gbp_mask.any():
        print(f"🔁 Converting {gbp_mask.sum()} rows from GBP to EUR at rate 1.15")
        df.loc[gbp_mask, "Last"] = df.loc[gbp_mask, "Last"] * 1.15

    return df


def read_euronext_year(year):
    eur = pd.concat(
        [
            regularize_euronext_empty_columns_fill(read_euronext_file(f))
            for f in glob.glob("euronext/*" + year + "*")
        ]
    )
    # the first three rows are a preamble that doesnt give us anything
    eur = eur.iloc[3:].reset_index(drop=True)
    eur = eur[
        ~((eur["Last"] == "-") & (eur["Volume"] == "-") & (eur["Turnover"] == "-"))
    ]
    eur = eur[~((eur["Symbol"].isna()))]
    # any remaining '-' in the data we assume to be null or 0
    eur = regularize_data_to_numbers(eur)
    eur = convert_foreign_currencies_to_eur(eur)
    eur = eur.drop_duplicates()
    return eur

In [87]:
# note Hammerson plc and t stamp inc is in gbp and usd respectively

In [88]:
# note that volume in euronext is equivalent to delta in bourso
def get_bourso_matching_df(euronext_df):
    tmp = euronext_df[
        ["Symbol", "Last Date/Time", "ISIN", "Last", "Volume", "Name", "Turnover"]
    ]
    tmp = tmp.rename(
        columns={
            "Symbol": "symbol",
            "Last Date/Time": "date",
            "ISIN": "isin",
            "Last": "last",
            "Volume": "volume",
            "Name": "name",
            "Turnover": "turnover",
        }
    )
    tmp = tmp.set_index(["symbol", "date"])
    tmp = tmp.sort_index()
    return tmp

# Getting Both

In [89]:
# in boursorama the name is 'AIR FRANCE -KLM' and in euronext it's 'AIR FRANCE - KLM' so i'm just going to remove all of the spaces so we can match up the names

In [90]:
isin_name_mapping = {
    "FR0013341781": "2crs",
    "FR0014000T90": "2mxorganic",
    "FR0014000TB2": "teractbs",
    "FR0010557264": "abscience",
    "FR0004040608": "abcarbitrage",
    "FR0013185857": "abeo",
    "FR0000060402": "albioma",
    "FR0012616852": "abionyxpharma",
    "FR0012333284": "abivax",
    "FR0000120404": "accor",
    "FR0000045072": "creditagricole",
    "FR0000064602": "acanthedev",
    "FR0011184241": "adocia",
    "FR0010340141": "adp",
    "FR0012821890": "adux",
    "FR0004152874": "advenis",
    "FR0000053043": "advin",
    "FR0013296746": "advicenne",
    "FR0000031122": "airfranceklm",
    "CH0008853209": "agtarecord",
    "FR0000120073": "airliquide",
    "NL0000235190": "airbus",
    "FR0004180537": "akkatechnologies",
    "FR0010313833": "arkema",
    "US00774B2088": "aerkomm",
    "FR0000053027": "akwel",
    "FR0011038348": "klarsen",
    "FR0000053076": "ada",
    "FR0010457531": "nfty",
    "FR0013247244": "adomos",
    "FR0013452281": "agripower",
    "FR0010641449": "agrogenera",
    "FR0010493510": "acheterlouerfr",
    "FR0013410370": "auplatamininggr",
    "ES0109429037": "ispd",
    "FR0011910652": "anevia",
    "FR0010340711": "aquila",
    "FR0013284627": "adeunis",
    "FR0000079683": "artefact",
    "BE0974334667": "llamagroup",
    "FR0000035305": "bdmultimedia",
    "FR0010202606": "sidetrade",
    "FR0000124414": "gascogne",
    "FR0011005933": "biosynex",
    "FR0013384369": "baikowsk",
    "FR0004174233": "bilend",
    "FR0011041011": "bluelinea",
    "FR0000054421": "bourreliergroup",
    "FR0012816825": "biophytis",
    "FR0004172450": "mint",
    "FR0010907956": "carmat",
    "FR0013297165": "cesar",
    "FR0013530102": "veomgroup",
    "FR0014000JX7": "alchimie",
    "FR0011716265": "crossject",
    "FR0004152882": "clasquin",
    "FR0010425595": "cellectis",
    "FR0013257409": "cofidur",
    "FR0011071570": "cogra",
    "BE0160342011": "coil",
    "FR0012788065": "bio",
    "FR0011648716": "carbios",
    "FR0013398997": "arcure",
    "FR0013505583": "cybergun",
    "FR0013204351": "cybergun",
    "FR0013258662": "ayvens",
    "FR0000185423": "damartex",
    "FR0000066961": "bernardloiseau",
    "FR0013066750": "dbt",
    "FR0013400991": "deltadronebsay",
    "FR0010879056": "deinove",
    "FR0000054132": "delfingen",
    "FR0000060840": "devernois",
    "FR0010377127": "dolfines",
    "FR0010404368": "dls",
    "FR0013331212": "dontnod",
    "FR0010436584": "dnx",
    "FR0011522168": "deltadrone",
    "FR0013088606": "dronevolt",
    "FR0004030708": "encresdubuit",
    "BE0974269012": "adcsiic",
    "IT0005351504": "ediliziacrobatica",
    "FR0013245123": "emovabs",
    "FR0013534617": "ecomiam",
    "FR0013256518": "eurofinscerep",
    "FR0000075343": "euromedisgroupe",
    "FR0013356755": "emovagroup",
    "FR0011915339": "enertime",
    "FR0000045122": "entreprendre",
    "FR0010424697": "entreparticuliers",
    "FR0010465534": "eo2",
    "FR0013240934": "eurobioscientific",
    "FR0011490648": "ecoslops",
    "FR0000035818": "esker",
    "FR0010844001": "eurasiagroupe",
    "FR0000044810": "europlasma",
    "FR0013514114": "europlasma",
    "FR0010246322": "easyvista",
    "FR0004034593": "fashionbair",
    "FR0010221069": "filae",
    "FR0000074759": "fleurymichon",
    "FR0012419307": "pullupentertain",
    "FR0010485268": "fountainepajot",
    "FR0004187367": "freelancecom",
    "FR0013495298": "gaussin",
    "FR0011052257": "globalbioenergies",
    "FR0010452474": "eurogerm",
    "FR0004053510": "genoway",
    "FR0011289198": "globalecopower",
    "FR0000033888": "gevelot",
    "FR0012819381": "groupeguillin",
    "FR0011208693": "goldbygold",
    "FR0013429404": "groupetera",
    "CA4433003064": "h2oinnova",
    "FR0013451044": "hoffmann",
    "FR0000077562": "hiolleindustries",
    "FR0010396309": "hitechpros",
    "FR0000066540": "herige",
    "FR0013219367": "horizontalsoftware",
    "FR0010207795": "harvest",
    "FR0004153930": "aton",
    "FR0005854700": "i2s",
    "FR0011511971": "iceram",
    "FR0000062184": "idsud",
    "FR0013297488": "groupimo",
    "FR0013470168": "implanet",
    "FR0013060100": "immersion",
    "FR0011179886": "intrasen",
    "FR0010908723": "integragen",
    "BE0974299316": "invibesadvertsing",
    "FR0000072597": "itlink",
    "FR0010082305": "ivalis",
    "FR0000182479": "archos",
    "FR0010722819": "kalray",
    "IT0005380438": "mediamaker",
    "FR0013374667": "kkointernational",
    "FR0013156007": "kerlink",
    "FR0004027068": "lansonb",
    "FR0000075442": "groupeldlc",
    "FR0013335742": "cogelec",
    "FR0000033599": "lexibooklinguist",
    "FR0007080254": "leshotelsbaverez",
    "FR0010131409": "wallix",
    "ES0105089009": "lleida",
    "FR0000044943": "logicinstrument",
    "FR0000060451": "installux",
    "FR0013421286": "alphamos",
    "FR0000072993": "netmediagroup",
    "FR0013400835": "marenostrum",
    "FR0004155687": "mastrad",
    "IT0005119109": "mybestgroup",
    "FR0013449857": "mndbsa2019",
    "FR0010353888": "mgidigitalgraph",
    "FR0011049824": "mediantechnologies",
    "FR0000061244": "altheora",
    "FR0004178572": "medicreainternat",
    "FR0010776617": "sapmer",
    "FR0011217710": "methanor",
    "FR0010204453": "mginternational",
    "FR0011051598": "amoeba",
    "FR0004058949": "microwavevision",
    "FR0013270626": "m2",
    "FR0010285965": "1000mercis",
    "FR0013053535": "miliboo",
    "FR0011584549": "mnd",
    "FR0010812230": "siriusmedia",
    "FR0011033083": "moulinvest",
    "FR0004034320": "mrbricolage",
    "IT0004615396": "methorioscapital",
    "FR0013462231": "munic",
    "FR0004032746": "neovacs",
    "FR0011636083": "neolife",
    "FR0013330792": "enensys",
    "FR0010397232": "novacyt",
    "NL0012044762": "tmepharma",
    "FR0013399359": "energisme",
    "FR0000064529": "nscgroupe",
    "FR0004065639": "n",
    "FR0004171346": "nextedia",
    "FR0010220475": "alstom",
    "FR0010330613": "ober",
    "FR0010231860": "o2",
    "FR0014000IN0": "alstom",
    "FR0011766229": "oncodesign",
    "FR0010095596": "valeriotx",
    "FR0013318052": "ordissimo",
    "FR0013231180": "osmozis",
    "FR0013328184": "oxatis",
    "FR0000062978": "dekuple",
    "FR0013204070": "gpeparot(auto)",
    "FR0010785790": "plantadvanced",
    "FR0013479730": "paulicmeunerie",
    "CY0104972217": "primecity",
    "FR0013178712": "cerinnovgroup",
    "FR0000061608": "piscinesdesjoyaux",
    "NL0012191662": "laperlafashion",
    "FR0011191287": "pharnext",
    "ES0105029005": "faceph",
    "BE0974302342": "pharmasimple",
    "FR0000054652": "alesgroupe",
    "FR0011950641": "pixiumvision",
    "FR0000066441": "poujoulat",
    "FR0010211037": "erold",
    "FR0013015583": "poulaillon",
    "FR0010169920": "predilife",
    "FR0004044600": "prismaflexintl",
    "FR0010313486": "prodware",
    "FR0011648971": "quantumgenomics",
    "ES0105118006": "quadpack",
    "FR0010889386": "qwamplify",
    "FR0011858190": "realites",
    "FR0010820274": "reworldmedia",
    "FR0000037640": "rougier",
    "FR0000075954": "riber",
    "FR0010523167": "roctool",
    "FR0013379484": "solutions30",
    "FR0013467123": "safe",
    "FR0000074775": "stradimespacfin",
    "IT0005353484": "seifspa",
    "FR0012596468": "sensorion",
    "FR0000073728": "sermagroup",
    "FR0011464452": "spineguard",
    "GB00B19RTX44": "klimvest",
    "FR0000061582": "siparticipations",
    "FR0011398874": "spineway",
    "FR0010526814": "supersonicimagine",
    "FR0010528059": "streamwide",
    "FR0000033219": "altarea",
    "FR0000075673": "letanneur",
    "FR0011053636": "theblockchaingp",
    "FR0004197747": "theradiag",
    "FR0010278762": "envea",
    "FR0010120402": "theraclion",
    "FR0013286259": "theranexus",
    "LU0394945660": "tooluxsanding",
    "FR0010397901": "trilogiq",
    "FR0004175099": "tronics",
    "FR0010383877": "tt",
    "FR0010395681": "alturinvest",
    "FR0013345493": "biouvgroup",
    "FR0004166197": "evolis",
    "FR0010654087": "txcom",
    "FR0000079147": "u10",
    "FR0011884378": "lucibel",
    "FR0011070457": "ucar",
    "FR0013263878": "umanis",
    "FR0012709160": "unit",
    "FR0010337865": "upergy",
    "FR0011898584": "uvgerm",
    "FR0013254851": "valbiotis",
    "FR0004045847": "voyageursdumonde",
    "FR0004155240": "vergnet",
    "FR0011532225": "vogo",
    "FR0010326090": "vialife",
    "FR0004029478": "visiativ",
    "FR0013481835": "visiomedgroup",
    "FR0010766667": "venteuniquecom",
    "FR0013079092": "weconnect",
    "FR0010688440": "wedia",
    "FR0014000P11": "winfarm",
    "FR0013143872": "witbe",
    "IT0004013725": "safwood",
    "FR0000121725": "dassaultavia",
    "FR0012789667": "amplitudesurgical",
    "FR0004125920": "amund",
    "FR0013258589": "antalis",
    "FR0013469301": "aneviabsaa",
    "FR0013469319": "aneviabsab",
    "LU0569974404": "aperam",
    "KYG6096M1069": "aptorumgroupcla",
    "FR0004037125": "april",
    "FR0004070795": "ardoinstamandb",
    "FR0000039216": "altareit",
    "FR0010481960": "argan",
    "FR0012185536": "artea",
    "FR0000076952": "artoisnom",
    "BE0974289218": "dmsimaging",
    "FR0000076887": "astgroupe",
    "FR0000074148": "assystem",
    "FR0010478248": "atar",
    "FR0000071946": "alten",
    "FR0011992700": "ateme",
    "FR0000076655": "actiagroup",
    "FR0000051732": "atos",
    "FR0000063737": "aubay",
    "FR0000061780": "augroscosmetic",
    "FR0000039232": "aurea",
    "FR0013183589": "aurestechnologies",
    "FR0000066052": "avenirtelecom",
    "FR0013529815": "avenirtelecom",
    "FR0011800218": "cabassegroup",
    "FR0011040500": "axwaysoftware",
    "MC0000031187": "bainsmermona",
    "FR0013258399": "balyo",
    "FR0004023208": "bassac",
    "FR0000120966": "bic",
    "FR0000064123": "baccarat",
    "FR0000072399": "belier",
    "FR0000035164": "beneteau",
    "FR0000062341": "finetangberre",
    "FR0000074072": "bigbeninteractive",
    "FR0013280286": "biomerieux",
    "FR0013517380": "biosynexbsar",
    "FR0000035370": "bastideleconfort",
    "FR0000062150": "bleecker",
    "FR0013480969": "bluelineabsaj",
    "FR0011592104": "blue",
    "FR0000060873": "mbws",
    "FR0000120644": "danone",
    "FR0000078958": "businessetdecis",
    "FR0000131104": "bnpparibasacta",
    "FR0000061129": "boiron",
    "FR0000039299": "bollore",
    "FR0013447653": "bollorenv20",
    "FR0000063935": "bonduelle",
    "FR0011814938": "boostheat",
    "BE0974280126": "biosenic",
    "FR0013507290": "biophytisb",
    "FR0000074254": "boursedirect",
    "FR0013307329": "mintbs",
    "FR0000062788": "barbarabu",
    "FR0000061137": "burelle",
    "FR0006174348": "bureauveritas",
    "FR0000120172": "carrefour",
    "FR0000045528": "crcamparisetidf",
    "FR0010151589": "cafom",
    "FR0000125338": "capgemin",
    "FR0012969095": "capell",
    "FR0010828137": "carmila",
    "FR0000064156": "carpiniennepart",
    "FR0000072894": "cast",
    "FR0000045544": "catoulouse31",
    "FR0010193052": "catanagroup",
    "US1491231015": "caterpillar",
    "FR0000079659": "cambodgenom",
    "FR0000045601": "robertetc",
    "FR0010193979": "cboterritoria",
    "FR0000045619": "robertetcdv87",
    "FR0006239109": "scbsm",
    "FR0000044364": "crcamnormseine",
    "FR0000053324": "alpes(compagnie)",
    "FR0000130403": "christiandior",
    "FR0000036675": "groupecrit",
    "FR0000037475": "cf",
    "FR0013181864": "viridien",
    "FR0013309614": "cggbsa1",
    "FR0013309622": "cggbsa2",
    "FR0000053506": "cegedim",
    "FR0010309096": "vitura",
    "FR0000060907": "chausseria",
    "FR0000054322": "ciboxinteractiv",
    "FR00140003K4": "ciboxbs",
    "FR0000045213": "crcamillevil",
    "FR0013426004": "claranova",
    "FR0012633360": "cellnovo",
    "FR0000045551": "crcammorbihan",
    "FR0000185514": "crcamnord",
    "FR0000120222": "cnpassurances",
    "NL0010949392": "cnova",
    "FR0000125585": "casinoguichard",
    "FR0010667147": "coface",
    "FR0004031763": "coheris",
    "FR0000053399": "cnimgroup",
    "FR0000065393": "courtois",
    "FR0000064578": "covivio",
    "FR0000060303": "coviviohotels",
    "FR0013018124": "nicox",
    "FR0000044323": "crcamalpprov",
    "FR0000185506": "crcamatlvend",
    "FR0010483768": "crcambriepic2",
    "FR0000130692": "chargeurs",
    "FR0010461053": "crcamlangued",
    "FR0000045239": "crcamloirehtel",
    "FR0000050395": "crosswood",
    "FR0000045346": "crcamsudra",
    "FR0000045304": "crcamtouraine",
    "FR0000120628": "axa",
    "FR0013468253": "axanv20",
    "FR0000064446": "cateringintlsces",
    "BMG9887P1068": "zcilimited",
    "BE0974260896": "celyadoncology",
    "FR0013340809": "cybergunbsa1",
    "FR0013340817": "cybergunbsa2",
    "FR0013507977": "cybergunbsak1",
    "FR0013507985": "cybergunbsak2a",
    "FR0013508009": "cybergunbsak2b",
    "FR0013536810": "damartex",
    "FR0000053381": "derichebourg",
    "FR0010417345": "dbvtechnologies",
    "FR0000077919": "jcdecaux",
    "FR0000052623": "dedalusfrance",
    "FR0014000GM6": "delfingen",
    "FR0000125486": "v",
    "GB0002374006": "diageo",
    "FR0012202497": "diagnosticmedical",
    "FR0000035784": "evergreen",
    "FR0013154002": "sartoriusstedbio",
    "FR0011026749": "dalet",
    "FR0013283108": "deltaplusgroup",
    "FR0000065260": "dockpetrambesam",
    "FR0000054199": "stdupont",
    "FR0014000IK6": "dronevolt",
    "FR0012860542": "dronevoltbs",
    "FR0000130650": "dassaultsystemes",
    "FR0013466893": "dassaultsysnv20",
    "FR0000073793": "devoteam",
    "FR0000062507": "finetangberrepf",
    "GA0000121459": "totalenergiesgabon",
    "FR0010099515": "eca",
    "FR0010490920": "europa",
    "FR0010908533": "edenred",
    "FR0010242511": "edf",
    "FR0000064404": "media6",
    "FR0000035719": "electmadagascar",
    "FR0000061475": "eurasiafonc",
    "FR0000036816": "toureiffel",
    "FR0011466069": "ekinops",
    "FR0000121667": "essilorluxottica",
    "FR0000031023": "elecstrasbourg",
    "FR0011950732": "eliorgroup",
    "FR0012435121": "elis",
    "FR0000120503": "bouygues",
    "FR0010208488": "engie",
    "NL0006294274": "euronext",
    "FR0000121147": "forvia",
    "FR0000076861": "acteos",
    "FR0011191766": "eosimaging",
    "FR0000037343": "explosprodchipf",
    "FR0012650166": "nhoa",
    "FR0000131757": "eramet",
    "FR0000038259": "eurofinsscient",
    "FR0014000MR3": "eurofinsscient",
    "FR0013462876": "eurofinsnv20",
    "FR0011471135": "erytechpharma",
    "FR0000120669": "esso",
    "FR0004110310": "esigroup",
    "NL0009272137": "esperite",
    "FR0010221234": "eutelsatcommunic",
    "FR0012789949": "europcarmobility",
    "FR0000054678": "euroressources",
    "FR0013294089": "europlasmabsc",
    "FR0000038499": "fonciereeuris",
    "FR0000064164": "exacomptaclairef",
    "FR0004527638": "exelindustries",
    "FR0000039026": "explosifsprodch",
    "FR0011271600": "fermentalg",
    "FR0012951028": "foncatlandb",
    "FR0000064362": "fonciereatland",
    "FR0013455482": "atland",
    "FR0000063034": "fauvetgirel",
    "FR0000031973": "fayencsarreguem",
    "FR0000121857": "bel",
    "FR0000062101": "fermcasmuncanne",
    "FR0013451333": "fdj",
    "FR0000064784": "peugeotinvest",
    "FR0011665280": "figeacaero",
    "FR0000130452": "eiffage",
    "FR0000050353": "lis",
    "FR0000060824": "financieremarjos",
    "FR0000038184": "fipp",
    "FR0004076891": "groupeflo",
    "FR0000033409": "foncierelyonnai",
    "FR0011476928": "fnacdarty",
    "FR0000035123": "finatis",
    "SN0000033192": "finouestafricain",
    "CI0000053161": "forestiereequat",
    "FR0000120271": "totalenergies",
    "FR0000074197": "uniontechinfor",
    "FR0011277391": "myhotelmatch",
    "FR0013176526": "valeo",
    "FR0010588079": "frey",
    "GB00BDSFG982": "technipfmc",
    "FR0000030611": "galimmo",
    "FR0000034894": "gaumont",
    "FR0011462803": "gaussinbsar",
    "FR0004548873": "bourbon",
    "FR0000032526": "guerbet",
    "FR0013462827": "guerbetnv20",
    "FR0000044471": "ramsaygensante",
    "FR0000053035": "geagrenoblelect",
    "FR0013266764": "geciintlbsara",
    "FR0000079634": "geciintl",
    "FR0010501692": "generixgroup",
    "FR0010533075": "getlink",
    "FR0010040865": "gecina",
    "FR0000072373": "egide",
    "FR0000060790": "signauxgirod",
    "FR0004010338": "groupejaj",
    "FR0013399474": "genkyotex",
    "FR0000130809": "societegenerale",
    "FR0000066672": "glevents",
    "US3696041033": "generalelectric",
    "FR0004163111": "genfit",
    "CH0308403085": "geneuro",
    "FR0000062671": "exailtechnologies",
    "FR0010214064": "gpegrouppizzorno",
    "FR0000037970": "museegrevin",
    "FR0000065971": "grainesvoltz",
    "US36254L1008": "gtbiopharma",
    "FR0011726835": "gtt",
    "FR0000066722": "guillemot",
    "FR0011799907": "genomicvision",
    "FR0000054231": "high",
    "FR0004165801": "hotelsdeparis",
    "FR0004159473": "hexaom",
    "FR0000038531": "hfcompany",
    "FR0012821916": "hipaygroup",
    "FR0000121329": "thales",
    "FR0013466265": "thalesnv20",
    "FR0000065278": "hopscotchgroupe",
    "GB0005405286": "hsbcholdings",
    "MA0000011488": "maroctelecom",
    "FR0000035081": "icade",
    "FR0000051393": "id",
    "FR0010929125": "idlogisticsgroup",
    "US4595061015": "iff",
    "FR0000030827": "ige+xao",
    "FR0004035913": "iliad",
    "FR0000033243": "immobdassault",
    "FR0010341032": "fonciereinea",
    "FR0000071797": "infotel",
    "FR0000066219": "indlefinentrepr",
    "FR0000125346": "ingenicogroup",
    "FR0000064297": "innelecmultimedia",
    "FR0010331421": "innatepharma",
    "FR0010259150": "ipsen",
    "FR0000073298": "ipsos",
    "FR0013466273": "ipsosnv20",
    "FR0000124232": "groupeird",
    "FR0004026151": "itesoft",
    "FR0004024222": "interparfums",
    "FR0000064958": "intexa",
    "FR0013233012": "inventiva",
    "FR0012872141": "jacquesbogart",
    "FR0000033904": "jacquetmetals",
    "FR0013190410": "orchestrapremaman",
    "FR0000121485": "kering",
    "FR0004029411": "keyrus",
    "FR0013381951": "kkointwarranta",
    "FR0000120685": "natixis",
    "FR0004007813": "kaufmanetbroad",
    "FR00140003N8": "korian",
    "FR0010386334": "clariane",
    "FR0000066607": "lacroixgroup",
    "FR0000035263": "lafuma",
    "FR0000032278": "latecoere",
    "FR0000038242": "lumibird",
    "FR0000121295": "lebon",
    "FR0000065930": "fonciere7invest",
    "FR0013030152": "francaiseenergie",
    "FR0000039638": "lafonciereverte",
    "CH0012214059": "holcim",
    "FR0000121964": "klepierre",
    "FR0004156297": "linedataservices",
    "FR0004170017": "lnasante",
    "FR0012938884": "solocalgroup",
    "FR00140006O9": "solocalgroup",
    "FR0013532843": "solocal",
    "FR0013204336": "ldc",
    "FR0006864484": "laurentperrier",
    "FR0010307819": "legrand",
    "FR0000065484": "lectra",
    "FR0000053837": "altamir",
    "FR0013525953": "lucibelbs",
    "FR0013512357": "lumibird",
    "FR0013233475": "lysogene",
    "FR0013484466": "ymagisb",
    "FR0011471291": "ymagis",
    "FR0013525557": "makheiagroupb",
    "FR0013419694": "makheiabsa2019",
    "FR0000030074": "malteriesfcobel",
    "FR0000032302": "manutanintl",
    "FR0000051070": "maureletprom",
    "FR0013404944": "mbwsbsa2022",
    "FR0013066313": "mbwsbsar2023",
    "FR0000121014": "lvmh",
    "FR0011742329": "mcphyenergy",
    "FR0013153541": "maisonsdumonde",
    "FR0013247137": "mediawan",
    "FR0013128907": "mediawanwar",
    "FR0004065605": "medincell",
    "FR0010298620": "memscapregpt",
    "FR0010241638": "mercialys",
    "FR0013483534": "metabolicexbsa21",
    "FR0004177046": "metabolicexplorer",
    "FR0000121204": "wendel",
    "FR0010609263": "maunakeatech",
    "FR0000121261": "michelin",
    "FR0012395457": "rapidopret",
    "FR0010781377": "caire",
    "NL0010273694": "amatheonagr",
    "PTAZR0AM0006": "azoreantech",
    "FR0010621722": "toutabo",
    "FR0010979377": "activiumgroup",
    "FR0012968485": "ashleretmanson",
    "FR0011908045": "ag3",
    "ES0105478004": "agpmalagasocim",
    "FR0013251584": "health",
    "FR0013285103": "airmarine",
    "GB00BJ9M4V82": "ucapitalglobal",
    "FR0013253812": "alveen",
    "FR0013268067": "mld",
    "FR0005057635": "ardoinstamanda",
    "FR0010518605": "silkanrt",
    "FR0010050773": "atoutevites",
    "FR0011896463": "audiencelabs",
    "IT0004812258": "azleasing",
    "FR0010436170": "batlaminerals",
    "FR0013340973": "bluesharkpower",
    "FR0006205019": "lombardetmedot",
    "FR0010717579": "cecuritycom",
    "FR0000037871": "chferdepartement",
    "MC0010000826": "cfmindosuezwealth",
    "FR0010447086": "cheopstechnology",
    "FR0012384907": "cioa",
    "FR0011092089": "maisonclioblue",
    "FR0010959684": "colipays",
    "FR0000060428": "ciedumontblanc",
    "FR0013406881": "cmgcleantech",
    "FR0010972091": "scem",
    "FR0000077828": "consortnt",
    "FR0010035816": "coreplighting",
    "FR0004998318": "courbet",
    "IT0005398877": "cesyntbshares",
    "FR0000051567": "chfervargardn",
    "FR0000077885": "damaris",
    "SE0007045380": "designyourhome",
    "FR0000052920": "dynafond",
    "LU0881232630": "dynexenergy",
    "HK0000038783": "eassonholdings",
    "FR0010536185": "eavs",
    "DE000A0XYM45": "ecolutions",
    "FR0007200100": "eauxderoyan",
    "FR0000052755": "editionsdusigne",
    "FR0010439265": "eduniversal",
    "FR0010945733": "ees",
    "FR0010157115": "eurolandcorporate",
    "FR0010492181": "metalliance",
    "FR0000044745": "everset",
    "FR0012300424": "fd",
    "CH0120879058": "firstcau",
    "FR0013222346": "francesoirgroupe",
    "FR0010823724": "francetourisme",
    "FR0010487272": "finaxo",
    "FR0000053415": "ga",
    "FR0011100759": "galeo",
    "LU1840650458": "guandaopuerinves",
    "FR0012336675": "d2lgroup",
    "PTGVE1AE0009": "gentlemensequity",
    "FR0013371507": "coretech5",
    "FR0000076960": "groupecarnivor",
    "FR0000051302": "hochebainslbain",
    "FR0012336691": "homeconcept",
    "FR0006563904": "hotlimmobnice",
    "DE000A11Q133": "hk",
    "FR0006226791": "hotmajesticcanne",
    "FR0006823092": "partindlesmin",
    "FR0000053738": "hotelim",
    "FR0010312181": "hopening",
    "FR0000064735": "hydrauliquepb",
    "FR0005843125": "hydroexploit",
    "GB00BT9PTW34": "innovadermaplc",
    "FR0000079691": "",
    "FR0011158823": "infoclip",
    "FR0010086371": "imalliance",
    "FR0000065773": "imprimeriechirat",
    "BE6200101556": "iocholding",
    "FR0006859039": "immparisperle",
    "IT0005391161": "innovativerfkspa",
    "IT0005336521": "italyinnovazion",
    "FR0010680033": "jsatechnology",
    "FR0004152700": "well",
    "IT0005324105": "medialab",
    "FR00140007I9": "constructeursbois",
    "FR0004155208": "locasystemintl",
    "FR0000061657": "maisantoinebaud",
    "FR0010328302": "made",
    "FR0010515742": "maisonsd" "aujourd",
    "ES0105447009": "maqadmonurbanas",
    "CI0000000832": "simat",
    "IT0004844848": "mediocreditoeurop",
    "BE6252013725": "condortechnolog",
    "FR0010827741": "magillem",
    "BE0974328602": "metricsinbalance",
    "FR0013230067": "fnptechnologies",
    "FR0010882886": "eduformac",
    "FR0010518936": "mtdfinance",
    "FR0012990968": "mulann",
    "FR0013344223": "natureetlogis",
    "FR0010358507": "newsinvest",
    "FR0004157543": "neocommultimedia",
    "FR0010500363": "miguetetassocies",
    "FR0000185464": "novatechind",
    "FR0013465747": "michelinnv20",
    "FR0013310281": "octopusbiosafety",
    "FR0013266772": "oneexperience",
    "FR0010176115": "olmix",
    "FR0010106039": "bodyone",
    "FR0004174712": "onlineformapro",
    "FR0013072741": "osorbetdamour",
    "FR0000077992": "pactenova",
    "FR0000185928": "parfex",
    "BE0948608451": "photonikecapital",
    "FR0000185480": "phoneweb",
    "FR0000030769": "placoplatre",
    "FR0000061376": "propimmeubles",
    "CA74375L1058": "proventuregold",
    "NL0012650535": "parxmaterials",
    "FR0011040690": "tpsh",
    "FR0010529719": "groupeplusvalues",
    "FR0000077232": "revivalexpansion",
    "FR0000035750": "rousseletcentrif",
    "FR0004175222": "sbt",
    "FR0010961920": "schobrunnparis",
    "FR0013155975": "savonnerienyons",
    "NL0010623518": "sequapetroleum",
    "CH0220529603": "scandinavianhou",
    "FR0010679365": "silc",
    "FR0004038818": "simointernational",
    "GB00BF553726": "microskin",
    "FR0011131788": "smalto",
    "IT0005072811": "semplicementespa",
    "IT0005072167": "sintesispa",
    "FR0000054371": "speedrabbitpizza",
    "FR0013063559": "steamfrance",
    "FR0000063976": "streitmecanique",
    "GB00B8GJBS16": "sumoresourcesplc",
    "FR0011668821": "algreen",
    "FR0000033466": "team",
    "FR0000031866": "tramwaysderouen",
    "FR0000076481": "troischenes",
    "FR0000031106": "trocile",
    "FR0011776889": "umalisgroup",
    "FR0005783503": "unimetalght",
    "FR0013180189": "untoitpourto",
    "BE6201089735": "lv4s",
    "FR0012833770": "valoneo",
    "FR0013419876": "kumulusvape",
    "FR0006174496": "verneycarron",
    "FR0011605617": "foncierevind",
    "FR0010033480": "visionerf",
    "FR0011076595": "televista",
    "FR0010688465": "weaccessgroup",
    "FR0010768770": "weya",
    "ES0105399002": "whitenircajal",
    "ZM0000000037": "zccm",
    "FR0000130213": "lagardere",
    "FR0000053225": "metropoletv",
    "FR0000076986": "moncey(fin)nom",
    "BE0003853703": "montea",
    "US58933Y1055": "merckandco",
    "FR0000060196": "mrm",
    "FR0000039620": "mersen",
    "IE00BJYS1G50": "mainstaymedical",
    "LU1598757687": "arcelormittal",
    "FR0000038606": "manitoubf",
    "FR0000077570": "micropole",
    "FR0014000VN3": "nr21",
    "FR0013482791": "nacon",
    "FR0011341205": "nanobiotix",
    "FR0013018041": "navya",
    "FR0011675362": "neoen",
    "FR0000044448": "nexans",
    "FR0012789386": "nextstage",
    "FR0000120859": "imerys",
    "FR0013282936": "neolifebs",
    "FI0009000681": "nokia",
    "FR0004166155": "nr21",
    "FR0000121691": "nrjgroup",
    "FR0004050250": "neurones",
    "FR0004154060": "netgem",
    "FR0010112524": "nexity",
    "FR0013268042": "prologuebsa2017",
    "FR0014000UO3": "octopus",
    "FR0013381431": "o2ibsa2",
    "FR0000062234": "compagnieodet",
    "FR0010428771": "olgroupe",
    "FR0004050300": "groupeopen",
    "FR0000120321": "loreal",
    "FR0000133308": "orange",
    "FR0000075392": "orap",
    "FR0013522216": "orapib",
    "FR0010609206": "orege",
    "FR0000060535": "fiducialrealest",
    "FR0000184798": "orpea",
    "FR0012127173": "oseimmuno",
    "FR0013054269": "parrotbsa1",
    "FR0013054335": "parrotbsa2",
    "FR0010263202": "paref",
    "FR0012612646": "groupepartouche",
    "FR0004038263": "parrot",
    "FR0011027135": "patrimoineetcomm",
    "FR0011844067": "plantadvancedbs",
    "FR0000053514": "pcas",
    "FR0000061459": "perrier(gerard)",
    "FR0012882389": "equasens",
    "FR0013318813": "paragonid",
    "FR0000066755": "haulottegroup",
    "FR0013495157": "pixiumvision",
    "FR00140009N5": "planetmedia",
    "FR0000124570": "plasticomnium",
    "FR0012432516": "poxel",
    "FR0011627900": "prologbsaar20",
    "FR0011994326": "prologuebsaa2021",
    "FR0000074783": "artmarketcom",
    "FR0000060832": "precia",
    "FR0004052561": "proactis",
    "FR0013398617": "prologueb",
    "FR0010380626": "prologue",
    "FR0000038465": "passat",
    "FR0000060329": "psbindustries",
    "FR0000130577": "publicisgroupe",
    "FR0000312928": "publicisb",
    "FR0013252186": "plastvalloire",
    "FR0012613610": "prodways",
    "FR0000120560": "quadient",
    "FR0000060618": "rallye",
    "FR0013344173": "rochebobois",
    "FR0000039091": "robertet",
    "FR0000130395": "remycointreau",
    "FR0000121634": "colas",
    "FR0013522612": "realites",
    "FR0000121121": "eurazeo",
    "FR0000120693": "pernodricard",
    "FR0000052516": "vilmorin&cie",
    "FR0000052292": "hermesintl",
    "FR0000131906": "renault",
    "FR0013477585": "roctoolbsa2020",
    "FR0000031684": "rothschild&",
    "FR0013269123": "rubis",
    "FR0000120388": "recylex",
    "FR0010451203": "rexel",
    "FR0000060121": "saintjeangroupe",
    "FR0000061418": "fiducialoffsol",
    "FR0000073272": "safran",
    "FR0000060071": "sam",
    "FR0000120578": "sanof",
    "FR0013462652": "sanofinv20",
    "FR0000120107": "savencia",
    "FR0000052680": "oeneo",
    "FR0000039109": "secheenvironnem",
    "FR0010411983": "scor",
    "FR0000032658": "synergie",
    "FR0000065765": "visiodent",
    "FR0000078321": "soditech",
    "FR0011950682": "sergeferrarigroup",
    "FR0004175842": "selectirente",
    "LU0088087324": "ses",
    "FR0010282822": "vusiongroup",
    "FR0010613471": "suez",
    "FR0000038804": "sofibuspatrimoine",
    "FR0010209809": "socfranccasinos",
    "FR0004155000": "groupesfp",
    "FR0000125007": "saintgobain",
    "FR0013183985": "gensightbiologics",
    "FR0000074122": "si",
    "FR0000121709": "seb",
    "AN8068571086": "schlumberger",
    "FR0000065492": "selcodis",
    "FR0013214145": "smcp",
    "FR0010649228": "smaltob",
    "FR0004016699": "smtpc",
    "FR0013199916": "somfy",
    "FR0000072563": "sodifrance",
    "FR0000030140": "sofrag",
    "FR0000065864": "sogeclair",
    "FR0013227113": "soitec",
    "BE0003470755": "solvay",
    "FR0000050809": "soprasteriagroup",
    "FR0000053944": "foncierevolta",
    "FR0000131732": "spircommunica",
    "FR0012757854": "spie",
    "FR0011289040": "sql",
    "FR0013006558": "showroomprive",
    "FR0013523081": "showroomprive",
    "FR0000064271": "stef",
    "NL0000226223": "stmicroelectronics",
    "FR0010949404": "stentys",
    "FR0012790756": "streamwidebs",
    "FR0000121972": "schneiderelectric",
    "FR0000121220": "sodexo",
    "FR0004180578": "swordgroup",
    "FR0007317813": "csgroup",
    "FR0000063307": "tayninh",
    "FR0013505062": "vantiva",
    "FR0010918292": "technicolor",
    "FR0013526225": "vantivabsa2024",
    "FR0013526217": "technicolor",
    "FR0000051807": "teleperformance",
    "FR0004529147": "tess",
    "FR0013295789": "tffgroup",
    "FR0000054900": "tf1",
    "FR0013333432": "thermadorgroupe",
    "FR0000066482": "tipiak",
    "BE0974338700": "titancement",
    "FR0013230612": "tikehaucapital",
    "FR0004188670": "tarkett",
    "FR0013483393": "lestqblanc202007bs",
    "FR0005175080": "transgene",
    "FR0000033003": "touax",
    "FR0005691656": "trigano",
    "FR0000060949": "tivoly",
    "CH0008175645": "televerbier",
    "FR0000054470": "ubisoftentertain",
    "FR0000034548": "unionfinfrance",
    "FR0000121501": "peugeot",
    "FR0000054215": "unibel",
    "FR0013326246": "unibailrodam",
    "FR0000073041": "pierrevacances",
    "FR0000031775": "vicat",
    "FR0000066680": "videlio",
    "FR0013328689": "vergnetb",
    "FR0004186856": "vetoquinol",
    "FR0000124141": "veoliaenviron",
    "FR0000050049": "vieletcompagnie",
    "FR0000031577": "virbac",
    "FR0000127771": "vivendi",
    "FR0013506730": "vallourec",
    "FR0000120354": "vallourec",
    "FR0004056851": "valneva",
    "FR0011472943": "valnevapref",
    "FR0011995588": "voltalia",
    "FR0013322724": "visiomedbsa20181",
    "FR0010291245": "verimatrix",
    "FR0000062796": "vrankenpommery",
    "FR0013447729": "verallia",
    "FR0000062465": "alanallman",
    "FR0004183960": "voluntis",
    "FR0013357621": "wavestone",
    "FR0011981968": "worldline",
    "BE0974310428": "xfab",
    "FR0004034072": "xilamanima",
    "FR0000052870": "xpologistics",
    "IE00BG0HDR01": "vreducationhold",
    "IE00B1FR8863": "greatwesternmin",
    "IE00BF0L3536": "aibgroupplc",
    "FR0014003PZ3": "accoracshares",
    "FR0014003QK3": "accoracwarrants",
    "FR0013368438": "albiomabsaar2018",
    "BE0974278104": "abogroup",
    "FR0013333077": "affluentmedical",
    "FR0014005OJ5": "acticorbiotech",
    "FR0014005AC9": "afyren",
    "FR0014004339": "epango",
    "IT0005450819": "mexedia",
    "GB00BNKGZC51": "amacorpora",
    "FR00140059B5": "audacia",
    "FR0014003711": "obiz",
    "FR0011365907": "boaconcept",
    "FR00140062P9": "cb",
    "FR0014004QR6": "cybergun",
    "FR0014004QZ9": "dolfines",
    "FR0014004974": "enogia",
    "FR0014004362": "entech",
    "FR0014005ZM5": "grolleau",
    "GB00BMDXQ672": "hamiltonglobalop",
    "FR0014001PM5": "hydrogenrefueling",
    "FR0014005IU4": "iceram",
    "FR00140048X2": "ikonisys",
    "FR0014000RP6": "groupimo",
    "FR00140069V2": "groupeberkem",
    "ES0105425005": "kompuestos",
    "FR0013308582": "largo",
    "FR0010844464": "medesispharma",
    "FR0014005XB3": "alphamos",
    "ES0105463006": "makingscience",
    "FR00140050Q2": "mnd",
    "FR0014003XT0": "nflbiosciences",
    "FR0014003J32": "namr",
    "FR0014003T71": "omerdecugis&cie",
    "FR00140012E8": "alstomnv21",
    "FR0011651694": "pherecydespharma",
    "GB00BLG2TX24": "rapidnutri",
    "FR00140043Y1": "spartoo",
    "FR0014006KI3": "altarea",
    "FR00140062B9": "toosla",
    "FR0014003UG3": "voyageursdumonde",
    "BE0974387194": "theravet",
    "FR0014004L86": "dassaultavia",
    "FR0014005AL0": "antininfrapartn",
    "IE00BXC8D038": "applegreenplc",
    "FR0014003U94": "aramisgroup",
    "IE00BD1RP616": "bankofirelandgp",
    "FR0013480985": "bluelineabsay",
    "FR0014003FE9": "believe",
    "FR0014007951": "dronevoltbs26",
    "IE00BWY4ZF18": "cairnhomesplc",
    "FR0014003UJ7": "alpes(compagnie)",
    "FR0014003N77": "coviviohotels",
    "IE0001827041": "crhplcord",
    "FR00140011Y8": "axanv21",
    "FR0014004HM6": "dbt",
    "FR0014003G01": "deetechshares",
    "FR0014003G19": "deetechwarrants",
    "IE00BJMZDW83": "dalatahotelgp",
    "IE0000527006": "datalexplc",
    "IE0000020408": "abbeyplc",
    "IE0007214426": "cplresourcesplc",
    "IE00BLRPQQ22": "donegalinvestment",
    "FR0014003TT8": "dassaultsystemes",
    "FR00140015U7": "dassaultsystnv21",
    "IE0003290289": "fbdholdingsplc",
    "FR00140049Q4": "evergreen",
    "FR0014003UI9": "explosifsprch",
    "FR0014001236": "eurofinsscienv21",
    "FR0014001GY9": "europcarmob",
    "FR0014005DA7": "exclusivenetworks",
    "FR0014003AQ4": "explosifsprodch",
    "FR0014004X25": "groupeflo",
    "IE00BWT6H894": "flutterentertain",
    "FR0014005SB3": "forseepower",
    "FR0014006O40": "frey",
    "FR0014001269": "guerbetnv21",
    "FR0014002A99": "genkyotex",
    "IE0000669501": "glanbiaplc",
    "US3696043013": "geaerospace",
    "IE00BF2NR112": "greencoatren",
    "GB00BY7QYJ50": "draperespritplc",
    "US36254L2097": "gtbiopharma",
    "IE00BD6JX574": "glenveaghpropplc",
    "GB0031477770": "firstderivatives",
    "IE00BGHQ1986": "hiberniareitplc",
    "FR0014003VY4": "hdf",
    "GB00BK7YQK64": "hammersonplc",
    "FR00140015T9": "thalesnv21",
    "GB00BYYN4225": "hostelworldgroup",
    "FR0014004J15": "i2poshares",
    "FR0014004JF6": "deezerwarrants",
    "IE00BWB8X525": "permtsbgphold",
    "FR0014003NO2": "intrasen",
    "FR00140015R3": "ipsosnv21",
    "IE00BLP58571": "irishcontgp",
    "IE00BJ34P519": "irishresprop",
    "IE00BDC5DG00": "kenmareresources",
    "IE0004927939": "kingspangroupplc",
    "IE0004906560": "kerrygroupplc",
    "FR0014004CE4": "lacroixgroup",
    "FR0014004JQ3": "latecoere",
    "FR0012634822": "maatpharma",
    "FR0013525565": "makheiagroupbsb",
    "FR0014001731": "mbws",
    "IE00BD64C665": "mincongroupplc",
    "FR0014005WE9": "agenceauto",
    "FR0014003V77": "airwell",
    "CH0451123589": "beaconsmind",
    "IE00BVGC3741": "malincorpplc",
    "FR0014003RM7": "groupecasol",
    "IT0005402034": "cesyntashares",
    "FR0014000U63": "hopium",
    "ES0105590006": "impulsefitness",
    "FR00140015Q5": "michelinnv21",
    "ES0105534004": "orinoquia",
    "FR0014003I41": "versity",
    "ES0105592002": "scientiaschool",
    "FR00140066X4": "metavisio",
    "FR0014003FN0": "integritasviager",
    "FR00140047H7": "wiziboat",
    "FR0014001202": "mersennv21",
    "FR0014002DH9": "neoen",
    "FR0014006F90": "nhoa",
    "FR0014001PV6": "nr21",
    "IE00B1WV4493": "originentplc",
    "FR0014001YS4": "onxeo",
    "FR0014007183": "ordissimo",
    "GB00B9275X97": "openorphanplc",
    "IE00BF0MZF04": "ormondeminingplc",
    "FR0014005HJ9": "ovh",
    "IE00B4XVDC01": "ovocabioplc",
    "IE00B0Q82B24": "petroneftres",
    "FR0014004EC4": "precia",
    "IE00B66B5T26": "providenceres",
    "FR0013477593": "roctoolbsa20202",
    "FR0014006NE6": "roctool",
    "IE00BYTBXV33": "ryanairholdplc",
    "FR00140012D0": "sanofinv21",
    "FR0014003UB4": "spineguardbs",
    "FR0014002JD5": "saintgobainnv21",
    "GB00B5ZN1N88": "segroplc",
    "IE00B1RR8406": "smurfitkappagp",
    "NL00150001Q9": "stellantis",
    "IE00B1HDWM43": "totalproduceplc",
    "FR00140065G1": "theblockchaingrbs",
    "NL0014559478": "technipenergies",
    "FR00140069U4": "theradiag",
    "GB0001500809": "tullowoilplc",
    "FR00140039U7": "transitionshares",
    "FR0014003AC4": "arvernewarrant",
    "FR0014006979": "tronics",
    "IE00BJ5FQX74": "unipharplc",
    "FR0014005GA0": "veolia",
    "FR0014003O76": "visiativbs",
    "FR0014003N85": "visiativ",
    "FR00140030J9": "vallourec",
    "FR0014005FZ9": "vitura",
    "FR0012532810": "wagaenergy",
    "IE00BDT5KP12": "yewgrovereitplc",
    "CH0043238366": "aryztaag",
    "FR001400AHX6": "abldiagnostics",
    "FR001400AA10": "adomos",
    "FR0014007ZB4": "aelisfarma",
    "FR0014008ZE6": "airfranceklm",
    "FR001400C2C3": "affluentmed",
    "FR00140072P8": "acheterlouerfr",
    "FR001400BJ77": "acheterlouerfr",
    "FR001400AJZ7": "broadpeak",
    "FR0014007LW0": "cb",
    "FR001400AJ60": "charwoodenergy",
    "FR0014009LP0": "deltadrone",
    "FR001400C2Z4": "eniblock",
    "FR001400AEM6": "fillupmedia",
    "FR0014007ND6": "haffnerenergy",
    "FR0014007LQ2": "hunyvers",
    "FR001400A3Q3": "icapeholding",
    "IT0005380602": "sipariomovies",
    "FR0014007XT1": "archos",
    "FR00140077X1": "neovacs",
    "FR0013439627": "groupeokwind",
    "IT0005466963": "racingforce",
    "FR0014005I80": "smaio",
    "FR001400BVK2": "spineway",
    "NL0015000YE1": "tmepharma",
    "IT0005507857": "tatatu",
    "FR0014008D33": "atar",
    "FR001400CFI7": "avenirtelecom",
    "FR001400AV72": "bluelinea",
    "FR0014009353": "bollorenv22",
    "FR0014007HU2": "crossjectbs",
    "FR00140074K5": "axanv22",
    "FR001400AYG6": "deezer",
    "FR00140078T7": "dassaultsysnv22",
    "FR0014008VX5": "euroap",
    "FR0014008WS3": "edf",
    "FR0014006U75": "bouyguesnv22",
    "FR0014008SJ0": "faurecia",
    "FR0014006O73": "eurofinsscienv22",
    "FR001400COS8": "implanetbs2023",
    "FR001400APP5": "fonciereinea",
    "FR00140078W1": "ipsosnv22",
    "FR0014009ON9": "eurekingshares",
    "FR0014009OX8": "eurekingwarrants",
    "FR0014009YQ1": "lhyfe",
    "FR001400AJ45": "michelin",
    "CH1148983609": "bebohealth",
    "BE6333353298": "bonyf",
    "IT0005454175": "glasstopowerb",
    "ES0105664009": "hotelesbestprice",
    "ES0105479002": "ianteinvestments",
    "FR0014006PT9": "mdv",
    "FR001400CM63": "oncodesignpm",
    "ES0105612008": "perseidarenta",
    "FR001400CDB7": "smartgoodthings",
    "FR0014007T10": "vaziva",
    "FR00140085W6": "mrm",
    "FR0014006T94": "sanofinv22",
    "FR0014009TI8": "saintgobainnv22",
    "FR001400BWV7": "technicolorcs",
    "FR001400BMH7": "teract",
    "FR001400B4H9": "pierrevacbsaact",
    "FR001400B4G1": "pierrevacbsacre",
    "FR001400B4D8": "pierrevacances",
    "FR001400J770": "airfranceklm",
    "FR001400FL38": "affluentmedbsar",
    "FR001400JAL7": "adomos",
    "FR001400JAP8": "acheterlouerfr",
    "FR001400IAM7": "boostheat",
    "FR001400DIY6": "cabas",
    "FR001400CF13": "europlasma",
    "FR001400GO75": "florentai",
    "FR001400IV58": "algreen",
    "FR001400GZ72": "algreen",
    "FR001400KO61": "archos",
    "FR001400F2Z1": "lepermislibre",
    "FR001400H3A0": "moncourtierenerg",
    "FR001400HDX0": "neovacs",
    "FR001400JXB0": "pharnext",
    "FR001400BV89": "pharnext",
    "FR001400GUN7": "pharnext",
    "BE0974429624": "pharmasimple",
    "FR001400JX97": "pixiumvision",
    "FR001400F1V2": "safe",
    "FR001400MDW2": "stif",
    "FR001400H2X4": "tonnerdrones",
    "FR001400M9E2": "tmepharma",
    "FR001400GM69": "lucibel",
    "FR001400JXA2": "vergnet",
    "FR001400D5I2": "vergnet",
    "FR001400AXT1": "vinpa",
    "FR001400IUV6": "osmosun",
    "KYG6096M1226": "aptorumgroupcla",
    "FR001400JWR8": "arvernegroup",
    "BE0970179827": "biosenic(sub)c1",
    "FR001400LN79": "biophytisb",
    "FR001400LN87": "biophytis",
    "FR001400D0X2": "cbib",
    "US2220702037": "coty",
    "FR001400ED13": "axanv23",
    "FR001400IAQ8": "diagnosticmedb",
    "FR001400EBA9": "eurofinsscienv23",
    "FR001400GR23": "erold",
    "FR001400GY40": "europlasmab",
    "FR001400MAM9": "frey",
    "FR001400GG91": "globalbioenergie",
    "FR001400M8Z9": "egide",
    "FR001400JY13": "latecoere",
    "FR001400LAB4": "latecoere",
    "FR001400LKR9": "methanor",
    "FR001400IE67": "myhotelmatch",
    "GRS528003007": "theazurselec",
    "ES0105719001": "nortembiogroup",
    "ES0105636007": "jungle21",
    "FR001400MKO4": "construbois",
    "ES0105549002": "montepinologistic",
    "ES0105697009": "mutterventures",
    "NL0015001HZ9": "petservice",
    "FR001400LBS6": "smartgoodthings",
    "ES0105639001": "emben",
    "ES0105704003": "virtualware",
    "FR001400GKP3": "mersen",
    "FR001400GA06": "neoen",
    "FR001400LAA6": "orpea",
    "FR001400H9U5": "osmozis",
    "FR001400K4B1": "phaxiamtx",
    "FR001400ED05": "sanofinv23",
    "FR001400M212": "spineguard",
    "BE0974464977": "syensqo",
    "FR001400I939": "technicolorcs",
    "NL0015001SS1": "tmepharmabsay",
    "FR001400MDR2": "xilamanima",
    "FR001400SOY2": "acheterlouerfr",
    "FR001400OS22": "boostheat",
    "FR001400OLP5": "biophytis",
    "FR001400LO86": "dbt",
    "FR001400SP13": "dolfines",
    "FR001400SVN0": "dronevolt",
    "BE0974497290": "europeanmedicals",
    "FR001400PDG8": "europlasma",
    "FR001400M1R1": "geciintl",
    "FR001400TL40": "louishachette",
    "FR001400MV37": "neovacs",
    "FR001400TG47": "neovacs",
    "FR001400RF99": "netmediagroup",
    "FR001400U4P9": "odysseetechno",
    "FR001400N1P4": "pharnext",
    "GB00BM9PTW47": "rapidnutri",
    "FR001400RKU0": "safe",
    "FR001400N2P2": "spineway",
    "FR0013230950": "lighton",
    "FR001400THA4": "atos",
    "FR001400QJH1": "axwaysoftware",
    "FR001400TO39": "balyo",
    "FR001400U306": "gascogne",
    "FR001400OUH3": "bollorenv24",
    "FR001400O416": "cbibsaa",
    "FR001400SUB7": "cambodgenom",
    "FR001400O408": "cbibsab",
    "FR001400UR90": "crossjetbs27",
    "FR001400OKR3": "casinoguichard",
    "FR001400OJ98": "casinobsa3",
    "FR001400QER1": "nicox",
    "FR001400M7B2": "axanv24",
    "FR001400N0G5": "stdupont",
    "FR001400RE25": "dronevolt",
    "FR001400TN89": "toureiffel",
    "FR001400NLM4": "orpea",
    "FR001400U0Z6": "euromedis",
    "FR001400Q9V2": "exosens",
    "FR001400SU99": "moncey(fin)nom",
    "FR001400TAW3": "egideb",
    "FR001400TB00": "egide",
    "US36254L3087": "gtbiopharma",
    "FR001400MDQ4": "implanet",
    "FR001400SA10": "solocalgroup",
    "FR001400SF56": "ldc",
    "ES0105744009": "andinoglobal",
    "ES0105726006": "elix",
    "FR001400NQB6": "macomptafr",
    "BE0970185881": "montea(sub)c27",
    "FR001400QHI3": "netmediagroup",
    "FR001400OLG4": "alphamos",
    "FR001400QEQ3": "nicoxb",
    "NL0015000HT4": "onwardmedical",
    "FR001400MXO7": "orpea",
    "FR001400PFU4": "planisware",
    "NL0015001W49": "pluxee",
    "FR001400M6Z3": "sanofinv24",
    "FR001400R4B4": "solocalgp",
    "FR001400RIB4": "tonnerdronesb",
    "NL0015001SR3": "tmepharmabsaz",
    "FR001400Q9J7": "veom",
    "FR001400PVN6": "viridien",
    "FR001400T9O4": "weya",
}

In [91]:
def get_unique_isin_name_combinations(df):
    cleaned = df.copy()
    cleaned["isin"] = cleaned["isin"].astype(str).str.strip()
    cleaned["name"] = cleaned["name"].astype(str).str.strip().str.lower()
    return cleaned[["isin", "name"]].drop_duplicates().reset_index(drop=True)


def remove_substrings(series: pd.Series, substrings: list[str]) -> pd.Series:
    cleaned = series.astype(str)
    for sub in substrings:
        cleaned = cleaned.str.replace(sub, "", regex=False)
    return cleaned.str.lower()


def remove_ending_substrings(series: pd.Series, endings: list[str]) -> pd.Series:
    cleaned = series.astype(str)
    for ending in endings:
        cleaned = cleaned.str.rstrip()  # In case of trailing spaces before matching
        cleaned = cleaned.apply(
            lambda x: x[: -len(ending)] if x.endswith(ending) else x
        )
    return cleaned.str.strip()


def remove_starting_substrings(series: pd.Series, prefixes: list[str]) -> pd.Series:
    cleaned = series.astype(str)
    for prefix in prefixes:
        cleaned = cleaned.str.lstrip()  # In case of leading spaces before matching
        cleaned = cleaned.apply(
            lambda x: x[len(prefix) :] if x.startswith(prefix) else x
        )
    return cleaned.str.strip()


def remove_ending_substrings_regex(series: pd.Series, endings: list[str]) -> pd.Series:
    # Escape any special regex characters in endings to match them literally
    escaped_endings = [re.escape(e) for e in endings]

    # Build one regex pattern that matches any ending at the end of a string ($)
    pattern = f"({'|'.join(escaped_endings)})$"

    # Use str.replace with regex=True to remove matching suffixes
    return series.astype(str).str.replace(pattern, "", regex=True).str.strip()


def apply_custom_name_mapping(df: pd.DataFrame, mapping: dict) -> pd.DataFrame:
    df = df.copy()
    df["name"] = df["name"].map(mapping).fillna(df["name"])
    return df

from rapidfuzz import process

def attach_isin_to_boursorama(b_df, e_df, fuzzy_threshold=80):
    # Get unique (isin, name) mapping from Euronext
    isin_name_map = get_unique_isin_name_combinations(e_df)

    # Reset index in Boursorama to access 'name' as column
    b_reset = b_df.reset_index()

    # Merge on the pre-cleaned 'name'
    merged = pd.merge(b_reset, isin_name_map, on="name", how="left")

    unmatched_count = merged["isin"].isna().sum()
    print(f"Initial unmatched rows: {unmatched_count}")

    if unmatched_count > 0:
        print(f"⚠️ {unmatched_count} rows had no matching ISIN. Attempting fuzzy matching...")

        # Perform fuzzy matching for unmatched rows
        unmatched = merged[merged["isin"].isna()].copy()  # Copy to avoid modifying the original DataFrame
        print(f"Number of rows to process with fuzzy matching: {len(unmatched)}")
        isin_name_dict = dict(zip(isin_name_map["name"], isin_name_map["isin"]))

        # Reduce the size of isin_name_dict by filtering based on potential matches
        unmatched_names = unmatched["name"].unique()
        filtered_isin_name_dict = {
            key: value
            for key, value in isin_name_dict.items()
            if any(key.startswith(name[:3]) for name in unmatched_names)  # Example heuristic: match by first 3 characters
        }
        print(f"Reduced ISIN dictionary size from {len(isin_name_dict)} to {len(filtered_isin_name_dict)}")

        # Use rapidfuzz for faster fuzzy matching
        for idx, row in unmatched.iterrows():
            print(f"Processing row index {idx} with name: {row['name']}")
            result = process.extractOne(
                row["name"], filtered_isin_name_dict.keys(), score_cutoff=fuzzy_threshold
            )
            if result is not None:
                match, score, _ = result
                print(f"Fuzzy match result: {match} with score: {score}")
                if match:
                    print(f"Match found for row index {idx}: {match} (score: {score})")
                    merged.at[idx, "isin"] = filtered_isin_name_dict[match]
            else:
                print(f"No suitable match found for row index {idx}")

        unmatched_count_after = merged["isin"].isna().sum()
        print(f"✅ Fuzzy matching reduced unmatched rows to {unmatched_count_after}.")

    return merged.set_index(["symbol", "date"])

def remove_inactive_stocks(df: pd.DataFrame) -> pd.DataFrame:
    # Step 1: Group by symbol and check if turnover is always zero
    inactive_symbols = df.groupby("symbol")["turnover"].apply(lambda x: (x == 0).all())

    # Step 2: Filter out those symbols
    active_symbols = inactive_symbols[~inactive_symbols].index

    # Step 3: Keep only rows with active symbols
    return df[df.index.get_level_values("symbol").isin(active_symbols)]


def get_both(year):
    suffix = [
        "westfield",
        "mo.",
        "ds",
        "we",
        "co",
        "rpfd",
        "sa",
        "i",
        "inc",
        "tion",
        "i18",
        "(exassystembrime)",
        "om.",
        "i14",
        "(societe)",
        "act.a",
        "inv",
        "htls",
        "ds06",
        "mo.",
        "opr",
        "opa",
        "i13",
        "cc",
        "vilcc",
        "i16",
        "nv",
        "se",
        "(ex:eurotunnel)",
        "corp",
        "ltd",
    ]
    prefix = ["srd"]
    bourso_name_convert = {
        "bainsdemermona": "bainsmermona",
        "baccaratn": "baccarat",
        "bigbeninteractiv": "bigbeninteractive",
        "bastideleconfor": "bastideleconfort",
        "cambodgecien": "cambodgen",
        "robertetcie87": "robertetc",
        "robertetcdv": "robertet",
        "casinoguichardperrachon": "casinoguichard",
        "casinoguicper": "casinoguichard",
        "sartoriussted": "sartoriusstedbio",
        "sartoriusbiotech": "sartoriusstedbio",
        "eurofinsscientif": "eurofinsscient",
        "exelindustrie": "exelindustries",
        "deltaplusgrp": "deltaplusgroup",
        "dassaultsys": "dassaultsystemes",
        "euroress": "euroressources",
        "foncierelyonnais": "foncierelyonnaise",
        "pernodricardnv11": "pernodricard",
        "ramsaygenerale": "ramsaygensante",
        "gecinanominatif": "gecinanom",
        "gtt(gaztransportettec)": "gtt",
        "idlogistics": "idlogisticsgroup",
        "igeplusxao": "ige+xao",
        "kering(ex:ppr)": "kering",
        "lebonn": "lebon",
        "linedata": "linedataservices",
        "arcelormittal": "arcelor",
        "malterfrancobel": "malteriesfcobel",
        "maurel&prom": "maureletprom",
        "maurelpr": "maureletprom",
        "maurel": "maureletprom",
        "michelinn": "michelin",
        "michelin(mlnv)": "michelinnv20",
        "metropoletele": "metropoletv",
        "m6metropoletele": "metropoletv",
        "nrjgrp": "nrjgroup",
        "fiducialrealestate": "fiducialrealest",
        "partouche": "groupepartouche",
        "grpepartouche": "groupepartouche",
        "patrimoinecom": "patrimoineetcomm",
        "pharmagestinteract": "pharmagestinter",
        "publicisgrp": "publicisgroupe",
        "plastvaldeloire": "plastvalloire",
        "plastivaloire": "plastvalloire",
        "plastvdeloir": "plastvalloire",
        "eurazeodaanf": "eurazeo",
        "groupesteria": "soprasteriagroup",
        "secheenviron": "secheenvironnem",
        "silic": "silc",
        "soitecpsr16": "soitec",
        "soprasteria": "soprasteriagroup",
        "sqlinr": "sql",
        "stmicroelectr": "stmicroelectronics",
        "schneiderel": "schneiderelectric",
        "schneiderelec": "schneiderelectric",
        "technip": "technipfmc",
        "thermador": "thermadorgroupe",
        "tikehaurt170717": "tikehaucapital",
        "technicolornr": "technicolor",
        "pierreetvacances": "pierrevacances",
        "veoliaenvironnem": "veoliaenviron",
        "veolia": "veoliaenviron",
        "viel": "vieletcompagnie",
        "voltaliart080719": "voltalia",
        "vrankenpommerymonopole": "vrankenpommery",
        "xfabsilicon": "xfab",
        "vrankenpommerymo": "vrankenpommery",
    }
    # suffix_regex = ['i\\d*']

    bourso = convert_bourso_daily(read_bourso_year(year))
    bourso["name"] = remove_substrings(bourso["name"], [" ", "-", ".", "/"])
    bourso["name"] = remove_ending_substrings(bourso["name"], suffix)
    bourso["name"] = remove_starting_substrings(bourso["name"], prefix)
    bourso = apply_custom_name_mapping(bourso, bourso_name_convert)
    # bourso['name'] = remove_ending_substrings_regex(bourso['name'],suffix_regex)

    euro = get_bourso_matching_df(read_euronext_year(year))
    euro["name"] = remove_substrings(euro["name"], [" ", "-", ".", "/"])
    euro["name"] = remove_ending_substrings(euro["name"], suffix)
    euro["name"] = remove_starting_substrings(euro["name"], prefix)
    # euro['name'] = remove_ending_substrings_regex(euro['name'],suffix_regex)
    euro.reset_index(inplace=True)
    euro["date"] = euro["date"].apply(lambda x: dateutil.parser.parse(x))

    euro.set_index("symbol", append=False, inplace=True)
    euro.set_index("date", append=True, inplace=True)
    euro.sort_index()

    bourso = attach_isin_to_boursorama(bourso, euro)

    bourso = remove_inactive_stocks(bourso)
    euro = remove_inactive_stocks(euro)

    bourso["name"] = bourso["isin"].map(isin_name_mapping).fillna(bourso["name"])
    euro["name"] = euro["isin"].map(isin_name_mapping).fillna(euro["name"])
    return bourso, euro

# Generate ISIN:Name combinations

In [92]:
isin_name_mapping = {}

In [93]:
def update_isin_name_dict(lookup_df: pd.DataFrame, mapping: dict) -> dict:
    for _, row in lookup_df.iterrows():
        mapping[row["isin"]] = row["name"]
    return mapping

In [94]:
def isin_name_for_all(map):
    # 2019 has no equivalent euronext year so we're generating them from 2020 onwards
    for i in range(5):
        year = "202" + str(i)
        _, e = get_both(year)
        unique_isin_names = get_unique_isin_name_combinations(e)
        map = update_isin_name_dict(unique_isin_names, map)
    return map

In [None]:
isin_name_mapping = isin_name_for_all(isin_name_mapping)

Initial unmatched rows: 55485
⚠️ 55485 rows had no matching ISIN. Attempting fuzzy matching...
Number of rows to process with fuzzy matching: 55485
Reduced ISIN dictionary size from 932 to 237
Processing row index 932 with name: unibrodamwesstpl
No suitable match found for row index 932
Processing row index 933 with name: unibrodamwesstpl
No suitable match found for row index 933
Processing row index 934 with name: unibrodamwesstpl
No suitable match found for row index 934
Processing row index 935 with name: unibrodamwesstpl
No suitable match found for row index 935
Processing row index 936 with name: unibrodamwesstpl
No suitable match found for row index 936
Processing row index 937 with name: unibrodamwesstpl
No suitable match found for row index 937
Processing row index 938 with name: unibrodamwesstpl
No suitable match found for row index 938
Processing row index 939 with name: unibrodamwesstpl
No suitable match found for row index 939
Processing row index 940 with name: unibrodamwe

In [None]:
# Display the top 20 entries in the isin_name_mapping dictionary
for i, (isin, name) in enumerate(isin_name_mapping.items()):
    if i >= 20:
        break
    print(f"{isin}: {name}")

FR0013341781: 2crs
FR0014000T90: 2mxorganic
FR0014000TB2: 2mxorganicbs
FR0010557264: abscience
FR0004040608: abcarbitrage
FR0013185857: abeo
FR0000060402: albioma
FR0012616852: abionyxpharma
FR0012333284: abivax
FR0000120404: accor
FR0000045072: creditagricole
FR0000064602: acanthedev
FR0011184241: adocia
FR0010340141: adp
FR0012821890: adux
FR0004152874: advenis
FR0000053043: advin
FR0013296746: advicenne
FR0000031122: airfranceklm
CH0008853209: agtarecord
