# PR Processing Workflow

Dec. 16, 2023: Refactoring of the code in `read_files.ipynb` to define the pipeline. The code below covers 4 steps (plus one extra step, not used in the final analysis) and assumes that the Excel files containing the PR text have already been produced. To extract the text from the HTML and PDF files, see `read_files.ipynb`. We thus assume that the folder `data` contains one file per company, named `{company}.xlsx`, with a column `text` containing the processed text of each PR. The code below also reads a `language` column (with values `en` or `es`) and a `filename` column from each file.

In [1]:
import os
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
import nltk
from nltk.tokenize import sent_tokenize
import pickle
from tqdm.notebook import tqdm
from tabulate import tabulate

from sklearn.feature_extraction.text import CountVectorizer
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer
from bertopic.representation import MaximalMarginalRelevance

from sklearn.metrics.pairwise import cosine_similarity

%load_ext lab_black
%load_ext jupyterlab_notify

<jupyterlab_notify.magics._Notification at 0x2a9525a50>

In [2]:
# Company identifiers used to build per-company file names
# (e.g. the comparison workbooks read by the summary step).
companies_all = """
    Acerinox ACS Bancosantander Bankinter BBVA Caixa Colonial Enagas Endesa
    Ferrovial Grifols IAG Iberdrola Inditex
    Acciona Arcelormittal Bancosabadell Cellnex Fluidra Indra Logista Melia
    Merlin Naturgy Red Repsol Rovi Sacyr Solaria Telefonica
""".split()
print(f"Analyzing {len(companies_all)} companies.")

Analyzing 30 companies.


## Code and Auxiliary Functions

In [3]:
def get_sentences(docs_list, stopwords):
    """Split documents into sentences and strip stopwords and punctuation.

    Each document is split with ``sent_tokenize``; every sentence is then
    tokenized with ``nltk.tokenize.wordpunct_tokenize`` and rebuilt from the
    tokens that are purely alphanumeric and are not stopwords.

    Stopword matching is case-insensitive: a token is dropped when either the
    token itself or its lowercase form is in ``stopwords``. The callers pass
    NLTK stopword lists, so without the lowercase check capitalized,
    sentence-initial stopwords ("The", "El") would be kept.

    Parameters
    ----------
    docs_list : iterable of str
        Documents to process.
    stopwords : collection of str
        Words to remove (a ``set`` gives the fastest membership test).

    Returns
    -------
    list of str
        One space-joined string per detected sentence, in document order.
        A sentence whose tokens are all filtered out yields an empty string.
    """
    sentences = []
    for doc in docs_list:
        for sentence in sent_tokenize(doc):
            tokens = nltk.tokenize.wordpunct_tokenize(sentence)
            kept = [
                w
                for w in tokens
                if w.isalnum() and w not in stopwords and w.lower() not in stopwords
            ]
            sentences.append(" ".join(kept))
    return sentences


def get_embeddings(docs):
    """Encode `docs` with the 'paraphrase-multilingual-MiniLM-L12-v2' model.

    A new ``SentenceTransformer`` is instantiated on every call and the texts
    are encoded with the progress bar enabled. The return value is whatever
    ``SentenceTransformer.encode`` yields for `docs`.
    """
    encoder = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
    return encoder.encode(docs, show_progress_bar=True)


def get_topics(language, sentences, embeddings, nr_topics=10):
    """Fit a BERTopic model on `sentences` using the supplied `embeddings`.

    Parameters
    ----------
    language : str
        Accepted for interface symmetry with the callers; the body does not
        currently read it.
    sentences : list of str
        Documents passed to ``BERTopic.fit_transform``.
    embeddings :
        Embeddings for `sentences`, passed through to ``fit_transform``.
    nr_topics : int, default 10
        Forwarded to ``BERTopic(nr_topics=...)``.

    Returns
    -------
    tuple
        ``(topic_model, topics, probs)``: the fitted model followed by the two
        values returned by ``fit_transform``.
    """
    # The English stop-word list is applied for every language: per the
    # original note it should be ignored in Spanish, and
    # stop_words='spanish' does not exist.
    bertopic_options = {
        "embedding_model": "paraphrase-multilingual-MiniLM-L12-v2",
        "vectorizer_model": CountVectorizer(stop_words="english"),
        "ctfidf_model": ClassTfidfTransformer(
            reduce_frequent_words=True, bm25_weighting=True
        ),
        "representation_model": MaximalMarginalRelevance(diversity=0.3),
        "nr_topics": nr_topics,
        "verbose": True,
    }
    topic_model = BERTopic(**bertopic_options)

    topics, probs = topic_model.fit_transform(sentences, embeddings)
    return topic_model, topics, probs


def topics_modeling(company):
    """Fit and save one BERTopic model per language for `company`.

    Iterates over the notebook-level `labels`, `languages` and
    `bert_languages` lists (they must be defined before this is called).
    For each language it loads the pickled sentences from
    ``data/sentences/sentences_{company}_{label}.pkl`` and the pickled
    embeddings from ``data/embeddings/{company}_{label}.pkl``, fits the topic
    model through `get_topics`, prints the topic table and saves the model to
    ``models/{company}_{label}.bert``.

    Parameters
    ----------
    company : str
        Company name used to build the input and output file names.
    """
    # Make sure the output folder exists before saving the first model.
    os.makedirs("models", exist_ok=True)

    for label, language, _bert_language in zip(labels, languages, bert_languages):
        print(f"\t - Language : {language}")
        filename = os.path.join(
            "data", "sentences", ("sentences_" + company + "_" + label + ".pkl")
        )
        # Context managers close the pickle files deterministically.
        with open(filename, "rb") as handle:
            sentences = pickle.load(handle)
        print("[S1.] \t Sentences Imported.")

        filename = os.path.join("data", "embeddings", (company + "_" + label + ".pkl"))
        with open(filename, "rb") as handle:
            embs = pickle.load(handle)
        print(
            "[S2.] \t Embeddings Imported. Starting topic modeling using transformer 'paraphrase-multilingual-MiniLM-L12-v2'..."
        )
        model, topic, prob = get_topics(
            language=language, sentences=sentences, embeddings=embs
        )
        print("[S2.] \t Topics Modeling done.")
        print(model.get_topic_info())
        filename = os.path.join("models", (company + "_" + label + ".bert"))
        model.save(filename)
        print(f"[S3.] \t Model {filename} saved to disk.")
        print("\n")


def sentence_tokenize(company, df):
    """Tokenize the PR texts of `company` into sentences, one file per language.

    Iterates over the notebook-level `labels`, `languages` and
    `bert_languages` lists (they must be defined before this is called).
    For each language, the rows of `df` whose `language` column equals the
    label are cleaned with `get_sentences`, using the NLTK stopword list of
    that language, and the resulting list is pickled to
    ``data/sentences/sentences_{company}_{label}.pkl``.

    Parameters
    ----------
    company : str
        Company name used to build the output file names.
    df : pandas.DataFrame
        Must provide the columns `language` and `text`.
    """
    print("\n[S1.] Tonekize sentences")
    # Make sure the output folder exists before writing the first file.
    os.makedirs(os.path.join("data", "sentences"), exist_ok=True)

    for label, language, _bert_language in zip(labels, languages, bert_languages):
        print(f"\t - Language : {language}")
        doc_list = df[df.language == label].text.values
        stopwords = set(nltk.corpus.stopwords.words(language))
        sentences = get_sentences(doc_list, stopwords)
        filename = os.path.join(
            "data", "sentences", ("sentences_" + company + "_" + label + ".pkl")
        )
        # Close (and therefore flush) the file before a later step reads it.
        with open(filename, "wb") as handle:
            pickle.dump(sentences, handle)
        print(
            f"\t[S1-{label}.] \t Sentences tonenized saved to disk. Total nr. sentences = {len(sentences)}."
        )
    print("[S1.] Done with tonekize sentences")


def embeddings_creation(company):
    """Create and store sentence embeddings for `company`, one file per language.

    Iterates over the notebook-level `labels`, `languages` and
    `bert_languages` lists (they must be defined before this is called).
    For each language it loads
    ``data/sentences/sentences_{company}_{label}.pkl``, encodes the sentences
    with `get_embeddings` and pickles the result to
    ``data/embeddings/{company}_{label}.pkl``.

    Parameters
    ----------
    company : str
        Company name used to build the input and output file names.
    """
    print("\n[S2.] Embeddings creation")
    # Make sure the output folder exists before writing the first file.
    os.makedirs(os.path.join("data", "embeddings"), exist_ok=True)

    for label, _language, _bert_language in zip(labels, languages, bert_languages):
        filename = os.path.join(
            "data", "sentences", ("sentences_" + company + "_" + label + ".pkl")
        )
        with open(filename, "rb") as handle:
            sentences = pickle.load(handle)
        print(f"[S2-{label}.] \t Starting with embeddings creation.")
        embs = get_embeddings(sentences)
        filename = os.path.join("data", "embeddings", (company + "_" + label + ".pkl"))
        with open(filename, "wb") as handle:
            pickle.dump(embs, handle)
        # Report success only once the file has actually been written.
        print(f"[S2-{label}.] \t Embeddings file {filename} saved to disk.")
    print("[S2.] Done with embeddings creation")


def _pr_2_pr_company(company, model, en_to_es, save_file):
    """Find the best cross-language match of every query PR of one company.

    Returns the tuple ``(mean_similarity, nr_en, nr_es)``.
    """
    print(f"** Sentence Transformers (match) for Company '{company}' **")
    df = pd.read_excel(os.path.join("data", company + ".xlsx"))
    df = df[~df.text.isna()]
    df_en = df[df.language == "en"]
    df_es = df[df.language == "es"]
    nr_en, nr_es = df_en.shape[0], df_es.shape[0]

    # Queries come from one language; the corpus searched is the other one.
    if en_to_es:
        query_df, corpus_df = df_en, df_es
        source, target = "en", "es"
    else:
        query_df, corpus_df = df_es, df_en
        source, target = "es", "en"
    query_list = query_df.text.to_list()
    query_files_list = query_df.filename.to_list()
    corpus_list = corpus_df.text.to_list()
    corpus_files_list = corpus_df.filename.to_list()
    name = "comparison_" + company + "_" + source + "2" + target + ".xlsx"

    best_match = []
    best_score = []
    best_name = []
    if query_list and corpus_list:
        corpus_embedding = model.encode(corpus_list, convert_to_tensor=True)
        for query in tqdm(query_list):
            query_embedding = model.encode(query, convert_to_tensor=True)
            cos_scores = util.cos_sim(query_embedding, corpus_embedding)[0]
            # Only the single best match is kept for each query.
            top_results = torch.topk(cos_scores, k=1)
            idx = int(top_results[1][0])
            best_match.append(idx)
            best_score.append(top_results[0][0].item())
            best_name.append(corpus_files_list[idx])
        mean_score = np.mean(best_score)
    else:
        # Nothing can be matched: report NaN and produce an empty table
        # instead of failing on mismatched column lengths.
        print(f"\t [NOTE] No '{source}' queries or no '{target}' corpus: nothing to match.")
        query_list, query_files_list = [], []
        mean_score = float("nan")

    print(
        f"{company:15s}\t similarity score from '{source}' to '{target}' = {mean_score:.3f}."
    )

    if save_file:
        df_comparison = (
            pd.DataFrame(
                {
                    "similarity": best_score,
                    "query": query_list,
                    "match": [corpus_list[i] for i in best_match],
                    "idx_match": best_match,
                    "query_filename": query_files_list,
                    "match_filename": best_name,
                }
            )
            .reset_index()
            .rename(columns={"index": "idx_query"})
        )
        os.makedirs("results/best_matching/", exist_ok=True)
        filename = os.path.join("results/best_matching/", name)
        df_comparison.to_excel(filename, index=False)
        print("Comparison file saved to disk : '{}'.".format(filename))

    return mean_score, nr_en, nr_es


def pr_2_pr(companies, transformer_type="laBSE", en_to_es=True, save_file=False):
    """Match each PR in one language to its most similar PR in the other language.

    For every company, ``data/{company}.xlsx`` is read (columns `text`,
    `language` and `filename` are required; rows without text are dropped).
    The PRs of the query language are encoded one by one and compared, by
    cosine similarity, against the encoded PRs of the other language; the
    single best match is kept for each query.

    Parameters
    ----------
    companies : str or iterable of str
        A company name, or several names. Earlier versions ignored this
        argument and read the notebook-level global `company` instead; the
        argument is now authoritative.
    transformer_type : str, default "laBSE"
        Model name passed to ``SentenceTransformer``.
    en_to_es : bool, default True
        If True, English PRs are the queries and Spanish PRs the corpus
        (output suffix ``en2es``); otherwise the reverse (``es2en``).
    save_file : bool, default False
        If True, the per-query matches are written to
        ``results/best_matching/comparison_{company}_{suffix}.xlsx``.

    Returns
    -------
    tuple of lists
        ``(similarity_scores, nr_ens, nr_ess)`` with one entry per company:
        the mean best-match similarity (NaN when nothing could be matched),
        the number of English PRs and the number of Spanish PRs.
    """
    company_names = [companies] if isinstance(companies, str) else list(companies)

    print(
        f"\t [NOTE] Similarity scores computed using '{transformer_type}' transformer"
    )
    # The model is loaded once and shared by all companies.
    model = SentenceTransformer(transformer_type)

    similarity_scores = []
    nr_ens = []
    nr_ess = []
    for company in company_names:
        mean_score, nr_en, nr_es = _pr_2_pr_company(
            company, model, en_to_es, save_file
        )
        similarity_scores.append(mean_score)
        nr_ens.append(nr_en)
        nr_ess.append(nr_es)
    return similarity_scores, nr_ens, nr_ess


def get_similarity_scores(company, transformer_type, with_printing=True):
    """Get pr_2_pr similarity scores in both directions (en->es, then es->en).

    `pr_2_pr` is called twice with ``save_file=True``, first with English
    queries and then with Spanish queries. When `with_printing` is True, a
    small summary table is printed after each direction. Nothing is returned.

    NOTE: For Rovi, Santander, and Solaria, manually change the header of
    column B in the data/company.xlsx Excel file (to "filename").

    Parameters
    ----------
    company : str
        Company name, forwarded to `pr_2_pr`.
    transformer_type : str
        Sentence-transformer model name, forwarded to `pr_2_pr`.
    with_printing : bool, default True
        Whether to print the per-direction summary table.
    """
    for en_to_es in (True, False):
        similarity_scores, nr_ens, nr_ess = pr_2_pr(
            company, transformer_type, en_to_es=en_to_es, save_file=True
        )

        if with_printing:
            df_sim = pd.DataFrame(
                {
                    "company": company,
                    "similarity": similarity_scores,
                    "nr_en": nr_ens,
                    "nr_es": nr_ess,
                }
            )
            print(tabulate(df_sim, headers="keys", tablefmt="fancy_grid"))


def get_summary_similarity(companies):
    """Summarize, save and print the average best-match similarity per company.

    For each company the two comparison workbooks
    ``results/best_matching/comparison_{company}_en2es.xlsx`` and
    ``..._es2en.xlsx`` are read; their row counts and the mean of their
    `similarity` column are collected. The table, sorted by company, is
    written to ``results/avg_similarity_company_both.xlsx`` and printed.
    """
    n_companies = len(companies)
    metric_names = ("nr_en", "nr_es", "similarity_en_to_es", "similarity_es_to_en")
    table = {metric: np.empty(n_companies) for metric in metric_names}

    for row, name in enumerate(tqdm(companies)):
        en2es = pd.read_excel(f"results/best_matching/comparison_{name}_en2es.xlsx")
        es2en = pd.read_excel(f"results/best_matching/comparison_{name}_es2en.xlsx")
        # Each workbook holds one row per query PR of its source language.
        table["nr_es"][row] = es2en.shape[0]
        table["nr_en"][row] = en2es.shape[0]
        table["similarity_en_to_es"][row] = en2es.similarity.mean()
        table["similarity_es_to_en"][row] = es2en.similarity.mean()

    df_temp = pd.DataFrame({"company": companies, **table}).sort_values(by="company")
    filename = "results/avg_similarity_company_both.xlsx"
    df_temp.to_excel(filename, index=False)
    print(f"Saving summary table to disk file: '{filename}'.")
    print(tabulate(df_temp, headers="keys", tablefmt="fancy_grid"))


def match_topics(company):
    """Find the best Spanish match for each English topic of `company`.

    Loads the saved models ``models/{company}_en.bert`` and
    ``models/{company}_es.bert``, computes the cosine similarity between
    their topic embeddings and, for every English topic, keeps the Spanish
    topic with the highest similarity. Two workbooks are written:
    ``results/topics/topics_match_{company}.xlsx`` (matched topic names and
    similarity) and ``results/topics/topics_details_{company}.xlsx`` (topic
    term details, English rows followed by the matched Spanish rows).

    Only the given `company` is processed. Earlier versions looped over the
    notebook-level `companies` list, shadowing the argument and redoing every
    company on each call.

    Parameters
    ----------
    company : str
        Company name used to build the model and output file names.
    """
    os.makedirs("results/topics", exist_ok=True)

    model_en = BERTopic.load(f"models/{company}_en.bert")
    model_es = BERTopic.load(f"models/{company}_es.bert")
    # The first embedding row / first topic-info row is skipped on both sides;
    # presumably this is BERTopic's outlier topic (-1) -- TODO confirm that
    # both models actually contain one.
    M = cosine_similarity(
        model_en.topic_embeddings_[1:], model_es.topic_embeddings_[1:]
    )
    idx = np.argmax(M, axis=1).tolist()  # best match for each english topic
    dfE = pd.DataFrame(model_en.get_topic_info()[1:]).rename(
        columns={"Name": "Name EN"}
    )
    dfS = (
        pd.DataFrame(model_es.get_topic_info().loc[[i + 1 for i in idx]])
        .reset_index()
        .rename(columns={"Name": "Best Match ES"})
    )
    dfMatch = (
        pd.concat(
            [dfE.reset_index(), dfS.reset_index(), pd.DataFrame(M.max(axis=1))],
            axis=1,
        )
        .rename(columns={0: "similarity"})
        .drop(columns=["index", "level_0"])
    )
    print(
        f"[{company:>20s}]\tTotal similarity score = {dfMatch.similarity.mean():.3f}"
    )
    dfMatch.to_excel(f"results/topics/topics_match_{company}.xlsx", index=False)
    print(f"Saving file 'results/topics/topics_match_{company}.xlsx' to disk.")

    cols = ["topic_" + str(i) for i in range(dfMatch.shape[0])]
    df_en = pd.DataFrame(model_en.get_topics()).iloc[:, 1:]
    df_es = pd.DataFrame(model_es.get_topics())[idx]
    df_en.columns = cols
    df_es.columns = cols
    pd.concat([df_en, df_es], axis=0).reset_index().to_excel(
        f"results/topics/topics_details_{company}.xlsx", index=False
    )
    print(f"Saving file 'results/topics/topics_details_{company}.xlsx' to disk.")


def topics_on_low_similarity_pr(company, with_printing=True):
    """Extract topics using only the PR with low similarity.

    A PR has a low similarity if its similarity score is strictly below the
    20th percentile of the scores in the corresponding comparison workbook
    (``results/best_matching/comparison_{company}_{suffix}.xlsx``).

    Iterates over the notebook-level `labels`, `languages` and
    `bert_languages` lists (they must be defined before this is called),
    paired in order with the suffixes ``en2es`` and ``es2en``. For each
    language the low-similarity queries are tokenized into sentences,
    embedded, and modeled with `get_topics`; sentences, embeddings and topic
    details are saved to disk along the way.

    Parameters
    ----------
    company : str
        Company name used to build the input and output file names.
    with_printing : bool, default True
        Whether to print the topic-details table.
    """
    suffixes = ["en2es", "es2en"]
    print(f"** Extracting topics on low similarity PR for Company '{company}' **")

    # Make sure every output folder exists before writing.
    for folder in (
        os.path.join("data", "sentences"),
        os.path.join("data", "embeddings"),
        "results/topics",
    ):
        os.makedirs(folder, exist_ok=True)

    for label, language, _bert_language, suffix in zip(
        labels, languages, bert_languages, suffixes
    ):
        print(f"\t - Language : {language}")
        df = pd.read_excel(f"results/best_matching/comparison_{company}_{suffix}.xlsx")
        if df.empty:
            # np.percentile cannot be computed on an empty column.
            print("\tNo PR available in the comparison file; skipping.")
            continue
        doc_list = df[df.similarity < np.percentile(df.similarity, q=20)][
            "query"
        ].values
        print(f"\tExtracting {len(doc_list)} PR with low similarity...")

        stopwords = set(nltk.corpus.stopwords.words(language))
        print("[S1-s.] \t Tonekize sentences")
        sentences = get_sentences(doc_list, stopwords)
        filename = os.path.join(
            "data", "sentences", ("sentences_lowsim_" + company + "_" + label + ".pkl")
        )
        with open(filename, "wb") as handle:
            pickle.dump(sentences, handle)
        print(
            "[S1-e.] \t Sentences tonenized saved to disk. Total nr. sentences = {}.".format(
                len(sentences)
            )
        )

        print("[S2-s.] \t Starting with embeddings creation.")
        embs = get_embeddings(sentences)
        filename = os.path.join(
            "data", "embeddings", (company + "_lowsim_" + label + ".pkl")
        )
        with open(filename, "wb") as handle:
            pickle.dump(embs, handle)
        # Report success only once the file has actually been written.
        print(f"[S2-e.] \t Embeddings file {filename} saved to disk.")

        # The embeddings are read back from the file just written, as before.
        with open(filename, "rb") as handle:
            embs = pickle.load(handle)
        print(
            f"[S2.] \t Embeddings imported from {filename}. Starting topic modeling..."
        )
        model, topic, prob = get_topics(
            language=language, sentences=sentences, embeddings=embs
        )
        print("[S2.] \t Topics Modeled")
        print(model.get_topic_info())
        # The first column is dropped; presumably it is the outlier topic
        # (-1) -- TODO confirm.
        df_topic = pd.DataFrame(model.get_topics()).iloc[:, 1:]
        namefile = f"results/topics/topics_details_lowsim_{company}_{label}.xlsx"
        df_topic.to_excel(namefile, index=False)
        print(f"Topics details for company {company} saved to disk. File '{namefile}'")
        if with_printing:
            print(df_topic)


def print_summary_similarity(companies):
    """Build, save and print the per-company average similarity table.

    Reads ``results/best_matching/comparison_{company}_en2es.xlsx`` and
    ``..._es2en.xlsx`` for every company, records the number of rows and the
    mean of the `similarity` column of each, writes the table (sorted by
    company) to ``results/avg_similarity_company_both.xlsx`` and prints it.
    """
    counts_en, counts_es = [], []
    means_en2es, means_es2en = [], []

    for name in tqdm(companies):
        en2es = pd.read_excel(f"results/best_matching/comparison_{name}_en2es.xlsx")
        es2en = pd.read_excel(f"results/best_matching/comparison_{name}_es2en.xlsx")
        counts_es.append(es2en.shape[0])
        counts_en.append(en2es.shape[0])
        means_en2es.append(en2es.similarity.mean())
        means_es2en.append(es2en.similarity.mean())

    # Columns are stored as float arrays, including the two counts.
    summary = pd.DataFrame(
        {
            "company": companies,
            "nr_en": np.asarray(counts_en, dtype=float),
            "nr_es": np.asarray(counts_es, dtype=float),
            "similarity_en_to_es": np.asarray(means_en2es, dtype=float),
            "similarity_es_to_en": np.asarray(means_es2en, dtype=float),
        }
    )
    summary = summary.sort_values(by="company")
    summary.to_excel("results/avg_similarity_company_both.xlsx", index=False)
    print(tabulate(summary, headers="keys", tablefmt="fancy_grid"))

## Main

1. Sentence tokenize: Save the sentences in the file `data/sentences/sentences_{company}_{lang}.pkl`.
2. Embeddings creation: Read the file created in step 1, and create embeddings. Saved in file `data/embeddings/company_lang.pkl`
3. Get similarity score for `en2es` and `es2en`. This produces two files, `comparison_{company}_{lang2lang}.xlsx`, where `lang2lang` indicates the language of the query (first language) and the language of the best match (second language). **NOTE**: We can choose between two transformers; however, for this task, the `LaBSE` transformer seems to be the best.
4. Get topics using low-similarity PR. For each language, we identify the PR with a similarity score in the bottom 20th percentile. For this subset of PR, we extract the most representative topics, up to a maximum of 10 topics. **NOTE**: For this task, we use the model `paraphrase-multilingual-MiniLM-L12-v2`, since it seems to provide better performance for the topic modeling task.
5. Extra Tasks:
    - Topic modeling for each company. We store the top 10 topics in an Excel file.
    - Topics match for each company. We try to match each English topic with its best match (highest cosine similarity score) among the Spanish topics.

Note that these extra tasks do not seem to be too informative.

In [4]:
%%time
companies = ["Endesa"]
# Language configuration. These three lists are read as notebook-level
# globals by the helper functions defined above, so they must be set before
# any of those functions is called.
labels = ["en", "es"]
languages = ["english", "spanish"]
bert_languages = ["english", "spanish"]

topics_analysis = False  # does not seem to be too informative

# Progress is reported 1-based, so the last company shows as "[N/N]".
for count, company in enumerate(companies, start=1):
    print(
        f"[{count:2d}/{len(companies)}]\t** Sentences and Embeddings Creation for Company '{company}' **"
    )
    name = company + ".xlsx"
    filename = os.path.join("data", name)
    df = pd.read_excel(filename)
    df = df[~df.text.isna()]

    # Steps 1 and 2: sentences and embeddings, saved to disk per language.
    sentence_tokenize(company, df)
    embeddings_creation(company)

    # Step 3: best-match similarity in both directions.
    #  NOTE: For Rovi, Santander, and Solaria, see the note in the docstring
    #  of get_similarity_scores.
    get_similarity_scores(
        company, transformer_type="laBSE"
    )  # models = "laBSE", "paraphrase-multilingual-MiniLM-L12-v2"

    # Step 4: topics restricted to the low-similarity PR.
    topics_on_low_similarity_pr(company, with_printing=False)

    # Step 5 (optional extra tasks).
    if topics_analysis:
        topics_modeling(company)
        match_topics(company)

# The summary covers every company in `companies_all`, so the comparison
# workbooks of all of them must already exist on disk.
print_summary_similarity(companies_all)

[ 0/1]	** Sentences and Embeddings Creation for Company 'Endesa' **

[S1.] Tonekize sentences
	 - Language : english
	[S1-en.] 	 Sentences tonenized saved to disk. Total nr. sentences = 21158.
	 - Language : spanish
	[S1-es.] 	 Sentences tonenized saved to disk. Total nr. sentences = 28560.
[S1.] Done with tonekize sentences

[S2.] Embeddings creation
[S2-en.] 	 Starting with embeddings creation.


Batches:   0%|          | 0/662 [00:00<?, ?it/s]

[S2-en.] 	 Embeddings file data/embeddings/Endesa_en.pkl saved to disk.
[S2-es.] 	 Starting with embeddings creation.


Batches:   0%|          | 0/893 [00:00<?, ?it/s]

[S2-es.] 	 Embeddings file data/embeddings/Endesa_es.pkl saved to disk.
[S2.] Done with embeddings creation
	 [NOTE] Similarity scores computed using 'laBSE' transformer
** Sentence Transformers (match) for Company 'Endesa' **


  0%|          | 0/714 [00:00<?, ?it/s]

Endesa         	 similarity score from 'en' to 'es' = 0.942.
Comparison file saved to disk : 'results/best_matching/comparison_Endesa_en2es.xlsx'.
╒════╤═══════════╤══════════════╤═════════╤═════════╕
│    │ company   │   similarity │   nr_en │   nr_es │
╞════╪═══════════╪══════════════╪═════════╪═════════╡
│  0 │ Endesa    │     0.941519 │     714 │     965 │
╘════╧═══════════╧══════════════╧═════════╧═════════╛
	 [NOTE] Similarity scores computed using 'laBSE' transformer
** Sentence Transformers (match) for Company 'Endesa' **


  0%|          | 0/965 [00:00<?, ?it/s]

Endesa         	 similarity score from 'es' to 'en' = 0.906.
Comparison file saved to disk : 'results/best_matching/comparison_Endesa_es2en.xlsx'.
╒════╤═══════════╤══════════════╤═════════╤═════════╕
│    │ company   │   similarity │   nr_en │   nr_es │
╞════╪═══════════╪══════════════╪═════════╪═════════╡
│  0 │ Endesa    │     0.905756 │     714 │     965 │
╘════╧═══════════╧══════════════╧═════════╧═════════╛
** Extracting topics on low similarity PR for Company 'Endesa' **
	 - Language : english
	Extracting 143 PR with low similarity...
[S1-s.] 	 Tonekize sentences
[S1-e.] 	 Sentences tonenized saved to disk. Total nr. sentences = 4137.
[S2-s.] 	 Starting with embeddings creation.


Batches:   0%|          | 0/130 [00:00<?, ?it/s]

[S2-e.] 	 Embeddings file data/embeddings/Endesa_lowsim_en.pkl saved to disk.
[S2.] 	 Embeddings imported from data/embeddings/Endesa_lowsim_en.pkl. Starting topic modeling...


2024-01-17 13:31:32,788 - BERTopic - Reduced dimensionality
2024-01-17 13:31:32,861 - BERTopic - Clustered reduced embeddings
2024-01-17 13:31:59,098 - BERTopic - Reduced number of topics from 108 to 10


[S2.] 	 Topics Modeled
   Topic  Count                                       Name
0     -1   1298     -1_customers_mobility_plan_electricity
1      0   1555         0_2023_energy_transition_renewable
2      1    380          1_charging_electric_mobility_grid
3      2    338            2_million_income_euros_dividend
4      3    215      3_euroleague_women_festival_finalists
5      4    159  4_biodiversity_species_conservation_birds
6      5     72                5_thermal_coal_fired_boiler
7      6     59      6_basilica_saint_illumination_vatican
8      7     44               7_donated_masks_aid_families
9      8     17     8_board_corporate_reputation_directors
Topics details for company Endesa saved to disk. File 'results/topics/topics_details_lowsim_Endesa_en.xlsx'
	 - Language : spanish
	Extracting 193 PR with low similarity...
[S1-s.] 	 Tonekize sentences
[S1-e.] 	 Sentences tonenized saved to disk. Total nr. sentences = 6101.
[S2-s.] 	 Starting with embeddings creation.


Batches:   0%|          | 0/191 [00:00<?, ?it/s]

[S2-e.] 	 Embeddings file data/embeddings/Endesa_lowsim_es.pkl saved to disk.
[S2.] 	 Embeddings imported from data/embeddings/Endesa_lowsim_es.pkl. Starting topic modeling...


2024-01-17 13:32:37,338 - BERTopic - Reduced dimensionality
2024-01-17 13:32:37,452 - BERTopic - Clustered reduced embeddings
2024-01-17 13:33:05,019 - BERTopic - Reduced number of topics from 130 to 10


[S2.] 	 Topics Modeled
   Topic  Count                                           Name
0     -1   1995          -1_proyecto_proyectos_prensa_director
1      0   3442             0_descargar_docx_energética_social
2      1    291  1_especies_biodiversidad_conservación_hábitat
3      2    128              2_piano_conciertos_madrid_cultura
4      3     70          3_solares_solar_paneles_fotovoltaicas
5      4     49         4_helicóptero_drones_eléctricas_robots
6      5     41           5_miel_apicultores_apicultor_carmona
7      6     39      6_baterías_batería_condensadores_turbinas
8      7     25                 7_hoteles_silken_hotelera_2022
9      8     21                    8_jaguar_rover_suv_juicebox
Topics details for company Endesa saved to disk. File 'results/topics/topics_details_lowsim_Endesa_es.xlsx'


  0%|          | 0/30 [00:00<?, ?it/s]

╒════╤════════════════╤═════════╤═════════╤═══════════════════════╤═══════════════════════╕
│    │ company        │   nr_en │   nr_es │   similarity_en_to_es │   similarity_es_to_en │
╞════╪════════════════╪═════════╪═════════╪═══════════════════════╪═══════════════════════╡
│  1 │ ACS            │     318 │     323 │              0.969719 │              0.966951 │
├────┼────────────────┼─────────┼─────────┼───────────────────────┼───────────────────────┤
│ 14 │ Acciona        │     887 │    1074 │              0.930277 │              0.895565 │
├────┼────────────────┼─────────┼─────────┼───────────────────────┼───────────────────────┤
│  0 │ Acerinox       │     229 │     236 │              0.93387  │              0.934255 │
├────┼────────────────┼─────────┼─────────┼───────────────────────┼───────────────────────┤
│ 15 │ Arcelormittal  │     447 │     192 │              0.77529  │              0.828281 │
├────┼────────────────┼─────────┼─────────┼───────────────────────┼─────────────