In [1]:
import pandas as pd
import pickle
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def _dump_pickle(obj, path):
    """Serialize ``obj`` to ``path`` with pickle, closing the file afterwards."""
    # The context manager guarantees the handle is flushed and closed even
    # if pickling raises part-way through.
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def main():
    """Build and persist the sentence-embedding artifacts.

    Steps:
      1. Read the preprocessed dataset from
         ``../Preprocessed/processed_dataset.csv``.
      2. Build two de-duplicated corpora from the ``"WO Description"`` and
         ``"Cause object"`` columns.
      3. Load the ``all-MiniLM-L6-v2`` and ``msmarco-distilbert-base-v4``
         SentenceTransformer models.
      4. Encode both corpora with the ``all-MiniLM-L6-v2`` model.
      5. Pickle both models and both embedding tensors into the current
         working directory.

    Returns:
        None. The function is executed for its side effects (four ``.pkl``
        files written to the current working directory).
    """
    # --> Reading preprocessed data
    df_updated = pd.read_csv(
        filepath_or_buffer="../Preprocessed/processed_dataset.csv",
    )

    # Create the corpora of unique work-order descriptions and cause objects.
    # NOTE(review): drop_duplicates() keeps a single NaN if the column has
    # missing values; presumably the preprocessed data has none -- confirm.
    short_description_corpus = list(df_updated["WO Description"].drop_duplicates())
    cause_object_corpus = list(df_updated["Cause object"].drop_duplicates())

    # Import the symmetric SentenceTransformer model
    symmetric_embedder = SentenceTransformer("all-MiniLM-L6-v2")

    # Import the asymmetric SentenceTransformer model
    # NOTE(review): this model is only persisted below; both corpora are
    # encoded with the symmetric model -- confirm this is intended.
    asymmetric_embedder = SentenceTransformer("msmarco-distilbert-base-v4")

    # Create embedded corpus for preprocessed data - short description
    short_description_corpus_embeddings = symmetric_embedder.encode(
        sentences=short_description_corpus,
        convert_to_tensor=True,
    )

    # Create embedded corpus for preprocessed data - cause object
    cause_object_corpus_embeddings = symmetric_embedder.encode(
        sentences=cause_object_corpus,
        convert_to_tensor=True,
    )

    # Dump the models into the output directory
    _dump_pickle(symmetric_embedder, "symmetric_model.pkl")
    _dump_pickle(asymmetric_embedder, "asymmetric_model.pkl")

    # Dump the embedding corpora into the output directory
    _dump_pickle(short_description_corpus_embeddings, "short_description_corpus_data.pkl")
    _dump_pickle(cause_object_corpus_embeddings, "cause_object_corpus_data.pkl")

In [3]:
# Run the pipeline: read the preprocessed CSV, encode both corpora, and write the pickle files.
main()

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
