### Notebook: Write query and get top N similar documents

In [1]:
import os
import sys
# Resolve the project's `src` directory relative to the notebook's working
# directory so the project packages imported below can be found.
# Assumes the kernel was started one level below the project root
# (e.g. in `notebooks/`) -- TODO confirm for other launch locations.
current_dir = os.getcwd()

# Go up one level to the project directory
project_dir = os.path.dirname(current_dir)

# Absolute path of the `src` directory inside the project
src_path = os.path.abspath(os.path.join(project_dir, 'src'))

# Add `src` to the import path only once, so re-running this cell does not
# keep appending duplicate entries to sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

from question_answer_site.question_answer.mongodb import MongoDb
from question_answer_site.question_answer.utils import remove_non_word_chars, clean_text, tokens_to_embeddings, post_process_output, correct_spelling
from question_answer_site.question_answer.config import TOKENIZER, EMBEDDING_MODEL_FNAME, EMBEDDING_MODEL_TYPE, TOKENS_EMBEDDINGS, DOCUMENT_EMBEDDING, \
    DOCUMENT_TOKENS, TOP_N, TRANSFORMER_MODEL_NAME, METHOD, MAX_QUERY_LENGTH, username, password, cluster_url, \
    database_name
from transformers import BertTokenizer, BertForQuestionAnswering, RobertaTokenizer, RobertaForQuestionAnswering

from urllib.parse import quote_plus
import torch
from gensim.models import Word2Vec
import os
import re
import nltk
import spacy
import numpy as np
import time
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
query = "what does the starlink program do?"

# Echo the connection settings for debugging, but never the password itself:
# anything printed here is stored in the notebook's saved output.
print(username, "<redacted>" if password else "<missing>", cluster_url, database_name)

# Percent-encode the credentials before handing them to MongoDb
escaped_username = quote_plus(username)
escaped_password = quote_plus(password)

# Create the MongoDb helper used by the cells below; the connection itself is
# opened later via mongo_db.connect(). The last argument is presumably the
# collection name -- TODO confirm against the MongoDb class.
mongo_db = MongoDb(escaped_username, escaped_password, cluster_url, database_name, "parsed_documents")

new_user_1 <redacted> cluster0 question_answer


In [9]:
# Sanity check: report how many documents the collection holds.
if mongo_db.connect():
    try:
        print(mongo_db.count_documents())
    finally:
        # Release the connection after use, matching the other cells that
        # pair connect() with disconnect().
        mongo_db.disconnect()

536


In [10]:
# Set the Tokenizer for your specific BERT model variant
bert_base_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
roberta_tokenizer = RobertaTokenizer.from_pretrained("deepset/roberta-base-squad2", add_prefix_space=True)
tokenizers = {'bert': bert_base_tokenizer, 'roberta': roberta_tokenizer}
tokenizer = tokenizers[TOKENIZER]

# Load the embedding model named in the config. The type is compared
# case-insensitively for both supported kinds, and an unsupported value fails
# here instead of leaving `model` undefined for later cells.
embedding_model_type = EMBEDDING_MODEL_TYPE.lower()
if embedding_model_type == 'word2vec':
    # NOTE(review): this path is resolved from the current working directory,
    # unlike the glove branch which goes through ../src/question_answer_site --
    # confirm it is correct when run from the notebooks directory.
    model = Word2Vec.load(
        os.path.join(os.getcwd(), "question_answer", "embedding_models", EMBEDDING_MODEL_FNAME))
elif embedding_model_type == 'glove':
    # Load the custom spaCy model (directory name is the file name without ".bin")
    model = spacy.load(os.path.join("..", "src", "question_answer_site", "question_answer", "embedding_models",
                                    EMBEDDING_MODEL_FNAME.split(".bin")[0]))
else:
    raise ValueError(
        f"Unsupported EMBEDDING_MODEL_TYPE {EMBEDDING_MODEL_TYPE!r}; expected 'Word2Vec' or 'glove'")

# Specify Candidate token embeddings option: which query_data fields
# (tokens, embeddings) are used for the candidate search. Any other value of
# TOKENS_EMBEDDINGS falls back to the stop-word-free search fields.
_QUERY_FIELDS = {
    "query": ("tokenized_query", "query_embedding"),
    "query_search": ("tokenized_query_search", "query_embedding_search"),
}
TOKENS, EMBEDDINGS = _QUERY_FIELDS.get(
    TOKENS_EMBEDDINGS, ("tokenized_query_search_less_sw", "query_embedding_search_less_sw"))

In [11]:
def spell_check(user_query):
    """Return ``user_query`` with words unknown to the embedding model spell-corrected.

    The query is tokenized with the notebook-level ``tokenizer`` and the tokens
    are regrouped into words. A word counts as misspelled when at least one of
    its tokens maps to an all-zero embedding in ``model``; each such word is
    passed to ``correct_spelling`` and replaced in the query.

    NOTE(review): word grouping relies on the "Ġ" word-start marker, so it
    presumably only groups correctly for the RoBERTa tokenizer -- confirm
    before using it with the BERT tokenizer.

    Args:
        user_query (str): The query text to check.

    Returns:
        str: The query with whole-word corrections applied; unchanged when no
        word needs (or has) a correction.
    """
    opening_chars = {'(', '[', '{', '/', '\\'}
    dropped_chars = {')', ']', '}', '/', '\\', '?', ".", "!"}
    separators = {'/', '\\'}

    tokenized_query = tokenizer.tokenize(user_query)

    # Group tokens into words
    words = []
    current_word = ""
    for token in tokenized_query:
        if token.startswith("Ġ"):  # Indicates the start of a new word
            if current_word:
                words.append(current_word)
            remainder = token[1:]
            current_word = '' if remainder in opening_chars else remainder
        else:
            if token not in dropped_chars:
                current_word += token
            if token in separators:
                # A slash ends the current word; never record an empty word.
                if current_word:
                    words.append(current_word)
                current_word = ''
    if current_word:
        words.append(current_word)

    # Identify misspelled words not in the embeddings model
    misspelled_words = []
    for word in words:
        # Strip punctuation other than apostrophes and hyphens before the lookup
        base_word = "".join(char for char in word if char.isalnum() or char in "'-")
        token_vectors = tokens_to_embeddings(tokenizer.tokenize(base_word), model, RANDOM=False)
        if any(not any(vector) for vector in token_vectors):
            # Add the original word to the misspelled_words list
            misspelled_words.append(word)

    # Look up each distinct misspelled word once. Skip words without a usable
    # suggestion so a missing correction can never break the replacement step.
    corrections = {}
    for word in dict.fromkeys(misspelled_words):
        suggestion = correct_spelling(word)
        if suggestion and suggestion != word:
            corrections[word] = suggestion

    if not corrections:
        return user_query

    # Replace whole words only, in a single pass: a plain str.replace would
    # also rewrite the same letters inside longer words, and sequential
    # replacements could re-correct text produced by an earlier correction.
    alternatives = "|".join(re.escape(word) for word in sorted(corrections, key=len, reverse=True))
    whole_word = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")
    return whole_word.sub(lambda match: corrections[match.group(0)], user_query)


In [12]:
def process_query(user_query):
    """Clean, spell-check, tokenize and embed a user query.

    The query is lower-cased, cleaned and spell-checked. It is then tokenized
    twice: once as-is (model input) and once after ``remove_non_word_chars``
    (candidate search), the latter also in a variant without stop words. The
    model-input tokens are truncated or padded to ``MAX_QUERY_LENGTH``.

    Args:
        user_query (str): Raw query text.

    Returns:
        dict: With keys ``query``, ``input_ids_query``, ``attention_mask_query``,
        ``query_search``, ``tokenized_query``, ``tokenized_query_search``,
        ``tokenized_query_search_less_sw``, ``query_embedding``,
        ``query_embedding_search`` and ``query_embedding_search_less_sw``.
        ``attention_mask_query`` is 1 for real tokens and 0 for padding.
    """
    user_query = user_query.lower()

    # clean query for BERT input
    user_query = clean_text(user_query)
    print("Uncorrected query: ", user_query)
    user_query = spell_check(user_query)
    print("Corrected query: ", user_query)

    # clean query for candidate search
    user_query_for_search = remove_non_word_chars(user_query)

    # Tokenize the query for BERT input
    tokenized_query = tokenizer.tokenize(user_query)

    # Tokenize the query for candidate search
    tokenized_query_for_search = tokenizer.tokenize(user_query_for_search)

    # Remove the stop words for the tokenized query for search. A set gives
    # constant-time membership tests; the "Ġ"-prefixed variants cover the
    # RoBERTa word-start tokens.
    plain_stop_words = nltk.corpus.stopwords.words('english')
    stop_words = set(plain_stop_words) | {"Ġ" + word for word in plain_stop_words}
    tokenized_query_for_search_less_sw = [token for token in tokenized_query_for_search
                                          if token not in stop_words]

    # Truncate or pad the query to exactly MAX_QUERY_LENGTH tokens (BERT input)
    tokenized_query = tokenized_query[:MAX_QUERY_LENGTH]
    real_token_count = len(tokenized_query)
    padding_length = MAX_QUERY_LENGTH - real_token_count
    tokenized_query = tokenized_query + [tokenizer.pad_token] * padding_length

    # Convert the tokenized query to input IDs and attention mask. Padding
    # positions get 0 so a model does not attend to them.
    input_ids_query = tokenizer.convert_tokens_to_ids(tokenized_query)
    attention_mask_query = [1] * real_token_count + [0] * padding_length

    # Convert to tensors
    input_ids_query = torch.tensor(input_ids_query).unsqueeze(0)  # Add batch dimension
    attention_mask_query = torch.tensor(attention_mask_query).unsqueeze(0)  # Add batch dimension

    # Get the query embeddings for the candidate document search
    query_embeddings = tokens_to_embeddings(tokenized_query, model, RANDOM=False)
    query_embeddings_search = tokens_to_embeddings(tokenized_query_for_search, model, RANDOM=False)
    query_embeddings_less_sw = tokens_to_embeddings(tokenized_query_for_search_less_sw, model, RANDOM=False)

    query_data = {
        "query": user_query,
        "input_ids_query": input_ids_query.tolist(),
        "attention_mask_query": attention_mask_query.tolist(),
        "query_search": user_query_for_search,
        "tokenized_query": tokenized_query,
        "tokenized_query_search": tokenized_query_for_search,
        "tokenized_query_search_less_sw": tokenized_query_for_search_less_sw,
        "query_embedding": query_embeddings,  # Just used for the candidate search
        "query_embedding_search": query_embeddings_search,  # Just used for the candidate search, cleaned
        "query_embedding_search_less_sw": query_embeddings_less_sw,  # Just used for the candidate search, cleaned more
    }
    return query_data

In [13]:
# Run the full preprocessing pipeline on the example query and show the
# padded token sequence that is converted to input IDs.
query_data = process_query(user_query=query)
print(query_data[ "tokenized_query"])

Uncorrected query:  what does the starlink program do?
Corrected query:  what does the starlink program do?
['Ġwhat', 'Ġdoes', 'Ġthe', 'Ġstar', 'link', 'Ġprogram', 'Ġdo', '?', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']


In [14]:
def get_documents_from_mongo():
    """Load every document from the collection behind ``mongo_db``.

    Prints the collection's document count as a side effect.

    Returns:
        list: All documents yielded by ``mongo_db.iterate_documents()``, or an
        empty list when the connection cannot be established.
    """
    if not mongo_db.connect():
        return []
    try:
        documents = list(mongo_db.iterate_documents())
        print(f"Total documents: {mongo_db.count_documents()}")
        return documents
    finally:
        # Release the connection even if reading the documents fails.
        mongo_db.disconnect()

In [15]:
# METHOD = 'COMBINE_MEAN'
METHOD = 'MEAN_MAX'

# Padding / unknown markers of both supported tokenizers (BERT-style
# '[PAD]'/'[UNK]' and RoBERTa-style '<pad>'/'<unk>'), so the filters below
# work whichever tokenizer produced the tokens.
_PAD_TOKENS = frozenset({'[PAD]', '<pad>'})
_UNK_TOKENS = frozenset({'[UNK]', '<unk>'})


def get_topn_docs(documents_list, query_data):
    """Rank document chunks by embedding similarity to the query.

    Uses the notebook-level ``EMBEDDINGS``/``TOKENS`` keys to read the query
    and ``DOCUMENT_EMBEDDING``/``DOCUMENT_TOKENS`` to read each chunk. The
    score depends on ``METHOD``:

    * ``'MEAN_MAX'``: mean over query tokens of the best-matching chunk token.
    * ``'MEAN_MEAN'``: mean of all pairwise token similarities.
    * anything else: similarity of the mean query and mean chunk embeddings.

    Args:
        documents_list (list[dict]): Chunks holding token embeddings and tokens.
        query_data (dict): Output of ``process_query``.

    Returns:
        list[tuple]: Up to ``TOP_N`` ``(similarity, document)`` pairs, best
        first. Chunks with no usable tokens are skipped; an empty list is
        returned when the query has no non-padding tokens.
    """
    # remove the paddings from the query
    query_embedding = np.array([
        emb for emb, token in zip(query_data[EMBEDDINGS], query_data[TOKENS])
        if token not in _PAD_TOKENS])
    if query_embedding.size == 0:
        return []

    ignored_chunk_tokens = _PAD_TOKENS | _UNK_TOKENS

    # List to store cosine similarity scores and corresponding documents
    similarity_scores = []

    for doc in documents_list:
        # remove the paddings and unknown tokens from the chunk
        chunk_embeddings = np.array([
            emb for emb, token in zip(doc[DOCUMENT_EMBEDDING], doc[DOCUMENT_TOKENS])
            if token not in ignored_chunk_tokens])
        if chunk_embeddings.size == 0:
            # Nothing left to compare against; an empty matrix would make the
            # similarity computation fail.
            continue

        if METHOD == 'MEAN_MAX':
            similarity = cosine_similarity(query_embedding, chunk_embeddings)
            similarity = np.mean(np.max(similarity, axis=1))

        elif METHOD == 'MEAN_MEAN':
            similarity = cosine_similarity(query_embedding, chunk_embeddings)
            similarity = np.mean(similarity)

        # if METHOD == 'COMBINE_MEAN':
        else:
            similarity = cosine_similarity(np.mean(query_embedding, axis=0).reshape(1, -1),
                                           np.mean(chunk_embeddings, axis=0).reshape(1, -1))
            similarity = np.mean(similarity)  # Get the single value out of the array

        # Store similarity score and document
        similarity_scores.append((similarity, doc))

    # Sort in descending order of similarity and keep the best TOP_N
    similarity_scores.sort(key=lambda scored: scored[0], reverse=True)
    return similarity_scores[:TOP_N]

In [16]:
def get_candidate_docs(query_data):
    """Load all stored documents and return the ones ranked best for the query.

    Args:
        query_data (dict): Processed query passed through to ``get_topn_docs``.

    Returns:
        Whatever ``get_topn_docs`` returns for the loaded documents.
    """
    return get_topn_docs(documents_list=get_documents_from_mongo(), query_data=query_data)

In [17]:
# Get the candidate documents, top_n_documents: (similarity_score, document dictionary)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()

# Order the best-scoring chunks by their 'counter' value for display
top_n_documents = sorted(get_candidate_docs(query_data), key=lambda scored: scored[1]['counter'])

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken to find top {TOP_N} documents: {elapsed_time} seconds")

Total documents: 536
Time taken to find top 10 documents: 7.174426794052124 seconds


In [29]:
# Separator placed after the document name, chosen by name length so the
# counter and similarity columns line up: (longest name length, separator),
# checked in order; longer names fall back to a single tab.
_NAME_GAPS = ((22, "\t\t\t\t\t"), (30, " \t\t\t\t"), (36, " \t\t\t"), (46, "\t\t"))

for sim, doc in top_n_documents:
    name = doc['Document']
    gap = next((separator for limit, separator in _NAME_GAPS if len(name) <= limit), "\t")
    print(f"{name}{gap}{doc['counter']} \t {sim}")

Advanced Extremely High Frequency - Wikipedia.pdf	40 	 0.43302890508182323
Rocket Lab Electron - Wikipedia.pdf 			92 	 0.41054024062193734
Kepler-11 - Wikipedia.pdf 				213 	 0.41192279830281087
Space Based Space Surveillance - Wikipedia.pdf		296 	 0.4049510918797441
James Webb Space Telescope - Wikipedia.pdf		327 	 0.4071029292797257
James Webb Space Telescope - Wikipedia.pdf		353 	 0.429566456857389
James Webb Space Telescope - Wikipedia.pdf		357 	 0.43302890508182323
James Webb Space Telescope - Wikipedia.pdf		360 	 0.4049510918797441
James Webb Space Telescope - Wikipedia.pdf		362 	 0.4071029292797257
Kosmos 2251 - Wikipedia.pdf 				530 	 0.4305808634441024


In [118]:
import pandas as pd
# Collect the ranked chunks column by column for tabular display
disp_dict = {
    "Document": [doc['Document'] for _, doc in top_n_documents],
    "Counter": [doc['counter'] for _, doc in top_n_documents],
    "Simularity": [sim for sim, _ in top_n_documents],
}

disp_df = pd.DataFrame(disp_dict)
# disp_df[disp_df['Counter']==39]
# disp_df[disp_df['Document']=="Starlink - Wikipedia.pdf"]
disp_df

# for sim, doc in top_n_documents:
#     if len(doc['Document']) < 23:
#         print(f"{doc['Document']}\t\t\t\t\t{doc['counter']} \t {sim}")
#     elif len(doc['Document']) <= 30:
#         print(f"{doc['Document']} \t\t\t\t{doc['counter']} \t {sim}")
#     elif len(doc['Document']) <= 36:
#         print(f"{doc['Document']} \t\t\t{doc['counter']} \t {sim}")
#     elif len(doc['Document']) <= 42:
#         print(f"{doc['Document']}\t\t{doc['counter']} \t {sim}")
#     else:
#         print(f"{doc['Document']}\t\t{doc['counter']} \t {sim}")

Unnamed: 0,Document,Counter,Simularity
0,Reconnaissance satellite - Wikipedia.pdf,11,0.64522
1,Wideband Global SATCOM - Wikipedia.pdf,14,0.675101
2,Advanced Extremely High Frequency - Wikipedia.pdf,35,0.70284
3,Advanced Extremely High Frequency - Wikipedia.pdf,37,0.664623
4,Advanced Extremely High Frequency - Wikipedia.pdf,38,0.646915
5,Falcon 9 - Wikipedia.pdf,58,0.678308
6,Boeing X-37 - Wikipedia.pdf,165,0.636067
7,Boeing X-37 - Wikipedia.pdf,173,0.636681
8,Communications satellite - Wikipedia.pdf,223,0.654481
9,Falcon Heavy - Wikipedia.pdf,260,0.655848


#### Get specific chunk from MongoDB

In [130]:
def print_view(TYPE, query_info, chunk, counter):
    """Print a breakdown of how one chunk scores against the query.

    Always uses the stop-word-free search fields of both inputs
    (``*_less_sw``).

    Args:
        TYPE (str): Scoring method to explain: ``"MEAN_MAX"``,
            ``"COMBINE_MEAN"`` or ``"MEAN_MEAN"``. Any other value only prints
            a "not found" message.
        query_info (dict): Processed query holding
            ``query_embedding_search_less_sw`` and
            ``tokenized_query_search_less_sw``.
        chunk (dict): Stored chunk holding ``token_embeddings_less_sw`` and
            ``tokens_less_sw``.
        counter: Identifier of the chunk, used only in the printed messages.

    Returns:
        None. All results are printed.
    """
    print(f"Inspecting {TYPE} METHOD...")
    chunk_embeddings = chunk['token_embeddings_less_sw']
    chunk_tokens = chunk['tokens_less_sw']
    query_embedding = query_info["query_embedding_search_less_sw"]
    query_tokens = query_info["tokenized_query_search_less_sw"]

    if TYPE == "MEAN_MAX":
        print("Finding the most simlar words in the chunk for each query word...\n")

        sim = cosine_similarity(query_embedding, chunk_embeddings)

        print("Position\tQuery\t\t\tChunk\t\tsim_score")
        # For each query token: index and score of its best-matching chunk token
        for i, s, qt in zip(np.argmax(sim, axis=1), np.max(sim, axis=1), query_tokens):
            # NOTE(review): this bare index echo looks like leftover debugging;
            # kept so the printed output stays unchanged.
            print(i)
            # Tab counts vary with token lengths to keep the columns aligned
            if len(chunk_tokens[i]) < 7 and len(qt) < 8:
                print(f"     {i}) \t{qt}\t\t-->\t{chunk_tokens[i]} \t\t{s}")
            elif len(chunk_tokens[i]) < 6:
                print(f"     {i}) \t{qt}\t-->\t{chunk_tokens[i]} \t\t{s}")
            elif len(chunk_tokens[i]) >= 7 and len(qt) < 8:
                print(f"     {i}) \t{qt}\t\t-->\t{chunk_tokens[i]} \t{s}")
            else:
                print(f"     {i}) \t{qt}\t-->\t{chunk_tokens[i]} \t{s}")

        print(f"\nnp.mean(np.max(sim, axis=1))\tsimilarity score between query and {counter} is {np.mean(np.max(sim, axis=1))}")
        print(f"\nnp.mean(sim)\t\t\tsimilarity score between query and {counter} is {np.mean(sim)}")

    elif TYPE == "COMBINE_MEAN":
        similarity = cosine_similarity(np.mean(query_embedding, axis=0).reshape(1, -1),
                                       np.mean(chunk_embeddings, axis=0).reshape(1, -1))
        similarity = np.mean(similarity)  # Get the single value out of the array

        print(f"\nThe average query embedding and average {counter} embedding is {similarity}")

    elif TYPE == "MEAN_MEAN":
        sim = cosine_similarity(query_embedding, chunk_embeddings)
        # Identify the chunk by `counter`, as the other branches do; the
        # previously referenced `filename` is not defined in this function.
        print(f"\nnp.mean(sim) similarity score between query and {counter} is {np.mean(sim)}")

    else:
        print(f"TYPE {TYPE} not found")

In [131]:
# NOTE(review): the bare lookup below is not the cell's last expression, so its
# value is not displayed; only the printed token list appears in the output.
query_data["query"]
# Query tokens used for the candidate search (stop words removed)
print(query_data['tokenized_query_search_less_sw'])

['Ġdesign', 'Ġx', '37', 'Ġorbital', 'Ġtest', 'Ġvehicle']


In [132]:
# List the fields produced by process_query
query_data.keys()

dict_keys(['query', 'input_ids_query', 'attention_mask_query', 'query_search', 'tokenized_query', 'tokenized_query_search', 'tokenized_query_search_less_sw', 'query_embedding', 'query_embedding_search', 'query_embedding_search_less_sw'])

In [136]:
# Fetch the stored chunk(s) whose 'counter' field matches, for closer inspection
if mongo_db.connect():
    try:
        # cursor = mongo_db.get_collection().find({"Document": "Starlink - Wikipedia.pdf"})
        cursor = mongo_db.get_collection().find({"counter": 169})

        # Materialize the cursor while the connection is still open
        mongo_data = list(cursor)
    finally:
        # Always release the connection, even if the query fails
        mongo_db.disconnect()

In [137]:
# How many chunks matched the lookup
print(len(mongo_data))

# Fields and search tokens of the first matching chunk
first_chunk = mongo_data[0]
print(first_chunk.keys())
print(first_chunk['tokens_less_sw'])
# mongo_data[0]['token_embeddings_less_sw']

1
dict_keys(['_id', 'tokens', 'tokens_less_sw', 'token_embeddings_less_sw', 'Document', 'sha_256', 'counter'])
['scale', 'Ġderivative', 'Ġbo', 'e', 'ing', 'Ġx', 'Ġmeasuring', 'Ġfeet', 'Ġlength', 'Ġfeatures', 'Ġtwo', 'Ġangled', 'Ġtail', 'Ġfins', 'Ġlaunches', 'Ġatop', 'las', 'Ġspace', 'x', 'Ġfal', 'con', 'Ġrocket', 'Ġspace', 'plane', 'Ġdesigned', 'Ġoperate', 'Ġspeed', 'Ġrange', 'Ġmach', 'entry', 'Ġtechnologies', 'Ġdemonstrated', 'Ġinclude', 'Ġimproved', 'Ġthermal', 'Ġprotection', 'Ġsystem', 'Ġenhanced', 'Ġav', 'ionics', 'Ġautonomous', 'Ġguidance', 'Ġsystem', 'Ġadvanced', 'Ġair', 'frame', 'Ġspace', 'plane', "'s", 'Ġthermal', 'Ġprotection', 'Ġsystem', 'Ġbuilt', 'Ġupon', 'Ġprevious', 'Ġgenerations', 'Ġatmospheric', 'entry', 'Ġspacecraft', 'Ġincorporating', 'Ġsil', 'ica', 'Ġceramic', 'Ġtiles', 'Ġav', 'ionics', 'Ġsuite', 'Ġused', 'Ġbo', 'e', 'ing', 'Ġdevelop', 'Ġc', 'st', 'Ġcrew', 'ed', 'Ġspacecraft', 'Ġdevelopment', 'aid', 'Ġdesign', 'Ġdevelopment', 'Ġn', 'asa', "'s", 'Ġorbital', 'Ġspace', '

In [138]:
# Explain the score of the fetched chunk against the current query using the active METHOD
print_view(METHOD, query_data, mongo_data[0], mongo_data[0]['counter'])

Inspecting MEAN_MAX METHOD...
Finding the most simlar words in the chunk for each query word...

Position	Query			Chunk		sim_score
79
     79) 	Ġdesign		-->	Ġdesign 	1.0000000000000002
5
     5) 	Ġx		-->	Ġx 		1.0
0
     0) 	37		-->	scale 		0.0
84
     84) 	Ġorbital	-->	Ġorbital 	0.9999999999999999
213
     213) 	Ġtest		-->	Ġtest 		1.0
127
     127) 	Ġvehicle	-->	Ġvehicle 	1.0000000000000004

np.mean(np.max(sim, axis=1))	similarity score between query and 169 is 0.8333333333333334

np.mean(sim)			similarity score between query and 169 is 0.051522842245752376


#### Analysis

In [113]:
# Pull the stop-word-free tokens and embeddings of the fetched chunk and of
# the query into short names for the manual checks below.
inspected_chunk = mongo_data[0]
chunk_embeddings, chunk_tokens = (
    inspected_chunk['token_embeddings_less_sw'],
    inspected_chunk['tokens_less_sw'],
)
query_embedding, query_tokens = (
    query_data["query_embedding_search_less_sw"],
    query_data["tokenized_query_search_less_sw"],
)

In [24]:
# First token of the inspected chunk
chunk_tokens[0]

'Ġstar'

In [23]:
# First query token after stop-word removal
query_data["tokenized_query_search_less_sw"][0]

'Ġstar'

In [29]:
# Embedding vector stored for the chunk's first token
print(chunk_embeddings[0])

[0.03636299818754196, -0.3930419981479645, -0.1429090052843094, -0.46448901295661926, 0.7612259984016418, 0.7617819905281067, -0.35284098982810974, 1.0031119585037231, 1.0688589811325073, -0.4271070063114166, -0.7240110039710999, -0.3708609938621521, 0.2522050142288208, 1.5639139413833618, -0.005681000184267759, -1.0314220190048218, -1.2768089771270752, 0.8234590291976929, -1.1317089796066284, -0.5063470005989075, -0.3803279995918274, -0.6376360058784485, -0.6495620012283325, -0.35387200117111206, -0.01632400043308735, 0.44753599166870117, 0.7902910113334656, -0.9902560114860535, 0.35526400804519653, 0.5142549872398376, 0.20615799725055695, 0.7465569972991943, -0.25756600499153137, -0.9509879946708679, 0.577193021774292, 0.7618569731712341, -0.21814799308776855, 1.155176043510437, -0.11342599987983704, -0.9682440161705017, -0.25894901156425476, 0.5289760231971741, 0.1664550006389618, -0.47958600521087646, -0.1488569974899292, 0.02031799964606762, -0.918524980545044, -0.1954469978809356

In [30]:
# Embedding vector computed for the first query token
print(query_embedding[0])

[0.02706499956548214, 0.11715800315141678, -0.1497579962015152, 0.09549400210380554, 0.788237988948822, 0.044426001608371735, 0.21710899472236633, -1.092947006225586, 0.17413100600242615, -0.5446199774742126, -1.3253979682922363, 0.6600490212440491, 0.8856850266456604, 0.3544749915599823, -0.7906050086021423, 0.30542299151420593, 0.43905699253082275, -1.3396470546722412, 0.07484900206327438, -0.38930198550224304, -1.1445200443267822, 0.16912099719047546, -0.1389389932155609, 0.05334300175309181, -0.07674100250005722, -1.1621359586715698, -0.890516996383667, -0.11214400082826614, -0.5438759922981262, 0.4577699899673462, -0.9184399843215942, -0.03812199831008911, 0.054958999156951904, 0.5922399759292603, -0.04942400008440018, 0.2912740111351013, 0.9680520296096802, 0.2256229966878891, -0.3272770047187805, 1.6780519485473633, 0.2525489926338196, 0.36266499757766724, -0.009165000170469284, -0.5830590128898621, -0.6701769828796387, 0.14175300300121307, 0.2028850018978119, 1.5315450429916382

In [31]:
import numpy as np
from numpy.linalg import norm
 
# define two lists or array
A = np.array(chunk_embeddings[0])
B = np.array(query_embedding[0])

print("A:", A)
print("B:", B)

# compute cosine similarity; a zero vector has no direction, so report 0.0
# instead of dividing by zero (which would yield nan and a runtime warning).
norm_product = norm(A) * norm(B)
cosine = np.dot(A, B) / norm_product if norm_product else 0.0
print("Cosine Similarity:", cosine)

A: [ 0.036363   -0.393042   -0.14290901 -0.46448901  0.761226    0.76178199
 -0.35284099  1.00311196  1.06885898 -0.42710701 -0.724011   -0.37086099
  0.25220501  1.56391394 -0.005681   -1.03142202 -1.27680898  0.82345903
 -1.13170898 -0.506347   -0.380328   -0.63763601 -0.649562   -0.353872
 -0.016324    0.44753599  0.79029101 -0.99025601  0.35526401  0.51425499
  0.206158    0.746557   -0.257566   -0.95098799  0.57719302  0.76185697
 -0.21814799  1.15517604 -0.113426   -0.96824402 -0.25894901  0.52897602
  0.166455   -0.47958601 -0.148857    0.020318   -0.91852498 -0.195447
  0.191122    1.20281506]
B: [ 0.027065    0.117158   -0.149758    0.095494    0.78823799  0.044426
  0.21710899 -1.09294701  0.17413101 -0.54461998 -1.32539797  0.66004902
  0.88568503  0.35447499 -0.79060501  0.30542299  0.43905699 -1.33964705
  0.074849   -0.38930199 -1.14452004  0.169121   -0.13893899  0.053343
 -0.076741   -1.16213596 -0.890517   -0.112144   -0.54387599  0.45776999
 -0.91843998 -0.038122    0