In [1]:
import pandas as pd

# Load the CSV file
df = pd.read_csv('brands_flattened_complete.csv', header=0, index_col=False, encoding='utf-8')

# Initialize an empty list to store the sentences
entire_text = []

# Iterate over each row in the DataFrame
for _, row in df.iterrows():
    # Start with the base sentence
    sentence = f"The {row['name-en']} brand has a model called {row['model-name-en']}, which is available as a {row['type-name-en']} type in the years {row['type-years']}."
    entire_text.append(sentence)

    # Add all features 
    for col in df.columns[df.columns.get_loc('Drive axle'):]:
        feature_value = row[col]
        if pd.notna(feature_value):  # Only include non-NaN values
            if row['type-name-en'] != "-": #if has type name 
                if feature_value is True:  
                    entire_text.append(f"the {row['type-name-en']} version of the {row['model-name-en']} model from the {row['name-en']} brand has {col}.")
                elif feature_value is False:  
                    entire_text.append(f"the {row['type-name-en']} version of the {row['model-name-en']} model from the {row['name-en']} brand doesn't have {col}.")
                else:  # For other values (e.g., strings, numbers)
                    entire_text.append(f"the {col} of the {row['type-name-en']} version of the {row['model-name-en']} model from the {row['name-en']} brand is {feature_value}.")
            else:
                if feature_value is True:  
                    entire_text.append(f"the {row['model-name-en']} model from the {row['name-en']} brand has {col}.")
                elif feature_value is False:  
                    entire_text.append(f"the {row['model-name-en']} model from the {row['name-en']} brand doesn't have {col}.")
                else:  # For other values (e.g., strings, numbers)
                    entire_text.append(f"the {col} of the {row['model-name-en']} model from the {row['name-en']} brand is {feature_value}. ")

# Print the first few sentences to verify
print(entire_text[:5])



['The audi brand has a model called Q4-e-tron, which is available as a - type in the years [2024].', 'the Drive axle of the Q4-e-tron model from the audi brand is Four-wheel drive. ', 'the Gearbox of the Q4-e-tron model from the audi brand is Single-speed automatic transmission. ', 'the Battery capacity of the Q4-e-tron model from the audi brand is 84.8. ', 'the Power of the Q4-e-tron model from the audi brand is 313 kW. ']


In [5]:
from sentence_transformers import SentenceTransformer

# Load pre-trained model
model = SentenceTransformer('sentence-transformers/multi-qa-mpnet-base-cos-v1')

# Generate embeddings
embeddings = model.encode(entire_text)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/9.25k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:
import faiss
import numpy as np

# Convert embeddings to numpy array
embeddings = np.array(embeddings)

# Create a FAISS index
index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance for similarity

# Add embeddings to the index
index.add(embeddings)


In [9]:
###### check 2 sentence similarity (with cosine)

from sklearn.metrics.pairwise import cosine_similarity

while True:
    q1 = input("\nfirst: ")
    if q1.lower() == "exit":
        break
    q2 = input("\nsecond: ")
    
    embeddings = model.encode([q1, q2])
    similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
    
    print(similarity)  
    


first:  i dont like you

second:  i like you


0.74194



first:  i dont like you

second:  i hate you


0.72622854



first:  exit


In [47]:
###### check 2 sentence distance (with Euclidean )

from scipy.spatial.distance import euclidean

while True:
    q1 = input("\nfirst: ")
    if q1.lower() == "exit":
        break
    q2 = input("\nsecond: ")
    
    embeddings = model.encode([q1, q2])
    similarity = euclidean(embeddings[0], embeddings[1])
    
    print(similarity)  


first:  i like you

second:  i dont like you


0.9280807375907898



first:  i dont like you

second:  i hate you


0.8384539484977722



first:  i need money

second:  i want money


0.7644709348678589



first:  

second:  i need money


1.2917919158935547



first:  i need money

second:  i dont need money


0.6419888138771057



first:  I'm surely in the hurry to have the money

second:  I need money


1.109007716178894



first:  I'm surely in the hurry to have the money

second:  I absolutely do not need the money


1.0349351167678833



first:  exit


In [39]:
import ollama

def retrieve_answer(query, index, flat_data, model):
    # Generate the embedding for the query
    query_embedding = model.encode([query])

    # Search the index for the most similar data
    D, I = index.search(query_embedding, k = 10)  # k is the number of top results to retrieve

    # Get the closest matches from the flat data
    results_with_scores = [(flat_data[i], D[0][idx]) for idx, i in enumerate(I[0])]

    similar_sentences = [result[0] for result in results_with_scores]

     # Initialize the Ollama client
    client = ollama.Client()

    content = f"""You are an intelligent assistant specialized in providing detailed information about cars, their specifications, and features. 
        Your job is to answer user queries accurately and concisely based on the provided context. 
        you must first find the best answer based on the question in the following sentences: {similar_sentences}.
        then answer the user's question correctly and completely"""
    
    # Generate the answer using Gemma3
    response = client.chat(
        model='deepseek-r1:1.5b',
        messages=[{'role': 'user', 'content': content}]
    )

    # Extract and return the answer
    answer = response['message']['content']
    
    return answer

In [None]:
while True:
    query = input("\nYou: ")
    if query.lower() == "exit":
        break

    response = retrieve_answer(query, index, entire_text, model)
    print(response)
   # for result, score in response:
   #     print(f"Result: {result}\nScore: {score}\n")


You:  name some cars with sunroof


<think>
Okay, so I'm trying to figure out how to answer a user's query about car features. The context provided lists several models with 'Sunroof' mentioned in parentheses. My task is to first identify all these models and then provide accurate information based on the given data.

First, I'll list out each model as per the context:

1. Audi Q4-e-tron with Sunroof.
2. Opel Moka with Sunroof.
3. J-company Astra notch back version with Sunroof.
4. J-company Astra full version with Sunroof.
5. Audi 1400J Astra with Sunroof.
6. Audi 1600J Astra with Sunroof.
7. Opel Insignia insignia model with Sunroof.
8. TT-coupe model from Audi, full version doesn't have Sunroof.
9. TT-convertible model from Audi, full version doesn't have Sunroof.

Now, I need to think about the user's possible query. They might be asking about which cars have SunROofs or trying to understand if certain models are available with this feature. Since each entry includes 'Sunroof' in parentheses, it seems like all these 


You:  name models of audi 


<think>
Okay, so I'm trying to figure out what each part means. Let me go through it step by step.

First, there are a bunch of sentences provided about different models by Audi. Each one gives some information like model name, years available, features in certain generations, body types, etc. The user probably wants me to parse all this data accurately and answer questions based on the context.

Let's list out each model with their details:

1. Q5: Full-line package starting from 2024 down to 2015. It also mentions a Generation (Body code) of 1234.
2. Q5-e-tron: A ۴۰ model available in 2024 and 2023. The body type is Small crossover size.
3. Q4-e-tron: Not sure about the details here, but it's a -type in 2024.
4. TT-coupe: Full-type starting from 2011 to 2006. Its body type is Full.
5. TT-convertible: Full-type available since 2023 and prior. Body type is also Small crossover size.
6. Other models like Q5-e-tron, Q4-e-tron don't specify their full details here.

I'm assuming the user 


You:  which cars doesnt have auto lights


<think>
Okay, let me try to figure out how to respond to this query. The user has provided a list of car models with specific features regarding Auto lights in their full versions. They want an answer that accurately describes each model's Autopark.

First, I need to process the information given. Each entry like "the Q4-e-tron model from the Audi brand has Auto lights." or "the mokka model from the Opel brand has Auto lights." needs to be parsed for any special conditions or exclusions, such as full versions not having Auto lights.

I notice that some models, like "the full version of the TT-coupe model from the Audi brand doesn't have Auto lights.", and others like the Q5 model in Audi do. Similarly, Opel models sometimes have Auto lights, but others don't, depending on their full versions.

The user wants a comprehensive answer, so I should structure it to highlight each car's status without ambiguity. I'll need to mention the brand names and any specific details about their full ve


You:  how many cars dont have sunroof


<think>
Okay, so I need to figure out what the best answer is for a given query. The context provided includes several statements about different models from Audi and Opel brands with or without a Sunroof. 

First, I'll read through each statement carefully. Let me list them out:

1. 'the Q4-e-tron model from the audi brand has Sunroof.'
2. 'the ۴۰ version of the Q5-e-tron model from the audi brand has Sunroof.'
3. 'the mokka model from the opel brand has Sunroof.'
4. "the full version of the TT-convertible model from the audi brand doesn't have Sunroof."
5. "the full version of the TT-coupe model from the audi brand doesn't have Sunroof."
6. 'the J-company version of the astra notch back model from the opel brand has Sunroof.'
7. 'the 1400J-company version of the astra model from the opel brand has Sunroof.'
8. 'the 1600J-company version of the astra model from the opel brand has Sunroof.'
9. "the full-s-line-package version of the Q5 model from the audi brand doesn't have Sunroof."
1


You:  which car needs small parking space


<think>
Okay, I need to figure out what the user is asking. They provided a list of sentences each starting with either ' Audi' or 'Odel' brands, along with some specifications about models like Q4-e-tron, ۴۰ version, Body type, and Auto Park status. The user's query is incomplete—it just says "you must first find the best answer based on question in following sentences: ['the Q4-e-tron model from the audi brand has Auto park.', 'the ۴۰ version of the Q5-e-tron model from the audi brand has Auto park.', 'the Body type of the full-s-line-package version of the Q5 model from the audi brand is Small sedan.', "the mokka model from the opel brand doesn't have Auto park.", 'the 1400J-company version of the astra model from the opel brand doesn't have Auto park.', 'the Body type of the full version of the TT-convertible model from the audi brand is Small sedan.', "the 1600J-company version of the astra model from the opel brand doesn't have Auto park.", 'the full version of the TT-coupe model


You:  which car is bigger in size


<think>
Okay, let me try to figure this out. The user has provided several pieces of information about different car models from two brands: Audi and Opel. Each entry gives details like body type, engine displacement, widths, availability years, and maybe other specs.

First, I need to understand what the user is asking. They've given a set of facts, which include:

1. Body types for Audi TT-coupe models in different years.
2. Width measurements for Audi TT-coupe and Opel Insignia models.
3. Engine displacement figures.
4. Availability years of Audi TT-coupe and Opel astra models.

I need to figure out how the user wants me to use this information. They mentioned that each entry is a fact about these cars, so I should be able to answer any question based on these facts accurately and concisely.

Let me start by organizing all the provided data for clarity:

- Audi TT-coupe: Available from 2023 down to 2006. Body type is Small sedan.
- Opel Insignia: Mid-size sedan with a certain year r