In [2]:
from sentence_transformers import SentenceTransformer
import numpy as np
import json
import faiss
import pickle
import json

  from .autonotebook import tqdm as notebook_tqdm





In [3]:
# Load data
# JSON text is UTF-8 by specification; pass the encoding explicitly so the read
# does not depend on the platform's locale default (e.g. cp1252 on Windows).
with open('data/data.json', encoding='utf-8') as f:
    data = json.load(f)

# Prepare data
# Keying the dict by command name removes duplicates; when a name repeats,
# the last entry in the file is the one that is kept.
commands = list({item['command']: item for item in data}.values())  # Get unique commands
# One searchable text per command: name, description and all examples joined by spaces.
command_texts = [
    f"{cmd['command']}: {cmd['description']}. Examples: {' '.join(cmd['examples'])}"
    for cmd in commands
]
# Position i in this list is the label of row i in the FAISS index built below.
command_ids = [cmd['command'] for cmd in commands]

# Load model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode commands
command_embeddings = model.encode(command_texts)

# Create FAISS index for efficient search
# Inner product over L2-normalised vectors is cosine similarity, so the
# embeddings are normalised (in place) before being added to the index.
dimension = command_embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
faiss.normalize_L2(command_embeddings)
index.add(command_embeddings)

# Save the system
# Three artifacts are written: the model directory, the index file, and the
# list that maps index rows back to command names.
model.save('command_search_model')
faiss.write_index(index, 'command_index.faiss')
with open('command_ids.pkl', 'wb') as f:
    pickle.dump(command_ids, f)

# Usage example
def find_command(query, k=3):
    """Return up to ``k`` indexed commands most similar to ``query``.

    Args:
        query: Free-text search string; it is encoded with the module-level
            ``model`` and L2-normalised before the search.
        k: Maximum number of results to return.

    Returns:
        A list of ``{'command': <name>, 'score': <float>}`` dicts in the order
        produced by the index search. The list is shorter than ``k`` when the
        index holds fewer than ``k`` entries, and empty when ``k`` is not
        positive.
    """
    if k <= 0:
        return []

    # Encode query
    query_embedding = model.encode([query])
    faiss.normalize_L2(query_embedding)

    # Search
    distances, indices = index.search(query_embedding, k)

    # Return results
    # FAISS pads the label row with -1 when fewer than k neighbours exist.
    # Those slots are skipped; otherwise command_ids[-1] would silently
    # report the last command as a match.
    return [
        {'command': command_ids[label], 'score': float(score)}
        for label, score in zip(indices[0], distances[0])
        if label >= 0
    ]

# Smoke test: run one natural-language query through the search and print the hits
sample_query = "how to check disk space"
print(find_command(sample_query))

[{'command': 'FREEDISK', 'score': 0.6566748023033142}, {'command': 'DU', 'score': 0.5509358644485474}, {'command': 'CHKNTFS', 'score': 0.4439757466316223}]
