In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import wordnet

# Step 1: Load and preprocess the metadata

# Load metadata from a CSV file
metadata = pd.read_csv('metadata.csv')

In [7]:
metadata

Unnamed: 0,title,business_metadata,technical_metadata
0,Customer Analysis,Customer segmentation and behavior analysis,Database: sales_data Schema: analytics Table...
1,Sales Performance,Sales performance metrics and KPIs,Database: sales_data Schema: analytics Table...
2,Product Inventory,Inventory management and stock levels,Database: inventory_data Schema: products Ta...
3,Financial Reports,Financial statements and reports,Database: finance_data Schema: reports Table...
4,Website Analytics,Website traffic and user behavior data,Database: web_data Schema: analytics Table: ...


In [8]:
# Combine relevant metadata fields into a single text field
metadata['combined_metadata'] = metadata['title'] + ' ' + metadata['business_metadata'] + ' ' + metadata['technical_metadata']
metadata

Unnamed: 0,title,business_metadata,technical_metadata,combined_metadata
0,Customer Analysis,Customer segmentation and behavior analysis,Database: sales_data Schema: analytics Table...,Customer Analysis Customer segmentation and be...
1,Sales Performance,Sales performance metrics and KPIs,Database: sales_data Schema: analytics Table...,Sales Performance Sales performance metrics an...
2,Product Inventory,Inventory management and stock levels,Database: inventory_data Schema: products Ta...,Product Inventory Inventory management and sto...
3,Financial Reports,Financial statements and reports,Database: finance_data Schema: reports Table...,Financial Reports Financial statements and rep...
4,Website Analytics,Website traffic and user behavior data,Database: web_data Schema: analytics Table: ...,Website Analytics Website traffic and user beh...


In [9]:
# Step 2: Define a function to generate synonyms for a given query term

def generate_synonyms(query_term):
    synonyms = []
    for syn in wordnet.synsets(query_term):
        for lemma in syn.lemmas():
            synonyms.append(lemma.name())
    return synonyms

In [11]:
# Step 3: Define a function to perform the search

def perform_search(query):
    # Generate synonyms for the query terms
    synonyms = []
    for term in query.split():
        synonyms.extend(generate_synonyms(term))
        
    print(synonyms)
    
    # Combine query terms and synonyms
    search_terms = query + ' ' + ' '.join(synonyms)
    
    # Create TF-IDF vectors for the metadata
    vectorizer = TfidfVectorizer()
    metadata_vectors = vectorizer.fit_transform(metadata['combined_metadata'])
    
    # Create a vector for the search query
    query_vector = vectorizer.transform([search_terms])
    
    # Compute cosine similarity between query vector and metadata vectors
    similarities = cosine_similarity(query_vector, metadata_vectors).flatten()
    
    # Sort the metadata indices based on similarity scores
    indices = similarities.argsort()[::-1]
    
    # Retrieve the top relevant metadata entries
    top_metadata = metadata.iloc[indices][:5]
    
    return top_metadata

In [12]:
# Step 4: Perform a search using the model

# Define the search query
query = "customer analysis"

# Perform the search
results = perform_search(query)

# Display the search results
print("Search Results:")
print(results[['title', 'business_metadata', 'technical_metadata']])

['customer', 'client', 'analysis', 'analysis', 'analytic_thinking', 'analysis', 'analysis', 'analysis', 'psychoanalysis', 'analysis', 'depth_psychology']
Search Results:
               title                            business_metadata  \
0  Customer Analysis  Customer segmentation and behavior analysis   
4  Website Analytics       Website traffic and user behavior data   
3  Financial Reports             Financial statements and reports   
2  Product Inventory        Inventory management and stock levels   
1  Sales Performance           Sales performance metrics and KPIs   

                                  technical_metadata  
0  Database: sales_data  Schema: analytics  Table...  
4  Database: web_data  Schema: analytics  Table: ...  
3  Database: finance_data  Schema: reports  Table...  
2  Database: inventory_data  Schema: products  Ta...  
1  Database: sales_data  Schema: analytics  Table...  


In [15]:
generate_synonyms("customer")

['customer', 'client']

In [16]:
generate_synonyms("analysis")

['analysis',
 'analysis',
 'analytic_thinking',
 'analysis',
 'analysis',
 'analysis',
 'psychoanalysis',
 'analysis',
 'depth_psychology']