In [1]:
import json
from collections import Counter

def load_tag_index(file_path):
    """Load the tag index from a JSON file.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The decoded JSON document. The rest of this file uses it as a dict
        mapping each tag to a list of scheme names.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # encoding, so non-ASCII tags and scheme names load the same everywhere.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def search_schemes(query, tag_index):
    """Rank schemes by how many query-matching tags list them.

    A tag matches when any whitespace-separated word of the lower-cased
    query occurs as a substring of the lower-cased tag.

    Args:
        query: Free-text search string.
        tag_index: Mapping of tag -> iterable of scheme names.

    Returns:
        Scheme names ordered by descending hit count; schemes with equal
        counts keep the order in which they were first encountered.
    """
    terms = query.lower().split()

    hits = Counter()
    for tag, schemes in tag_index.items():
        lowered_tag = tag.lower()
        if not any(term in lowered_tag for term in terms):
            continue
        # Every listing of a scheme under a matching tag adds one to its score.
        hits.update(schemes)

    # most_common() sorts by count, descending, and is stable for ties.
    return [name for name, _ in hits.most_common()]

# Load the tag index once at notebook level; the search call further down
# reuses this object. 'tag_index.json' is a relative path, so it is resolved
# against the current working directory.
tag_index = load_tag_index('tag_index.json')

# Example usage


In [3]:
# Example free-text query passed to the search call at the end of this cell.
user_query = "education loan abroad"
import openai
# Function to generate tags using OpenAI API
def generate_tags_with_openai(query):
    """Ask the OpenAI completion endpoint for tags relevant to ``query``.

    The API key is read from the ``OPENAI_API_KEY`` environment variable
    instead of being hard-coded in the notebook. If the variable is unset,
    whatever key is already configured on the ``openai`` module is left alone.

    Args:
        query: Free-text search string to generate tags for.

    Returns:
        A list of lower-cased, whitespace-stripped, non-empty tag strings.

    Raises:
        Whatever ``openai.Completion.create`` raises (authentication,
        network, or API errors) is propagated unchanged.
    """
    import os  # local import: keeps this block self-contained

    # Never commit credentials; only override the client's key when the
    # environment actually provides one.
    api_key = os.environ.get('OPENAI_API_KEY')
    if api_key:
        openai.api_key = api_key

    # NOTE(review): `openai.Completion` with `text-davinci-002` is the legacy
    # (pre-1.0 client) interface -- confirm the installed openai version and
    # model availability before relying on this call.
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=f"Generate relevant tags for the query: '{query}'\nTags:",
        max_tokens=50,
        n=1,
        stop=None,
        temperature=0.5,
    )

    raw_text = response.choices[0].text.strip()
    # The model may separate tags with commas or put one per line; treat both
    # as delimiters and drop empty pieces (empty completion, trailing comma).
    pieces = raw_text.replace('\n', ',').split(',')
    return [piece.strip().lower() for piece in pieces if piece.strip()]

# Combine results from tag index and OpenAI-generated tags
def combined_search(query, tag_index):
    """Merge tag-index search results with results from AI-generated tags.

    Args:
        query: Free-text search string.
        tag_index: Mapping of tag -> list of scheme names.

    Returns:
        Scheme names with duplicates removed: first the results of
        ``search_schemes`` in their ranked order, then any additional schemes
        found via the AI-generated tags, in first-seen order.
    """
    # Get results from original search function
    original_results = search_schemes(query, tag_index)

    # Generate tags using OpenAI
    ai_generated_tags = generate_tags_with_openai(query)

    # The generated tags are lower-cased, while index keys may be mixed case.
    # Look tags up case-insensitively (as search_schemes does) by building a
    # lower-cased view of the index; keys that collide after lowering are merged.
    schemes_by_lowered_tag = {}
    for tag, schemes in tag_index.items():
        schemes_by_lowered_tag.setdefault(tag.lower(), []).extend(schemes)

    # Search using AI-generated tags
    ai_results = []
    for tag in ai_generated_tags:
        ai_results.extend(schemes_by_lowered_tag.get(tag.strip().lower(), []))

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(original_results + ai_results))

# Run the tag-index-only search for the example query.
# Note: this calls search_schemes, not combined_search; combined_search would
# additionally call the OpenAI API via generate_tags_with_openai.

results = search_schemes(user_query, tag_index)
print(f"Matching schemes: {results[:5]}")  # Print top 5 results

Matching schemes: ['Pradhan_Mantri_Uchchatar_Shiksha_Protsahan_Yojana:_Central_Sector_Interest_Subsidy_Scheme.json', 'Education_Loan_Scheme-_Credit_Line_1_for_Minority_Community.json', 'Scheme_For_Soft_Loan_To_The_Grant-in-aid__Non-grant_In_Aid_Institutions_For_Maintenance_And_Upgradation_Of_Infrastructure_Facilities.json', 'National_Overseas_Scholarship_For_Scheduled_Caste_Etc._Candidates.json', 'Nirman_Shramik_Avm_Unke_Aashrit_Bacchon_Dwara_Bhartiya_Rajasthan_Prashasnik_Seva_Hetu_Aayojit_Pratibhabhik_Yojana.json']
