# In [None]:
import pandas as pd

# Function to calculate match scores for each keyword
def calculate_match_scores(keywords, text):
    # Convert both keywords and text to lowercase for case-insensitive matching
    text_lower = text.lower()
    
    # Initialize a list to store match scores for each keyword
    match_scores = []
    
    for keyword in keywords:
        keyword_parts = keyword.lower().split()
        total_parts = len(keyword_parts)
        matched_parts = sum(keyword_part in text_lower for keyword_part in keyword_parts)
        match_score = matched_parts / total_parts if total_parts > 0 else 0.0
        match_scores.append(match_score)
    
    return match_scores

# Load the dataset with keywords (assuming it's in a separate Excel file)
keywords_excel_path = 'path_to_keywords_excel_file.xlsx'
keywords_data = pd.read_excel(keywords_excel_path)

# Load your dataset with text to match against (assuming it's in a separate Excel file)
text_data_excel_path = 'path_to_text_data_excel_file.xlsx'
text_data = pd.read_excel(text_data_excel_path)

# Specify the columns for keywords and text
keyword_column = 'keywords'  # Column containing keywords in the keywords Excel file
text_column = 'text_column'  # Column containing text to match against in the text data Excel file

# Read the text cells once instead of re-walking text_data for every keywords
# row. If the column is absent, every row is matched against empty text.
if text_column in text_data.columns:
    text_values = text_data[text_column].tolist()
else:
    text_values = [''] * len(text_data)

# Pair each original cell with the string that is actually searched. pandas
# represents an empty cell as NaN, and str(NaN) is "nan", which would falsely
# match keywords such as "an" or "nan" -- so empty cells are searched as ''.
searchable_texts = [
    (text, '' if pd.isna(text) else str(text))
    for text in text_values
]

# Initialize an empty list to store the matched data
matched_data = []

# Compare every keywords row against every text row (full cross product)
for keywords_index, keywords_row in keywords_data.iterrows():
    name = keywords_row['name']  # Assuming 'name' is a column in the keywords Excel file
    address = keywords_row['address']  # Assuming 'address' is a column in the keywords Excel file
    raw_keywords = keywords_row[keyword_column]
    # An empty keywords cell is NaN (a float with no .split()); treat it as
    # "no keywords" rather than crashing. Keywords are separated by whitespace.
    keywords = [] if pd.isna(raw_keywords) else str(raw_keywords).split()

    print(f"Matching keywords for {name} - {address}: {keywords}")

    for text, searchable_text in searchable_texts:
        # Calculate match scores for each keyword
        match_scores = calculate_match_scores(keywords, searchable_text)

        # Record one output row per (keywords row, text row) pair; the
        # original cell values are stored untouched
        matched_data.append({
            'name': name,
            'address': address,
            'keywords': raw_keywords,
            'original_text': text,
            'match_scores': ','.join(str(score) for score in match_scores),
        })

# Create a DataFrame from the matched data
matched_df = pd.DataFrame(matched_data)

# Display the DataFrame with matched data
print(matched_df)
