In [7]:
!pip install rich
!pip install import-ipynb
from rich.console import Console
from rich.table import Table
import numpy as np
import pandas as pd
import gensim
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import os
from datetime import datetime
import import_ipynb
import data_visualizer

# Load datasets and model
df_patterns = pd.read_csv("patterns_with_vectors.csv")
# The stored vectors are text; parse each cell as comma-separated numbers
# back into a 1-D NumPy array.
df_patterns["vector_representation"] = df_patterns["vector_representation"].apply(
    lambda vec: np.fromstring(vec, sep=",")
)

# Load TF-IDF Vectorizer
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a tfidf_vectorizer.pkl produced by this project.
with open("tfidf_vectorizer.pkl", "rb") as f:
    tfidf_vectorizer = pickle.load(f)

# Use transform(), not fit_transform(): re-fitting would throw away the
# vocabulary and IDF weights that were just loaded from disk.
# Assumes the pickled vectorizer was saved after fitting -- TODO confirm.
tfidf_matrix = tfidf_vectorizer.transform(df_patterns["filtered_keywords"].astype(str))

word2vec_model = gensim.models.Word2Vec.load("patterns_word2vec.model")

cosine_sim_hybrid = pd.read_csv("patterns_cosim_hybrid.csv", index_col=0)

#Function to preprocess user input
def preprocess_text(text):
    """Lowercase *text* and split it on whitespace into a list of tokens."""
    lowered = text.lower()
    tokens = lowered.split()
    return tokens

#Convert user input into TF-IDF vector
def get_tfidf_vector(user_text):
    """Return the TF-IDF representation of *user_text*.

    The text is lowercased and whitespace-normalized via ``preprocess_text``,
    re-joined with single spaces, and passed as a one-document batch to the
    module-level ``tfidf_vectorizer``.
    """
    tokens = preprocess_text(user_text)
    normalized_query = " ".join(tokens)
    return tfidf_vectorizer.transform([normalized_query])

#Convert user input into Word2Vec vector
def get_word2vec_vector(user_text):
    """Return the mean Word2Vec embedding of the known words in *user_text*.

    Words missing from the model's vocabulary are skipped. If no word is
    known, a zero vector of length ``word2vec_model.vector_size`` is returned.
    """
    embeddings = word2vec_model.wv
    found = []
    for token in preprocess_text(user_text):
        if token in embeddings:
            found.append(embeddings[token])

    if not found:
        return np.zeros(word2vec_model.vector_size)
    return np.mean(found, axis=0)

#Compare user input (TF-IDF and word embeddings) to patterns
def get_patterns_from_input(user_text, top_n = 5):
    """Print and log the patterns most similar to a free-text query.

    The query is scored against every row of ``df_patterns`` with an equally
    weighted blend of TF-IDF cosine similarity and Word2Vec cosine
    similarity. The best ``top_n`` rows are printed as a Rich table and
    appended, with the query and a timestamp, to ``search_results.csv``.

    Args:
        user_text: Free-text search query.
        top_n: Number of patterns to show; must be at least 1.

    Raises:
        ValueError: If ``top_n`` is less than 1.
    """
    # A slice like [-0:] selects *everything*, so reject non-positive counts
    # instead of silently returning the whole catalogue.
    if top_n < 1:
        raise ValueError(f"top_n must be at least 1, got {top_n!r}")

    # Get TF-IDF and Word2Vec representations of the query
    tfidf_vec = get_tfidf_vector(user_text)
    word2vec_vec = get_word2vec_vector(user_text).reshape(1, -1)

    # Cosine similarity of the query against every pattern, per representation.
    # cosine_similarity accepts sparse input, so no densification is needed.
    tfidf_similarities = cosine_similarity(tfidf_vec, tfidf_matrix).flatten()
    pattern_vectors = np.vstack(df_patterns["vector_representation"].values)
    word2vec_similarities = cosine_similarity(word2vec_vec, pattern_vectors).flatten()

    # Combine similarities between tfidf/word2vec as hybrid
    alpha = 0.5  # Match alpha in hybrid comparison in data_normalize_matrices
    hybrid_similarities = (alpha * tfidf_similarities) + ((1 - alpha) * word2vec_similarities)

    # Positions of the top_n highest scores, best first
    top_indices = hybrid_similarities.argsort()[::-1][:top_n]

    # argsort yields row *positions*, so select with iloc rather than the
    # label-based loc (they only coincide for a default RangeIndex).
    top_patterns = df_patterns.iloc[top_indices]

    def _cell(value):
        # Rich can only render strings/renderables; show missing values as blank.
        return "" if pd.isna(value) else str(value)

    # Create a Rich Table
    console = Console()
    table = Table(title="🔍 Recommended Patterns", show_lines=True, highlight=True)

    # Add columns
    table.add_column("Pattern Name", style="bold cyan")
    table.add_column("Designer", style="magenta")
    table.add_column("URL", style="green", overflow="ellipsis", no_wrap=True)

    # Add rows with pattern info
    display_columns = top_patterns[["name", "designer", "url"]]
    for name, designer, url in display_columns.itertuples(index=False, name=None):
        table.add_row(_cell(name), _cell(designer), _cell(url))

    # Print the table
    console.print(table)

    # Save query and results to csv file for monitoring
    log_path = "search_results.csv"
    results = top_patterns.copy()
    results["search_query"] = user_text
    results["search_timestamp"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Only write the header when the log file is being created.
    results.to_csv(log_path, mode="a", index=False, header=not os.path.exists(log_path))


Unnamed: 0,Pattern Name,URL,Hybrid Similarity Score
0,Wee Lima,https://www.ravelry.com/patterns/library/wee-lima,0.577224
1,Autumn League Pullover,https://www.ravelry.com/patterns/library/autum...,0.553111
2,Maja - cardigan,https://www.ravelry.com/patterns/library/maja-...,0.516509
3,It's Not a Sweatshirt,https://www.ravelry.com/patterns/library/its-n...,0.516116
4,Lakes Pullover,https://www.ravelry.com/patterns/library/lakes...,0.506913
