### Article Recommendation

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import joblib

In [None]:
# Load the final preprocessed DataFrame. Later cells read its `clean_text`
# and `clean_title` columns.
# NOTE: read_pickle unpickles the file, which can execute arbitrary code, so
# only point this at a file you trust.
df = pd.read_pickle('final_nlp_data.pkl')  # Adjust path as needed

In [None]:
# TF-IDF vectorization of the cleaned article text
# Vectorizer settings: keep at most 5000 vocabulary terms and drop
# scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
# Learn the vocabulary from the `clean_text` column and encode it in one pass.
# recommend_articles looks results up in `df` by position, so the matrix rows
# must stay in the same order as the rows of `df`.
tfidf_matrix = tfidf.fit_transform(raw_documents=df['clean_text'])

In [None]:
# Build the nearest-neighbor index over the TF-IDF rows, using cosine
# distance with an exhaustive (brute-force) search.
nn = NearestNeighbors(algorithm='brute', metric='cosine')
nn.fit(X=tfidf_matrix)

In [None]:
# Save all to disk (uncomment the lines below to persist the fitted objects)
# joblib.dump(tfidf, 'models/tfidf_vectorizer.pkl')
# joblib.dump(tfidf_matrix, 'models/tfidf_matrix.pkl')
# joblib.dump(nn, 'models/nearest_neighbors.pkl')

In [None]:
# Reload the persisted vectorizer, TF-IDF matrix and neighbor index, in that
# order. This rebinds `tfidf`, `tfidf_matrix` and `nn`, replacing any objects
# fitted earlier in the session. The paths match the commented-out save cell.
# NOTE: joblib.load and read_pickle unpickle data, which can execute arbitrary
# code; only load files you trust.
tfidf, tfidf_matrix, nn = (
    joblib.load(artifact_path)
    for artifact_path in (
        'models/tfidf_vectorizer.pkl',
        'models/tfidf_matrix.pkl',
        'models/nearest_neighbors.pkl',
    )
)
# The DataFrame is needed as well: recommend_articles reads its rows.
df = pd.read_pickle('final_nlp_data.pkl')

In [None]:
# Recommend Function
def recommend_articles(query, top_k=5):
    """Return the indexed articles closest to a free-text query.

    The query is encoded with the module-level ``tfidf`` vectorizer and
    matched against the fitted module-level ``nn`` index. The neighbor
    indices are then used to select rows of ``df`` by position.

    Args:
        query: Text to search for.
        top_k: Maximum number of articles to return. A value larger than the
            number of indexed articles is capped at that number instead of
            making ``kneighbors`` raise. Other values are passed through
            unchanged, so ``kneighbors`` still rejects a non-positive or
            non-integer ``top_k``.

    Returns:
        A DataFrame with the columns ``clean_title``, ``similarity`` and
        ``clean_text``, in the order returned by ``nn.kneighbors``.
        ``similarity`` is ``1 - cosine distance``.
    """
    # Encode the query with the already-fitted vectorizer so that it lives in
    # the same feature space as the indexed articles.
    query_vec = tfidf.transform([query])

    # kneighbors raises ValueError when asked for more neighbors than the
    # index holds, so never request more than were fitted.
    n_neighbors = min(top_k, nn.n_samples_fit_)
    distances, indices = nn.kneighbors(query_vec, n_neighbors=n_neighbors)

    # Only one query was submitted, so row 0 holds its neighbors. Copy the
    # selection so adding a column does not touch `df` itself.
    results = df.iloc[indices[0]].copy()
    results["similarity"] = 1 - distances[0]  # Cosine similarity = 1 - distance
    return results[["clean_title", "similarity", "clean_text"]]

In [None]:
# Example: fetch and display the five closest articles for a sample query.
query = "Recent advancements in AI for healthcare"
recommendations = recommend_articles(query=query, top_k=5)
print(recommendations)