In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import PorterStemmer
import nltk
# Tokenizer data for nltk.word_tokenize. Older NLTK releases load 'punkt';
# recent releases (3.9+) load 'punkt_tab' instead, so fetch both to keep the
# notebook runnable regardless of the installed version.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Single stemmer instance shared by every stem() call.
ps = PorterStemmer()


def stem(text):
    """Tokenize *text* with NLTK and return its Porter-stemmed tokens joined by single spaces."""
    stemmed_tokens = map(ps.stem, nltk.word_tokenize(text))
    return ' '.join(stemmed_tokens)


# Load the medicine catalogue; the code below reads its Reason, Description
# and Drug_Name columns.
data = pd.read_csv('medicine.csv')


# Missing Reason/Description cells are read as NaN. Replace them with empty
# strings so the concatenation stays a string and stem() never receives a float.
data['combined_text'] = data['Reason'].fillna('') + ' ' + data['Description'].fillna('')
data['combined_text'] = data['combined_text'].apply(stem)

# TF-IDF features over the stemmed text, capped at the 5000 most frequent terms.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_matrix = vectorizer.fit_transform(data['combined_text'])


# Pairwise drug-to-drug similarity, indexed by row position in `data`.
cosine_sim = cosine_similarity(tfidf_matrix)


def get_drug_recommendations(query, top_k=5):
    """Rank drugs by TF-IDF cosine similarity to a free-text query.

    Args:
        query: Free-text description of the condition or need.
        top_k: Maximum number of drugs to return.

    Returns:
        A list of ``(drug_name, similarity)`` tuples ordered from most to
        least similar. The list is empty when ``top_k`` is not positive.
    """
    # Guard first: with top_k == 0 the slice ``[-0:]`` below would select
    # every row instead of none, and negative values slice arbitrarily.
    if top_k <= 0:
        return []

    # The query must go through the same stemming as the indexed text.
    processed_query = stem(query)
    query_vec = vectorizer.transform([processed_query])

    sim_scores = cosine_similarity(query_vec, tfidf_matrix)[0]

    # argsort is ascending, so take the tail and reverse it for best-first.
    top_indices = sim_scores.argsort()[-top_k:][::-1]

    # Index the name column directly rather than materialising a row per hit.
    drug_names = data['Drug_Name']
    return [(drug_names.iloc[i], sim_scores[i]) for i in top_indices]


def get_similar_drugs(drug_name, top_k=5):
    """Return the drugs whose text is most similar to that of ``drug_name``.

    Args:
        drug_name: Exact value to look up in the ``Drug_Name`` column; the
            first matching row is used.
        top_k: Maximum number of similar drugs to return.

    Returns:
        A list of ``(drug_name, similarity)`` tuples ordered from most to
        least similar, never including the queried row itself. Returns the
        string ``"Drug not found in the database."`` when no row matches,
        and an empty list when ``top_k`` is not positive.
    """
    # cosine_sim is indexed by row position, so look up the position of the
    # match rather than its index label (they differ on a non-default index).
    positions = (data['Drug_Name'] == drug_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return "Drug not found in the database."
    idx = positions[0]

    if top_k <= 0:
        return []

    sim_scores = cosine_sim[idx]

    # Rank best-first, then drop the queried row explicitly. Assuming it is
    # always the single highest score breaks when another drug ties with it
    # (e.g. identical text) or when its own vector is all zeros.
    ranked = sim_scores.argsort()[::-1]
    top_indices = ranked[ranked != idx][:top_k]

    drug_names = data['Drug_Name']
    return [(drug_names.iloc[i], sim_scores[i]) for i in top_indices]


# Demo: run the TF-IDF recommender on a sample request and print each hit
# with its similarity score.
query = "Treatment for moderate acne for age 12"
recommendations = get_drug_recommendations(query)

print(f"Top 5 recommended drugs for '{query}':")
for pair in recommendations:
    drug, score = pair
    print(f"{drug}: {score:.4f}")



In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import nltk
import torch
import re


# Tokenizer data for word_tokenize. Older NLTK releases load 'punkt';
# recent releases (3.9+) load 'punkt_tab' instead, so fetch both.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
# Stop-word lists read by preprocess_text().
nltk.download('stopwords', quiet=True)

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords


# Load the medicine catalogue; the code below reads its Reason, Description
# and Drug_Name columns.
data = pd.read_csv(filepath_or_buffer='medicine.csv')


# Lazily filled cache of the English stop-word set (see _english_stop_words).
_STOP_WORDS = None


def _english_stop_words():
    """Return the English stop-word set, loading it from NLTK only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess_text(text):
    """Normalise free text before it is embedded.

    The input is coerced to ``str``, lower-cased, stripped of every character
    that is not a letter, digit or whitespace, tokenized, and filtered of
    English stop words.

    Args:
        text: Value to clean; non-string values are converted with ``str()``.

    Returns:
        The remaining tokens joined by single spaces.
    """
    text = str(text).lower()

    # Characters are deleted rather than replaced by a space, so hyphenated
    # or apostrophised words collapse into a single token.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    tokens = word_tokenize(text)

    # Use the cached set: this function runs once per row, and re-reading the
    # stop-word list on every call is loop-invariant work.
    stop_words = _english_stop_words()
    return ' '.join(token for token in tokens if token not in stop_words)

# Build one cleaned text per drug from its Reason and Description columns;
# missing cells count as empty strings.
data['combined_text'] = (
    data['Reason'].fillna('') + ' ' + data['Description'].fillna('')
)
data['combined_text'] = data['combined_text'].apply(preprocess_text)

# Sentence-embedding model used for both the catalogue and incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Embed every drug text once up front; queries are compared against these.
drug_embeddings = model.encode(
    data['combined_text'].tolist(),
    show_progress_bar=True,
)


def get_drug_recommendations(query, top_k=5):
    """Rank drugs by embedding cosine similarity to a free-text query.

    Args:
        query: Free-text description of the condition or need.
        top_k: Maximum number of drugs to return.

    Returns:
        A list of ``(drug_name, similarity)`` tuples ordered from most to
        least similar. It holds fewer than ``top_k`` entries when fewer drugs
        are available and is empty when ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    # The query must go through the same cleaning as the indexed text.
    processed_query = preprocess_text(query)

    query_embedding = model.encode([processed_query])

    cos_scores = util.pytorch_cos_sim(query_embedding, drug_embeddings)[0]

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = min(top_k, cos_scores.shape[0])
    top_scores, top_positions = torch.topk(cos_scores, k=k)

    drug_names = data['Drug_Name']
    return [
        (drug_names.iloc[position], score)
        for score, position in zip(top_scores.tolist(), top_positions.tolist())
    ]





# Demo: run the embedding-based recommender on a sample request and print
# each hit with its similarity score.
query = "Treatment for moderate acne for age 12"
recommendations = get_drug_recommendations(query)

print(f"Top 5 recommended drugs for '{query}':")
for pair in recommendations:
    drug, score = pair
    print(f"{drug}: {score:.4f}")



In [None]:
!pip install sentence_transformers