In [2]:
from dotenv import load_dotenv

load_dotenv()

def convert_url(passage):
    sep = passage.split(',')
    start = sep[0].split(' ')[:4]
    end = sep[-1].split(' ')[-4:]

    start = '%20'.join(start)
    end = '%20'.join(end)

    return start + ',' + end

def convert_url_v2(passage):
    sep = passage.split(',')
    start = sep[0].split(' ')[:4]
    end = sep[-1].split(' ')[-4:]
    if len(start) < 2:
        end = sep[-1].split(' ')[-6:]

    start = '%20'.join(start)
    end = '%20'.join(end)

    return start + ',' + end

def query_huggingface(query, model, index):
    query_emb = model.encode(query, convert_to_tensor=True, show_progress_bar=False)
    res = index.query(query_emb.tolist(), top_k=10, include_metadata=True)

    nocd = 'https://www.treatmyocd.com'

    print(f'Search Query: {query}\n')
    print('---------------------------------------------------------------------------------------------------------------------')
    print('Results\n')

    
    for item in res.matches:
        print(f"Article: {nocd}{item['metadata']['article_name']}#:~:text={convert_url(item['metadata']['text'])}")
        print(f"Paragraph Header: {item['metadata']['paragraph_name']}")
        print(f"{item['score']} {item['metadata']['text'][:150]}...\n")

def query_openai(query, index):
    import openai
    import os

    openai.api_key = os.getenv('OPENAI_API_KEY')
    openai_model_query = 'text-search-babbage-query-001'
    res = openai.Embedding.create(input=query, engine=openai_model_query)
    embed = [record['embedding'] for record in res['data']]
    res = index.query(embed[0], top_k=10, include_metadata=True)

    nocd = 'https://www.treatmyocd.com'

    print(f'Search Query: {query}\n')
    print('---------------------------------------------------------------------------------------------------------------------')
    print('Results\n')

    
    for item in res.matches:
        print(f"Article: {nocd}{item['metadata']['article_name']}#:~:text={convert_url(item['metadata']['text'])}")
        print(f"Paragraph Header: {item['metadata']['paragraph_name']}")
        print(f"{item['score']} {item['metadata']['text'][:150]}...\n")

In [None]:
from IPython.display import clear_output
import pinecone
import os

with open('../secrets', 'r') as fp:
    API_KEY = fp.read()  # get api key app.pinecone.io

pinecone.init(
    api_key=API_KEY,
    environment='us-west1-gcp'
)

index = pinecone.Index('nocd-search')

from sentence_transformers import SentenceTransformer

model = SentenceTransformer('msmarco-distilbert-base-tas-b', device='cpu')
model.max_seq_length = 256


while True:
    clear_output(wait=True)
    query = input("Search NOCD: ")
    if query == 'quit': break
    query_huggingface(query=query, model=model, index=index)

In [4]:
from IPython.display import clear_output
import pinecone
import openai
import os

pinecone.init(
    api_key=os.getenv('PINECONE_API_KEY'),
    environment='us-west1-gcp'
)

index = pinecone.Index('nocd-search-openai')

while True:
    clear_output(wait=True)
    query = input("Search NOCD: ")
    if query == 'quit': break
    query_openai(query=query, index=index)

Search Query: pocd

---------------------------------------------------------------------------------------------------------------------
Results

Article: https://www.treatmyocd.com/blog/erp-is-effective-for-pocd#:~:text=For%20individuals%20with%20POCD,this%20might%20look%20like: 
Paragraph Header: 
0.395876 For individuals with POCD, this might look like: ...

Article: https://www.treatmyocd.com/blog/pocd-symtoms-and-treatment#:~:text=The%20crux%20of%20POCD,little%20while.%20For%20example:
Paragraph Header: Pedophilia vs. Pedophilia OCD
0.394308895 The crux of POCD is that people aren’t sure if they really would act on their thoughts. They never feel certain that can trust themselves, and are pro...

Article: https://www.treatmyocd.com/blog/erp-is-effective-for-pocd#:~:text=If%20you%20are%20experiencing,OCD%20and%20successfully%20recovered.
Paragraph Header: 
0.393606782 If you are experiencing the symptoms of POCD, then please consider giving NOCD a call. A free 15-minute call can p

In [7]:
from fuzzywuzzy import fuzz
from nltk import word_tokenize
from nltk.corpus import stopwords
import pandas as pd

tokenized_query = word_tokenize('what is pocd')
stop_words = set(stopwords.words('english'))
filtered_query = [w for w in tokenized_query if not w.lower() in stop_words]

# Get the data
fuzzywuzzy_df = pd.read_csv('../datasets/blogs.csv', index_col=0)
fuzzywuzzy_df['fuzzy_score'] = [fuzz.token_set_ratio(filtered_query, x) for x in fuzzywuzzy_df['text']]
res = fuzzywuzzy_df.sort_values(by='fuzzy_score', ascending=False).head(10).to_dict('records')

In [16]:
fuzzywuzzy_df = pd.read_csv('../datasets/blogs.csv', index_col=0)
fuzzywuzzy_df_titles = fuzzywuzzy_df[fuzzywuzzy_df['tag'] == 'h1']
fuzzywuzzy_df_titles = fuzzywuzzy_df_titles.set_index('article')
fuzzywuzzy_df_titles[['text']].to_dict('index')

{'/blog/nocd-support-groups-finding-help-and-hope-in-the-ocd-community': {'text': 'NOCD Support Groups: Finding Help and Hope in the OCD Community'},
 '/blog/why-i-stopped-calling-ocd-thoughts-bad': {'text': 'Why I Stopped Calling OCD Thoughts “Bad”'},
 '/blog/why-do-i-need-constant-reassurance-relationship-rocd': {'text': 'Why Do I Need Constant Reassurance in a Relationship?'},
 '/blog/ocd-affected-health-erp-saved-my-life': {'text': 'ERP to Member Advocate: My Journey From OCD to NOCD'},
 '/blog/what-if-my-anxiety-is-not-decreasing-during-erp': {'text': 'What if my anxiety is not decreasing during ERP?'},
 '/blog/two-different-kinds-of-ocd': {'text': 'Can I Have Two Different Kinds of OCD?'},
 '/blog/how-to-deal-with-guilt-and-shame-from-ocd': {'text': 'How to deal with guilt and shame from OCD'},
 '/blog/am-i-going-to-have-ocd-forever': {'text': 'Am I Going to Have OCD Forever?'},
 '/blog/the-suffering-youve-endured-from-ocd-isnt-your-fault-its-the-mental-healthcare-systems': {'tex