In [7]:
import cohere
import os
import time
import openai
from tqdm import tqdm
import configparser
import pickle

Load the Cohere and OpenAI API keys from `../config.ini`

In [3]:
# Load API credentials from an INI file kept outside the notebook so that no
# key is hardcoded here. The file must provide an `api_key` option in both a
# [cohere] and an [openai] section.
config_path = "../config.ini"
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of a
# confusing NoSectionError from config.get() below.
if not config.read(config_path):
    raise FileNotFoundError("Could not read config file: {}".format(config_path))
cohere_api_key = config.get('cohere', 'api_key')
openai_api_key = config.get('openai', 'api_key')
# Clients used by the rest of the notebook: `co` for Cohere, module-level key for OpenAI.
co = cohere.Client(cohere_api_key)
openai.api_key = openai_api_key

# Messing around with Cohere

In [4]:
# Sample passage used as the input for the Cohere summarization and
# ChatGPT categorization experiments in the cells that follow.
text ="""It's an exciting day for the development community. Cohere's state-of-the-art language AI is now available through Amazon SageMaker. This makes it easier for developers to deploy Cohere's pre-trained generation language model to Amazon SageMaker, an end-to-end machine learning (ML) service. Developers, data scientists, and business analysts use Amazon SageMaker to build, train, and deploy ML models quickly and easily using its fully managed infrastructure, tools, and workflows.
At Cohere, the focus is on language. The company's mission is to enable developers and businesses to add language AI to their technology stack and build game-changing applications with it. Cohere helps developers and businesses automate a wide range of tasks, such as copywriting, named entity recognition, paraphrasing, text summarization, and classification. The company builds and continually improves its general-purpose large language models (LLMs), making them accessible via a simple-to-use platform. Companies can use the models out of the box or tailor them to their particular needs using their own custom data.
Developers using SageMaker will have access to Cohere's Medium generation language model. The Medium generation model excels at tasks that require fast responses, such as question answering, copywriting, or paraphrasing. The Medium model is deployed in containers that enable low-latency inference on a diverse set of hardware accelerators available on AWS, providing different cost and performance advantages for SageMaker customers.
"""

In [5]:
# Summarize the sample passage with Cohere; the bare expression displays
# the `.summary` attribute of the response as the cell output.
co.summarize(
    text,
    model='summarize-medium',
    length='short',
    extractiveness='high',
).summary

"Cohere's state-of-the-art language AI is now available through Amazon SageMaker, making it easier for developers to deploy the company's pre-trained generation language model to the end-to-end machine learning service. Cohere's focus is on language, and the company's mission is to enable developers and businesses to add language AI to their technology stack and build game-changing applications with it."

In [11]:
def walkDirContent(path):
    """Recursively collect the text of every markdown file under ``path``.

    Args:
        path: Directory to walk.

    Returns:
        dict mapping each ``.md`` file's base name (not its full path) to its
        content, with the file's lines joined by a single space. Files in
        different sub-directories that share a base name overwrite one
        another; the one visited last wins.
    """
    text_content = {}
    # os.listdir() returns entries in arbitrary order; sort them so the walk
    # (and therefore the order of the resulting dict) is reproducible.
    for item in sorted(os.listdir(path)):
        content = os.path.join(path, item)
        if os.path.isdir(content):
            text_content.update(walkDirContent(content))
        elif content.endswith(".md"):
            # Decode as UTF-8 explicitly instead of relying on the platform's
            # default encoding, which differs between machines.
            with open(content, encoding="utf-8") as fp:
                text_content[item] = " ".join(fp.readlines())
    return text_content

In [12]:
# Root directory of the notes to process, relative to this notebook.
path = "../my-second-brain/"
# Maps each markdown file name found under `path` to that file's text.
content = walkDirContent(path)

In [13]:
# Summarize every note with Cohere, pausing whenever the per-window call
# budget is used up.
call_limit = 5               # API calls allowed before pausing
pause_seconds = 60           # how long to wait once the budget is spent
min_summarize_chars = 250    # notes this short or shorter are kept verbatim
                             # (presumably the endpoint's minimum input length — TODO confirm)
calls = 0
summaries = {}
for key, note_text in tqdm(content.items()):
    if len(note_text) <= min_summarize_chars:
        # No API call is made for short notes, so they must not count
        # against the budget (otherwise we sleep for no reason).
        summaries[key] = note_text
        continue
    if calls == call_limit:
        print("Sleeping for 60 seconds")
        time.sleep(pause_seconds)
        calls = 0
    summaries[key] = co.summarize(note_text, model='summarize-medium', length='short', extractiveness='high').summary
    calls += 1

  1%|          | 5/594 [00:06<13:24,  1.37s/it]

Sleeping for 60 seconds


  1%|          | 5/594 [00:10<20:05,  2.05s/it]


KeyboardInterrupt: 

# Messing around with ChatGPT

In [6]:
# One-off experiment: ask the chat model to categorize the sample passage
# in at most five words, then display the reply as the cell output.
model_engine = "gpt-3.5-turbo"
prompt = {
    "role": "system",
    "content": "Categorize the following text using maximum five words: " + text,
}
completion = openai.ChatCompletion.create(model=model_engine, messages=[prompt])
response = completion["choices"][0]["message"]["content"]
response

'Technology, language AI, Amazon SageMaker, developers, Cohere'

In [43]:
def generate_summarization(text, model_engine = "gpt-3.5-turbo"):
    """Ask an OpenAI chat model for a one-to-three-word label for ``text``.

    Args:
        text: The passage to label; it is appended to the instruction prompt.
        model_engine: Name of the chat model passed to the API.

    Returns:
        The message content of the first choice in the API response.
    """
    instruction = f"Use a minimum of one word and a maximum of three words to summarize the text, shorter answers are preferred and be as specific as possible: {text}"
    # The whole instruction, including the passage, is sent as a single system message.
    messages = [{"role": "system", "content": instruction}]
    completion = openai.ChatCompletion.create(model=model_engine, messages=messages)
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

In [44]:
# Load a previously pickled object from disk into `summaries`, replacing any
# `summaries` dict already in memory (presumably the Cohere summaries saved
# from an earlier full run — verify against the file's producer).
# NOTE(review): absolute, machine-specific path — this cell will not run on
# another machine; consider a path relative to the project.
# NOTE(review): pickle.load can execute arbitrary code; only load files you
# created yourself.
with open("/Users/kaanankharwa/Desktop/Label-Generation/Data/cohere_summaries", "rb") as fp:
    summaries = pickle.load(fp)

In [45]:
# Generate a short label for every note, retrying once after a pause if the
# API call fails (e.g. on a rate limit).
one_word = {}
for file_name in tqdm(content.keys()):
    # Prefer the stored summary when one exists; otherwise label the raw note text.
    source_text = summaries[file_name] if file_name in summaries else content[file_name]
    try:
        one_word[file_name] = generate_summarization(source_text)
    except Exception as err:
        # `except Exception` rather than a bare `except:` so KeyboardInterrupt
        # and SystemExit still stop the run instead of triggering a sleep.
        # Show what went wrong rather than discarding the error.
        print("{} failed: {!r}".format(file_name, err))
        print("sleeping for 1 min")
        time.sleep(60)
        # Single retry; a second failure propagates so it is not hidden.
        one_word[file_name] = generate_summarization(source_text)

 34%|███▎      | 200/594 [03:07<08:08,  1.24s/it]

sleeping for 1 min


100%|██████████| 594/594 [11:11<00:00,  1.13s/it]  


In [46]:
# Print each file name (left-aligned, padded to 120 columns) next to its label.
for note_name, label in one_word.items():
    print(f"{note_name:<120} {label:>5}")

Create a programming language.md                                                                                         Best code talk.
Programming languages.md                                                                                                 Favorite programming languages list.
Inspectional reading.md                                                                                                  Inspectional reading for quick understanding.
Reading philosophy.md                                                                                                    5 styles of philosophy books.
Reading imaginary.md                                                                                                     Imaginary reading techniques.
Reading technics should differ depending on book type.md                                                                 Varying reading techniques.
Elementary reading.md                                                                  

In [47]:
# Persist the generated labels (`one_word`) to disk with pickle so they can be
# reloaded without calling the API again.
# NOTE(review): absolute, machine-specific path — this cell will not run on
# another machine; consider a path relative to the project.
with open("/Users/kaanankharwa/Desktop/Label-Generation/Data/openai_summaries_categorize", "wb") as fp:
    pickle.dump(one_word, fp)