In [33]:
import weaviate
import pandas as pd
import os
from openai.embeddings_utils import get_embedding
from langchain.vectorstores.weaviate import Weaviate
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate

In [13]:
# Knowledge base for the chatbot: one short paragraph per entry.
# Each entry becomes one row of `df` and, later, one object in the vector store,
# so every paragraph is listed exactly once (a repeated paragraph would be
# embedded and indexed twice and could occupy several retrieval slots).
data = {
    "text": [
        "TinkerSpace by TinkerHub",
        "CEO of TinkerHub Foundation is Moosa Mehar MP",
        "CTO of TinkerHub Foundation is Praveen Sridhar",
        "COO of TinkerHub Foundation is Kurian Jacob",
        "TinkerSpace would be the first Hub of its kind where anyone could walk in and learn exponential technologies and coding for free. The space is set up and run by startup entrepreneurs and technologists from the state who have created a great future through learning and building technology.",
        "Here, we aim to provide access to the latest technologies & futuristic skills through a community-owned network. We aspire to break the existing barriers to learning and seek to reach out to different strata of the society. We will create abundant opportunities for professional networking and mentoring to foster innovation and maker culture among learners. The core idea is to build the irreplaceable skill of seeking life-long among the younger generation.",
        "This space is for Public Community, Early Stage Developers, Early Stage Developers, College Students, College Students, CTO and Tech Leadership, Technology Companies, School Students, Individuals looking for Career Switch",
        "Community Meetups - There are 40+ active communities in kerala around technology. These communities share knowledge around technology learning and design.",
        "Hackathons and Hack Nights - Hackathons are a great avenue to showcase the tech skills someone gained along with learning something new with your peer group in a limited time frame. Over years TinkerHub have been part of 100_ hackathins. We are looking forward to work with our Tech companies to create more engaging ones in the coming days",
        "HackNights are single night building events focusing on a specific technology / API. TinkerHub is hosting a hacknight once in every two weeks. Checkout the amazing project build by our community at hacknight.tinkerhub.org",
        "Collaborative Projects - The space and community is build around the themes of open source and collaboration for social good. There would be structured as well as ad hoc programs and activities to encourage community members to co create digital social goods. We are promoting small side projects and micro SaaS within the learner community which will enable them to have a hands on end to end experience of finding problems to creating tech solutions and getting users for the same.",
        "The space is 100 meters from Seaport-Aiport Road. Nearest Metro station is Kalamassery",
        "Our funding partners are Samagata Foundation and Foss United Foundation",
        "Samagata Foundation is a non profit ogranisation in India that supports projects and ideas even the little ones that bring value to society",
        "Tinkerhub is a proud awardee of grants from FOSS United. FOSS United is a non profit foundation aiming to promote and strengthen free and open source software, open standards, and open content in India",
        "No, Its not a co-working space. This space for people to learn new technologies",
        "Its free and open for anyone wish to learn coding / new technologies.",
        "TinkerHub is a non profit organisation running on grants. We have organisational donors like FOSS United helping to set up this space and community memebrs are contributing monthly to cover expenses.",
    ]
}

# drop_duplicates is a guard: if a paragraph is pasted twice again in the future,
# it still ends up as a single row. reset_index keeps the index contiguous.
df = pd.DataFrame(data).drop_duplicates(subset="text").reset_index(drop=True)

In [14]:
# NOTE(review): this import sits outside the notebook's main import cell at the top.
import openai
# Read the OpenAI key from the environment; .get() yields None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def generate_data_embeddings(df, engine="text-embedding-ada-002", embed_fn=None):
    """Attach an 'embedding' column to ``df`` with one vector per 'text' value.

    The frame is modified in place (the 'embedding' column is assigned on the
    object passed in) and the same object is returned for convenience.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'text' column; each value is embedded individually.
    engine : str, optional
        Model name forwarded to ``get_embedding``. Ignored when ``embed_fn``
        is supplied.
    embed_fn : callable, optional
        ``text -> vector`` function used instead of ``get_embedding``, e.g. a
        cached or offline embedder. Defaults to ``get_embedding`` with ``engine``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the 'embedding' column.

    Raises
    ------
    KeyError
        If ``df`` has no 'text' column.
    """
    if embed_fn is None:
        # Default backend: one get_embedding call per row of the 'text' column.
        def embed_fn(text):
            return get_embedding(text, engine=engine)

    df['embedding'] = df['text'].apply(embed_fn)
    return df

# Adds the 'embedding' column to df in place; the returned frame is the cell's displayed output.
generate_data_embeddings(df)

Unnamed: 0,text,embedding
0,TinkerSpace by TinkerHub,"[-0.0011928984895348549, -0.021477267146110535..."
1,CEO of TinkerHub Foundation is Moosa Mehar MP,"[0.005719152744859457, -0.021703796461224556, ..."
2,CTO of TinkerHub Foundation is Praveen Sridhar,"[0.023419929668307304, -0.019434433430433273, ..."
3,COO of TinkerHub Foundation is Kurian Jacob,"[0.01906481571495533, -0.0096348337829113, -0...."
4,TinkerSpace would be the first Hub of its kind...,"[0.002446505008265376, -0.016495270654559135, ..."
5,"Here, we aim to provide access to the latest t...","[-0.023390712216496468, -0.006889603100717068,..."
6,"This space is for Public Community, Early Stag...","[0.015731865540146828, -0.020199930295348167, ..."
7,Community Meetups - There are 40+ active commu...,"[0.030483150854706764, -0.005060984753072262, ..."
8,Hackathons and Hack Nights - Hackathons are a ...,"[-0.0018999131862074137, -0.003965637180954218..."
9,HackNights are single night building events fo...,"[-0.002665837062522769, -0.006222536321729422,..."


In [15]:
# Authenticated connection to the Weaviate instance. URL and API keys come
# from the environment so no credential is stored in the notebook.
auth_config = weaviate.AuthApiKey(api_key=os.environ.get("WEAVIATE_API_KEY"))
client = weaviate.Client(
    url=os.environ.get("WEAVIATE_URL"),
    auth_client_secret=auth_config,
    # Forwarded with every request; presumably consumed by the text2vec-openai
    # vectorizer named in the schema below — confirm against the cluster setup.
    additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")},
)


In [16]:
def weaviate_create_schema():
    """Create the ``TinkerSpaceBase`` class in Weaviate if it is not there yet.

    The class has a single text property, ``content``, and names
    ``text2vec-openai`` as its vectorizer. Uses the module-level ``client``.

    The function is safe to call repeatedly: when the connected instance
    already contains this schema, nothing is sent. Without that guard a second
    run of the cell asks Weaviate to create an existing class, which the
    server is expected to reject.
    """
    schema = {
        "classes": [{
            "class": "TinkerSpaceBase",
            "description": "Contains the paragraphs of text along with their embeddings",
            "vectorizer": "text2vec-openai",
            "properties": [{
                "name": "content",
                "dataType": ["text"],
            }]
        }]
    }
    # contains() reports whether the given schema is already part of the
    # schema loaded on the server, so creation happens only when needed.
    if client.schema.contains(schema):
        return
    client.schema.create(schema)
weaviate_create_schema()

In [17]:
def weaviate_add_data(df):
    """Upload every row of ``df`` to the ``TinkerSpaceBase`` class in batches.

    Each row is stored as an object whose ``content`` property is the row's
    'text' value, with the row's 'embedding' value supplied as its vector.
    Uses the module-level ``client``.

    Object ids are derived deterministically from the text (UUID5), so running
    the cell again targets the same ids instead of inserting a second copy of
    every paragraph under fresh random ids.
    NOTE(review): relies on Weaviate's batch import replacing an object whose
    id already exists — confirm for the server version in use.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'text' and 'embedding' columns.
    """
    import uuid  # local import: only this loader needs it

    client.batch.configure(batch_size=10)
    # The context manager sends whatever is still queued when the block exits.
    with client.batch as batch:
        for text, vector in zip(df['text'], df['embedding']):
            object_id = uuid.uuid5(uuid.NAMESPACE_DNS, "TinkerSpaceBase:" + str(text))
            batch.add_data_object(
                data_object={"content": text},
                class_name="TinkerSpaceBase",
                uuid=str(object_id),
                vector=vector,
            )

    print("Data Added!")

weaviate_add_data(df)

Data Added!


In [18]:
def query(input_text, k, engine="text-embedding-ada-002"):
    """Return the ``content`` of the ``k`` stored paragraphs nearest to ``input_text``.

    The input is embedded with ``get_embedding`` and used for a near-vector
    search over the ``TinkerSpaceBase`` class through the module-level ``client``.

    Parameters
    ----------
    input_text : str
        Free-text question or phrase to search for.
    k : int
        Maximum number of paragraphs to return.
    engine : str, optional
        Embedding model name forwarded to ``get_embedding``.

    Returns
    -------
    list
        The ``content`` values of the matches, in the order Weaviate returned them.

    Raises
    ------
    RuntimeError
        If the response carries an ``errors`` entry instead of usable data.
    """
    input_embedding = get_embedding(input_text, engine=engine)
    result = (
        client.query
        .get("TinkerSpaceBase", ["content"])
        .with_near_vector({"vector": input_embedding})
        .with_limit(k)
        .do()
    )

    # A failed GraphQL call comes back as a dict with "errors"; surface that
    # message directly rather than failing later on a missing "data" key.
    if "errors" in result:
        raise RuntimeError(f"Weaviate query failed: {result['errors']}")

    closest_paragraphs = result["data"]["Get"]["TinkerSpaceBase"] or []
    return [p.get('content') for p in closest_paragraphs]

# Quick retrieval check: print the three paragraphs closest to a sample question.
input_text, k = "is it a co working space ?", 3

result = query(input_text, k)
for text in result:
    print(text)

No, Its not a co-working space. This space for people to learn new technologies
This space is for Public Community, Early Stage Developers, Early Stage Developers, College Students, College Students, CTO and Tech Leadership, Technology Companies, School Students, Individuals looking for Career Switch
TinkerSpace would be the first Hub of its kind where anyone could walk in and learn exponential technologies and coding for free. The space is set up and run by startup entrepreneurs and technologists from the state who have created a great future through learning and building technology.


In [39]:
# --- Conversational QA pipeline over the TinkerSpaceBase index ---------------

# Weaviate connection for the chat pipeline (same settings as the earlier cell).
auth_config = weaviate.AuthApiKey(api_key=os.environ.get("WEAVIATE_API_KEY"))
client = weaviate.Client(
    url=os.environ.get("WEAVIATE_URL"),
    auth_client_secret=auth_config,
    additional_headers={"X-OpenAI-Api-Key": os.environ.get("OPENAI_API_KEY")},
)

# LangChain wrapper around the class; "content" is the property holding the text.
vectorstore = Weaviate(client=client, index_name="TinkerSpaceBase", text_key="content")

# Completion model used by the chain.
MyOpenAI = OpenAI(
    temperature=0.2,
    openai_api_key=os.environ.get("OPENAI_API_KEY"),
)

# Conversation history is kept under 'chat_history'; 'answer' is the chain
# output that gets recorded.
memory = ConversationBufferMemory(
    memory_key='chat_history',
    return_messages=True,
    output_key='answer',
)

# Prompt for the answering step: persona, retrieved context, then the question.
template = "You are SpaceAI, TinkerSpace's front desk chatbot created by GKS and team. You use the following pieces of context to answer the question about Tinkerspace given at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \n\nContext: {context}\n\nQuestion: {question}\n\nAnswer:"

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Retrieval chain: fetch paragraphs from the vector store, then answer with the
# custom prompt while tracking the conversation in `memory`.
qa = ConversationalRetrievalChain.from_llm(
    llm=MyOpenAI,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    combine_docs_chain_kwargs={'prompt': prompt},
)

# Sample conversation with the chain. The loop variable is called `question`
# (not `query`) so the retrieval helper function `query` defined earlier in the
# notebook is not overwritten by a string.
questions = [
    "What is TinkerSpace ?",
    "Who is Kurian ?",
    "Who are you ?",
    "What is the meaning of life ?",
]

for question in questions:
    # Each call goes through the same chain, so earlier turns stay in its memory.
    result = qa({"question": question})
    print(result["answer"])

 TinkerSpace is a Hub of its kind where anyone can walk in and learn exponential technologies and coding for free. It is set up and run by startup entrepreneurs and technologists from the state. It is funded by grants and donations from organisations like FOSS United and contributions from community members.
 Kurian Jacob is the COO of TinkerHub Foundation.
 I am SpaceAI, the front desk chatbot created by GKS and team for TinkerSpace.
 I'm sorry, I don't know the answer to that question.
