In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY= os.getenv("OPENAI_API_KEY")


In [3]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    # With the `text-embedding-3` class
    # of models, you can specify the size
    # of the embeddings you want returned.
    # dimensions=1024
)

In [4]:
from langchain_core.vectorstores import InMemoryVectorStore

text = "LangChain is the framework for building context-aware reasoning applications"

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings,
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What is LangChain?")

# show the retrieved document's content
retrieved_documents[0].page_content

'LangChain is the framework for building context-aware reasoning applications'

In [5]:
vectorstore

<langchain_core.vectorstores.in_memory.InMemoryVectorStore at 0x7f936096b490>

In [6]:
retriever

VectorStoreRetriever(tags=['InMemoryVectorStore', 'OpenAIEmbeddings'], vectorstore=<langchain_core.vectorstores.in_memory.InMemoryVectorStore object at 0x7f936096b490>, search_kwargs={})

https://python.langchain.com/docs/integrations/text_embedding/openai/

single text

In [25]:
single_vector = embeddings.embed_query(text)
single_vector

[-0.019262680783867836,
 0.0037691951729357243,
 -0.03288382291793823,
 0.0037786229513585567,
 0.008168160915374756,
 -0.012512446381151676,
 -0.00975955743342638,
 0.021389570087194443,
 -0.015408636070787907,
 0.0018544290214776993,
 0.020590100437402725,
 0.02241530455648899,
 -0.023260027170181274,
 -0.01520499773323536,
 -0.005007995292544365,
 -0.007391318213194609,
 -0.03451292961835861,
 9.22736362554133e-05,
 0.02137448638677597,
 -0.012519988231360912,
 -0.020047064870595932,
 0.027438383549451828,
 -0.03924940899014473,
 0.03034965880215168,
 0.030379826202988625,
 -0.021766677498817444,
 0.03258213773369789,
 -0.02113313600420952,
 -0.02597520500421524,
 0.018870487809181213,
 0.03421124443411827,
 -0.013334541581571102,
 0.0037861650343984365,
 -0.02144990675151348,
 0.0012501132441684604,
 -0.05563098192214966,
 0.05113585665822029,
 0.0025567926932126284,
 -0.019126921892166138,
 0.0037409120704978704,
 0.0008456648793071508,
 0.025205904617905617,
 -0.01877998188138008

multiple text

In [9]:
text2 = (
    "LangGraph is a library for building stateful, multi-actor applications with LLMs"
)
two_vectors = embeddings.embed_documents([text, text2])
# for vector in two_vectors:
#     print(str(vector)[:100])  # Show the first 100 characters of the vectortype()
type(two_vectors)

list

In [13]:
len(two_vectors[0])

3072

In [10]:
from pymilvus import connections, utility, Collection, CollectionSchema, FieldSchema,DataType


In [12]:
connections.connect(host= 'localhost', port=str(19530))
print("Milvus connected")

Milvus connected


In [13]:
def create_milvus_db(collection_name):
    item_id    = FieldSchema( name="id",         dtype=DataType.INT64,    is_primary=True, auto_id=True )
    text       = FieldSchema( name="text",       dtype=DataType.VARCHAR,  max_length= 50000             )
    embeddings = FieldSchema( name="embeddings", dtype=DataType.FLOAT_VECTOR,    dim=768                )
    schema     = CollectionSchema( fields=[item_id, text, embeddings], description="Inserted policy from user", enable_dynamic_field=True )
    collection = Collection( name=collection_name, schema=schema, using='default' )
    return collection

In [15]:
def connect_openai_embedding():
    openai_embedding = OpenAIEmbeddings(
    model="text-embedding-3-large",
    dimensions=768
)
    return openai_embedding
openai_embedding = connect_openai_embedding()

In [14]:
def embedding_data(chunks, collection_name, model_emb):
    print('embedding...')
    vector  = model_emb.embed_documents(chunks) # embedding chunks of text
    print('no. of chunks : ', len(chunks))
    print('storing data in Milvus vector database...')
    print('üìå')
    print([chunks,vector])
    collection = create_milvus_db(collection_name)
    print('ü©∑')
    collection.insert([chunks,vector])
    collection.create_index(field_name="embeddings",
                            index_params={"metric_type":"IP","index_type":"IVF_FLAT","params":{"nlist":16384}})
    return collection

In [16]:
embedding_data(['apple','banana'], 'testest', openai_embedding)

embedding...
no. of chunks :  2
storing data in Milvus vector database...
üìå
[['apple', 'banana'], [[-0.03152789548039436, 0.02124146930873394, -0.0013050902634859085, 0.028364818543195724, -0.012369426898658276, -0.0030377102084457874, 0.005779685452580452, 0.0327879823744297, 0.024353113025426865, 0.04832048341631889, 0.026821855455636978, -0.03409950062632561, 0.02134433388710022, 0.02610180526971817, -0.026294676586985588, 0.033868055790662766, 0.03391948714852333, -0.018669862300157547, -0.0314764641225338, -0.014761021360754967, 0.0243016816675663, -0.08090274035930634, 0.020521419122815132, 0.043357282876968384, 0.025446046143770218, 0.007296933326870203, -0.005496808793395758, -0.0006561614573001862, -0.005692893639206886, 0.03898555412888527, 0.05379800498485565, 0.047060396522283554, -0.013860958628356457, 0.008486301638185978, -0.08357720822095871, 0.05575242638587952, 0.011025762185454369, -0.0030119940638542175, 0.03553960099816322, -0.04114570468664169, 0.01661257818341

<Collection>:
-------------
<name>: testest
<description>: Inserted policy from user
<schema>: {'auto_id': True, 'description': 'Inserted policy from user', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 50000}}, {'name': 'embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}

In [17]:
chunks = ['# Data Team Organization\nGitLab Data Team Organization\n## Data Team Organization\nThe Data Team Organization model is guided by three primary business  \nneeds:\n- The need for bespoke data solutions unique to the GitLab business.\n- The need for high-performance and reliable data storage and compute  \nplatform to support distributed analyst teams.\n- The need for centers of excellence for data technologies and advanced  \nanalytics.\n- The need for flexible data solutions driven by varying urgency and  \nquality requirements.\nBased on these needs, the Data Team is organized in the following way:\n- Data Pods: Pods are assembled to provide concentrated focus on  \ndelivering & maintaining data products for strategic company  \ninitiatives. Pods are staffed with multiple data personas including Data  \nAnalyst, Data Scientist, Analytics Engineer, and supported by Data  \nEngineer as stable counterpart.\n- Analytics Engineering: Transform raw data into clean, structured, and  \nusable formats for data ', 's for strategic company  \ninitiatives. Pods are staffed with multiple data personas including Data  \nAnalyst, Data Scientist, Analytics Engineer, and supported by Data  \nEngineer as stable counterpart.\n- Analytics Engineering: Transform raw data into clean, structured, and  \nusable formats for data decision-making. The Lead Analytics Engineer  \nserves as a stable counterpart for business departments and functional  \nanalytics teams.\n- Data Platform & Engineering Team: Center of Excellence for data  \ntechnologies, including owning and operating the Data Stack\n- Data Science Team: Center of Excellence for advanced analytics,  \nincluding delivery of data science projects to the business\n### Data Pod Assignments\nPOD | Data Product Manager | Analytics Engineer | Data Analyst | Data \nScientist\n--- | --- | --- | --- | ---\nEnterprise Metrics | @nmcavinue | @lisvinueza @chrissharp | @annie-\nanalyst |\nCustomer Intelligence | @nmcavinue | @snalamaru | @jonglee1218 |\nCustomer Product Adoption | @m', 'a Pod Assignments\nPOD | Data Product Manager | Analytics Engineer | Data Analyst | Data \nScientist\n--- | --- | --- | --- | ---\nEnterprise Metrics | @nmcavinue | @lisvinueza @chrissharp | @annie-\nanalyst |\nCustomer Intelligence | @nmcavinue | @snalamaru | @jonglee1218 |\nCustomer Product Adoption | @mdrussell | @elonmusk | @utkarsh060 |\n### Analytics Engineering - Business Stable Counterpart Assignments\nDepartment | Functional Analytics Team | Analytics Engineer\n--- | --- | ---\nSales | Revenue Strategy and Analytics | @snalamaru\nMarketing | Marketing Strategy and Analytics | @snalamaru\nFinance | FP&A Analytics | @chrissharp\nCustomer Success | CS Strategy and Analytics | @mdrussell\nProduct | Product Data Insights | @elonmusk\nEngineering | Engineering Analytics | @elonmusk\nSecurity | Engineering Analytics | @elonmusk\nSupport | N/A | @elonmusk\nPeople | People Analytics | @rakhireddy (ramping)22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tm', 'oduct Data Insights | @elonmusk\nEngineering | Engineering Analytics | @elonmusk\nSecurity | Engineering Analytics | @elonmusk\nSupport | N/A | @elonmusk\nPeople | People Analytics | @rakhireddy (ramping)22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 1/6\n### Data Platform Team Stable Counterpart Assignments\nPOD | Data Engineer\n--- | ---\nEnterprise Metrics | @juwong\nCustomer Intelligence | @rigerta\nCustomer Product Adoption | @rbacovic\n## Manager, Data\nIn support of the Data Pod, the Manager, Data fulfills the below  \nresponsibilities from the Senior Manager, Data Job Responsibilities:\n- Works with the Director, Data to envision and draft Quarterly  \nObjectives, driven by requirements gathered from multiple business  \npartners.\n- Monitor, measure, and improve key aspects of the Data Pods.\n- Regularly meet with business partners to understand and solve for data  \nneeds.\n- Serve as a primary or back-up Maintainer on the Data ', 'and draft Quarterly  \nObjectives, driven by requirements gathered from multiple business  \npartners.\n- Monitor, measure, and improve key aspects of the Data Pods.\n- Regularly meet with business partners to understand and solve for data  \nneeds.\n- Serve as a primary or back-up Maintainer on the Data Team Project.  \nProvide final review, feedback, and approval of Merge Requests submitted  \nby the Data Pod and stable counterparts.\n## Lead Analytics Engineer (Stable Counterparts for the Business)\nIn support of the Data Pod and Stable Counterpart relationships, the  \nLead Analytics Engineer fulfills the below responsibilities from the  \nSenior Analytics Engineer Job Responsibilities:\n- Own one or more stakeholder relationship in Go To Market, Research &  \nDevelopment, General & Administrative, Financial Analytics, or  \nEngineering Analytics business functions.\n- Co-DRI of Key Results along with the Manager, Data.\n- Lead work breakdown sessions for OKRs.\n- Work with functional stakeholders t', ' more stakeholder relationship in Go To Market, Research &  \nDevelopment, General & Administrative, Financial Analytics, or  \nEngineering Analytics business functions.\n- Co-DRI of Key Results along with the Manager, Data.\n- Lead work breakdown sessions for OKRs.\n- Work with functional stakeholders to prioritize P3-Other issues.\n- Serve as a primary or back-up Maintainer on the Data Team Project.  \nProvide final review, feedback, and approval of Merge Requests submitted  \nby the Data Pod and stable counterparts.\n- Review the weekly stand-up and provide support as needed to unblock  \nteam members and answer questions.\n## Data Platform Team Stable Counterpart\nFollowing the GitLab Stable Counterpart principles, every Data Pod have  \na Data Platform Team Stable Counterpart assigned. The Data Platform  \nStable Counterpart divides their time, work and priorities between the  \nData Platform Team and Data Pod (general an average of 50% each, P2-OKR  \nscheduled ahead of the quarter in collaborat', "part principles, every Data Pod have  \na Data Platform Team Stable Counterpart assigned. The Data Platform  \nStable Counterpart divides their time, work and priorities between the  \nData Platform Team and Data Pod (general an average of 50% each, P2-OKR  \nscheduled ahead of the quarter in collaboration with the respective  \nPod). The Stable Counterpart is aware of the direction and priorities of  \nthe Data Pod and when needed brought into discussion with the Data  \nPlatform Team.\nThe stable counterpart is expected to participate in the following  \nmeetings asynchronously or synchronously:\n- Data Pod Iteration Planning Meetings.\n- Data Pod Team Meetings.22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 2/6\n## Data Program Recruiting\nRecruiting great people is critical to our success and we've invested  \nmuch effort into making the process efficient. Here are some reference  \nmaterials we use:\n- GitLab Data Recruiting video", "///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 2/6\n## Data Program Recruiting\nRecruiting great people is critical to our success and we've invested  \nmuch effort into making the process efficient. Here are some reference  \nmaterials we use:\n- GitLab Data Recruiting video\n- Data Roles and Career Development\n- Take Home Test\n## Data Roles and Career Development\n### Data Internships\nSee Data Team Internships.\n### Data Platform\n#### Data Engineering Job Family\nData Engineering Roles:\n- Data Engineer\n- Senior Data Engineer\n- Staff Data Engineer\nIntermediate and Senior Data Engineer Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Perform triage activities | \nExtract new data sources | Own a specific area of the data platform\nCreate a MR to contribute to handbook or templates | Investigate  \nincidents and issues | Work on OKR assignments | Propose new ideas and  \ncome up with Data Plat", '--\nComplete People and Data Onboarding | Perform triage activities | \nExtract new data sources | Own a specific area of the data platform\nCreate a MR to contribute to handbook or templates | Investigate  \nincidents and issues | Work on OKR assignments | Propose new ideas and  \ncome up with Data Platform improvement initiatives\nUnderstand the current setup of the data platform | Make \nsmall/corrective changes to the platform infrastructure or data  \npipelines | Contribute on work breakdown |\n### Data Analyst\n#### Data Analyst Job Family\nData Analyst Roles:\n- Data Analyst Intern\n- Junior Data Analyst\n- Data Analyst\n- Senior Data Analyst\n- Staff Data Analyst\nIntermediate and Senior Data Analyst Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Extend an existing Tableau  \ndashboard or complete the triage phase for a dbt issue | Run a project  \nend-to-end as DRI with support from a Data Fusion Team | Create \nERDs', ' Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Extend an existing Tableau  \ndashboard or complete the triage phase for a dbt issue | Run a project  \nend-to-end as DRI with support from a Data Fusion Team | Create \nERDs/Data Artifacts or complete a product evaluation22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 3/6\nStart attending Data Fusion Team and Business Team synchronous meetings \n| Perform triage activities | Complete First Issue: S to M T-Shirt Size \n|\n### Data Science\n#### Data Science Job Family\nData Science Roles:\n- Data Scientist\n- Senior Data Scientist\n- Staff Data Scientist\n- Principal Data Scientist\nIntermediate and Senior Data Scientist Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Meet stakeholders across the  \norganization | Re-train or enh', 'Senior Data Scientist\n- Staff Data Scientist\n- Principal Data Scientist\nIntermediate and Senior Data Scientist Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Meet stakeholders across the  \norganization | Re-train or enhance an existing data science model | Make \na contribution to improve the Data Science handbook, packages, or  \nprocesses\nStart attending Data Science Team meetings | Refine/improve one data  \nscience dashboard | Work on OKR assignments | Take ownership of at least  \none quarterly OKR\nUnderstand the current data science systems and processes | | |\n### Analytics Engineering\n#### Analytics Engineering Job Family\nAnalytics Engineer Roles:\n- Analytics Engineer\n- Senior Analytics Engineer\n- Staff Analytics Engineer\n- Principal Analytics Engineer\nIntermediate and Senior Analytics Engineer Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and ', 'ng Job Family\nAnalytics Engineer Roles:\n- Analytics Engineer\n- Senior Analytics Engineer\n- Staff Analytics Engineer\n- Principal Analytics Engineer\nIntermediate and Senior Analytics Engineer Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People and Data Onboarding | Extend an existing dbt Trusted  \nData Models | Run a project end-to-end as DRI with support from a Data  \nFusion Team | Create ERDs/Data Artifacts\nStart attending Data Fusion Team and Business Team synchronous meetings \n| Perform triage activities | Complete First Issue: S to M T-Shirt Size \n|\n### Data Management\n#### Data Management Job Family\nData Management Roles:\n- Manager, Data\n- Senior Manager, Data22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 4/6\n- Director, Data\n- Senior Director, Data\nData Manager Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete Pe', 'ata\n- Senior Manager, Data22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 4/6\n- Director, Data\n- Senior Director, Data\nData Manager Onboarding Timeline:\nBy Day 30 | By Day 60 | By Day 90 | By Day 120\n--- | --- | --- | ---\nComplete People, Data, and Manager Onboarding | Meet everyone on the  \nteam and business data champions | Complete a Team Assessment | Draft a  \npeople development Roadmap\nUnderstand the current setup of the data platform | Work on OKR  \nassignments and map them to the data platform | Lead discussions with  \nUsers/Stakeholders on initiatives and OKRs | Draft a program development  \nRoadmap\nAdd a new page to the handbook | Make regular contributions to the  \nhandbook spanning your area of management | Become DRI for major  \nportions of the Data Handbook | System/Application Change Control  \nManagement of one or more modules\n## Tool Technology Tandem\nTool Technology Tandems (TTT) are supporting to get th', 'the handbook | Make regular contributions to the  \nhandbook spanning your area of management | Become DRI for major  \nportions of the Data Handbook | System/Application Change Control  \nManagement of one or more modules\n## Tool Technology Tandem\nTool Technology Tandems (TTT) are supporting to get the maximum value  \nout of business opportunities we have in the Data Program. TTT are  \nexperts in a specific (software) tool or technology to support business  \nopportunities or challenges we have by leveraging the tool or technology  \nto the maximum.\nTool / Technology | Tandem\n--- | ---\nSnowflake | t.b.d.\nMonte Carlo | t.b.d.\ndbt | t.b.d.\nTableau | t.b.d.\nWhat do we expect from TTT:\n- We expect TTT to get in touch with our business partners and all  \nfunctions that contribute to the data program or work with our Data  \nPlatform, to understand their challenges.\n- We expect TTT to get up to date with the latest in their area.\n- TTT will guide and educate our business partners.\n- TTT will init', 'TT to get in touch with our business partners and all  \nfunctions that contribute to the data program or work with our Data  \nPlatform, to understand their challenges.\n- We expect TTT to get up to date with the latest in their area.\n- TTT will guide and educate our business partners.\n- TTT will initiate design-spikes for quarterly planning.\nRelated Links:\n- Data Analytics at GitLab\n- GitLab Data Analytics Team Handbook\n- Data Platform at GitLab\n- GitLab Data Platform Team Handbook\n- Data Science Handbook\n- GitLab Data Science Team Handbook\n- Data Team Internships\n- GitLab Data Team Internships\nLast modified October 01, 2024: Move the Data team handbook into  \n/enterprise-data top level (f156d4f3)22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 5/6\n*View page source - Edit this page - please contribute. *22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 6/6\n', 'd4f3)22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 5/6\n*View page source - Edit this page - please contribute. *22/10/2024, 15:41 tmp77f9s2ec.html\nÔ¨Åle:///private/var/folders/z8/lzg1ky6j6n98zstxj6ybggtr0000gn/T/tmp77f9s2ec.html 6/6\n']
len(chunks)

16

In [18]:
vector = embedding_data(chunks, 'testest2', openai_embedding)
vector

embedding...
no. of chunks :  16
storing data in Milvus vector database...
üìå
[['# Data Team Organization\nGitLab Data Team Organization\n## Data Team Organization\nThe Data Team Organization model is guided by three primary business  \nneeds:\n- The need for bespoke data solutions unique to the GitLab business.\n- The need for high-performance and reliable data storage and compute  \nplatform to support distributed analyst teams.\n- The need for centers of excellence for data technologies and advanced  \nanalytics.\n- The need for flexible data solutions driven by varying urgency and  \nquality requirements.\nBased on these needs, the Data Team is organized in the following way:\n- Data Pods: Pods are assembled to provide concentrated focus on  \ndelivering & maintaining data products for strategic company  \ninitiatives. Pods are staffed with multiple data personas including Data  \nAnalyst, Data Scientist, Analytics Engineer, and supported by Data  \nEngineer as stable counterpart

<Collection>:
-------------
<name>: testest2
<description>: Inserted policy from user
<schema>: {'auto_id': True, 'description': 'Inserted policy from user', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 50000}}, {'name': 'embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 768}}], 'enable_dynamic_field': True}

In [49]:
def find_answer(question, collection, model_emb):
    embedded_vector  = model_emb.embed_query(question)    # embedding question
    print('embedding question...')
    print(len(embedded_vector))
    collection.load()           # query data from collection
    print('a')
    hits = collection.search(data=[embedded_vector], anns_field="embeddings", param={"metric":"IP","offset":0},
                    output_fields=["text"], limit=15)
    return hits

In [51]:
collection = Collection('aneA5EnNGg')
question = 'What is The Data Team Organization model guided by?'
model_emb = openai_embedding

hits = find_answer(question, collection, model_emb)

embedding question...
768
a


In [59]:
[hits[0][i].text for i in range(4)]

['# Data Team Organization\nGitLab Data Team Organization\n## Data Team Organization\nThe Data Team Organization model is guided by three primary business  \nneeds:\n- The need for bespoke data solutions unique to the GitLab business.\n- The need for high-performance and reliable data storage and compute  \nplatform to support distributed analyst teams.\n- The need for centers of excellence for data technologies and advanced  \nanalytics.\n- The need for flexible data solutions driven by varying urgency and  \nquality requirements.\nBased on these needs, the Data Team is organized in the following way:\n- Data Pods: Pods are assembled to provide concentrated focus on  \ndelivering & maintaining data products for strategic company  \ninitiatives. Pods are staffed with multiple data personas including Data  \nAnalyst, Data Scientist, Analytics Engineer, and supported by Data  \nEngineer as stable counterpart.\n- Analytics Engineering: Transform raw data into clean, structured, and  \nusa

collection = Collection('agCx0LHzHFrTpxTpksY5nupZBUicidL')


In [31]:
collection = Collection('agCx0LHzHFrTpxTpksY5nupZBUicidL')


In [34]:
collection.load()

KeyboardInterrupt: 

In [37]:
for i in utility.list_collections():
    utility.drop_collection(i)

In [39]:
utility.list_collections()

['aneA5EnNGg']