In [31]:
from langchain_community.document_loaders.csv_loader import CSVLoader
import os
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_community import embeddings 
# from langchain_community.vectorstores import pinecone
from pinecone import Pinecone
from langchain_community.chat_models import ChatOllama
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [32]:
## Extract data from CSV file

def load_file(path):
    loader = CSVLoader(file_path=path, encoding="utf-8")
    data = loader.load()
    return data

In [33]:
extracted_data = load_file(r'../data_set/finalData.csv')
extracted_data[:5]

[Document(page_content='AppID: 1071100\nName: Boom 3D\nAbout the game: Boom 3D is a pro audio enhancement app that has been designed to play all your media content with incredible 3D effects on ANY headphones /speakers, from any player, any media, any streaming services. Specially designed for gaming, our multi-channel 3D surround brings unmatched clarity to every game interaction. From the muted footsteps of your opponent to the roar of a dragon, Boom 3D renders the detail in all its crispness and clarity, leading you to victory! With Boom, 3D gaming becomes a truly immersive experience.\nRelease date: Jun 19, 2019\nHeader image: https://cdn.akamai.steamstatic.com/steam/apps/1071100/header.jpg?t=1604570134\nWebsite: \nGenres: Audio Production,Utilities\nTags: Utilities,Audio Production,VR,3D', metadata={'source': '../data_set/finalData.csv', 'row': 0}),
 Document(page_content="AppID: 1055010\nName: Energy Engine PC Live Wallpaper\nAbout the game: Energy Engine PC Live Wallpaper Manage

In [34]:
# Transform data(create chunks)

def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20) 
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [35]:
text_chunks = text_split(extracted_data)
print("length of my chinks = ",len(text_chunks))

length of my chinks =  684


In [36]:
def download_hugging_face_embedding():
    model_name = "BAAI/bge-large-en"
    encode_kwargs = {'normalize_embeddings': True} 

    embedding = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        encode_kwargs=encode_kwargs
    )

    return embedding

In [37]:
embeddings = download_hugging_face_embedding()

In [38]:
embeddings

HuggingFaceBgeEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': True}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='BAAI/bge-large-en', cache_folder=None, model_kwargs={}, encode_kwargs={'normalize_embeddings': True}, query_instruction='Represent this question for searching relevant passages: ', embed_instruction='')

In [39]:
query_result = embeddings.embed_query("hello World")

In [12]:
print("len = ",len(query_result))

len =  1024


In [24]:
PINECONE_API_KEY = "1b2fb2fd-7a91-442c-9930-41f03023f1fb"
host_name = "https://gmaerecommendationindex-csbuh5e.svc.aped-4627-b74a.pinecone.io"

In [18]:
vector=[embeddings.embed_query(t.page_content) for t in text_chunks]

In [19]:
vectors=[]
for i,vec in enumerate(vector):
    vectors.append({'id':str(i),'values':vec,"metadata": {'text':text_chunks[i].page_content}})

In [20]:
import itertools
from pinecone import Pinecone

pc = Pinecone(api_key=PINECONE_API_KEY, pool_threads=30)
index = pc.Index("gamerecommendationsystem")

def chunks(iterable, batch_size=100):
    """A helper function to break an iterable into chunks of size batch_size."""
    it = iter(iterable)
    chunk = tuple(itertools.islice(it, batch_size))
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(it, batch_size))



with pc.Index('gamerecommendationsystem', pool_threads=30) as index:

    async_results = [
        index.upsert(vectors=ids_vectors_chunk, async_req=True)
        for ids_vectors_chunk in chunks(vectors, batch_size=100)
    ]
    [async_result.get() for async_result in async_results]

In [49]:
text="Game with piece"

In [41]:
pc = Pinecone(api_key=PINECONE_API_KEY, pool_threads=30)
index = pc.Index("gamerecommendationsystem")

In [51]:
def find_similarity(text):
    query=embeddings.embed_query(text)
    response=index.query(vector=query,top_k=3,include_values=True,include_metadata=True)
    print(type(response))
    results=response.matches
    matched_data=[]
    for result in results:
        text=result.metadata['text']
        matched_data.append(text)
    return matched_data

In [53]:
find_similarity(text)

<class 'pinecone.core.client.model.query_response.QueryResponse'>


['heroic character by winning head to head battles. Or play classic match 3 in quick play mode. Explore the expansive kingdom as you experience a puzzle game like nothing you have played before. Embark on an epic single-player campaign comprising more than 150 challenging quests. Jump into this exciting puzzle game and outmatch your opponent in Quick-play mode. Build your empire as you capture cities, build castles and gain a party of companions that will aid you in battle. Earn money with each',
 "map to find clues and then integrate them into new clues, so as to push forward the plot. Original BGM: The only BGM in this game was composed by independent musician TetraCalyx, which consists of five voice parts, namely P, R, I, C and E. They will be unlocked one by one in the new game+ by solving puzzles. An updated decoder is recommended for a better experience, since there're 11 video performances to cover the whole story. 【translator】 三月疯兔子 【German translator】 Stealth （Thank you very m

In [24]:
## load ollama LAMA2 llm mODEL
llm = Ollama(model="llama2")
llm

Ollama()

In [25]:
limit = 3750

import time
conversation_history = []
def retrieve(query):
    vector=embeddings.embed_query(query)
    # get relevant contexts
    contexts = []
    for message in conversation_history:
        contexts.append(f"{message['role'].capitalize()}: {message['content']}\n")
    time_waited = 0
    while (len(contexts) < 3 and time_waited < 60 * 12):
        res=index.query(vector=vector,top_k=3,include_values=True,include_metadata=True)
        contexts = contexts + [
            x['metadata']['text'] for x in res['matches']
        ]
        print(f"Retrieved {len(contexts)} contexts, sleeping for 15 seconds...")
        time.sleep(10)
        time_waited += 15

    if time_waited >= 60 * 12:
        print("Timed out waiting for contexts to be retrieved.")
        contexts = ["No contexts retrieved. Try to answer the question yourself!"]


    # build our prompt with the retrieved contexts included
    prompt_start = (
        "You are a helpful assistant for game recommendation. Now use the following pieces of information to answer the user's question.\
        If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n"+
        "Context:\n"
    )
    prompt_end = (
        f"\n\nQuestion: {query}\n Only return the helpful answer below and nothing else.\nAnswer:"
    )
    # append contexts until hitting limit
    for i in range(1, len(contexts)):
        if len("\n\n---\n\n".join(contexts[:i])) >= limit:
            prompt = (
                prompt_start +
                "\n\n---\n\n".join(contexts[:i-1]) +
                prompt_end
            )
            break
        elif i == len(contexts)-1:
            prompt = (
                prompt_start +
                "\n\n---\n\n".join(contexts) +
                prompt_end
            )
    conversation_history.append({"role": "user", "content": query})
    return prompt


def complete(prompt):
    # instructions
    ## load ollama LAMA2 llm mODEL
    llm = Ollama(model="llama2")
    return llm(prompt)

In [26]:
query_with_contexts = retrieve(text)
query_with_contexts

Retrieved 3 contexts, sleeping for 15 seconds...


"You are a helpful assistant for game recommendation. Now use the following pieces of information to answer the user's question.        If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nContext:\nfor appearance, clothing and accessories. Fight and earn rare items with showy effects!\n\n---\n\nAbout the game: 其实早上去年6月份，团队已经解散了。这款游戏定位原本是手游，无版号下改pc也是无奈，其中诸多不足有开发上的原因，也有资本上的压力。我个人还是花了3个月进行余下更新，也是算给玩家一个交代，主要更新了新的武器鞭子，一些怪物，boss，和一些新的装备，以及重做的叠加式装备系统。遗憾的将游戏联机部分阉割，其实联机也并不完善，能顺利游玩的可能性较低，索性删掉，改为纯单机也有为了缓解服务器开支，至此 我只能重新回到个人开发者中 改动内容如下 优化 :优化了运行效率,减少部分情况下的卡顿 新增 :一个新类型武器-鞭子 新增 :一个小型怪物-阴影 新增 :两个中型怪物-魔剑,恶魔盔甲 新增 :三个BOSS 新增 :47个被动道具 新增 :两个职业,御兔使(女性),药剂师(男性) 新增 :图标显示系统,当部分道具满足触发条件时会在头顶显示对应道具的图标 提示触发成功 改动 :道具的升级增加了拾取叠加方式,拾取一个同名物品会对该物品叠加并增加对应等级. 同时道具将不再有等级上限,将无限成长 改动 :对之前游戏中存在的60项被动道具,40把武器进行效果调整,将较弱的效果加强\n\n---\n\nAbout the game: 上软JoyGear小组采用创新“同步结算”系统，成功加速CCG类游戏节奏，全新剧情、怀旧内容复刻大为充实了单人体验时间，更以挖掘游戏可玩性的Rogue Like式DBG迷宫冒险、以及轰轰烈烈的高手竞标赛对决，打造出《仙剑》系列最新衍生作品！ 同步结算的博弈乐趣与精妙卡牌

In [27]:
def chatbot(query):
    query_with_contexts = retrieve(query)
    # print(query_with_contexts)
    return complete(query_with_contexts)

In [28]:
chatbot(text)

Retrieved 4 contexts, sleeping for 15 seconds...


'The game you are looking for is "Nine Sages Rise of the Heroes" (九野英雄崛起).'

In [29]:
llm("Who is PM of india")

'\nThe Prime Minister of India is the head of government of India. The Prime Minister is responsible for leading the country and making important decisions related to its governance. The current Prime Minister of India is Narendra Modi, who has been in office since May 2014.'