In [28]:
import pandas as pd
import os
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

from dotenv import load_dotenv

load_dotenv()
# Note: the Gemini API key env var must be named 'GOOGLE_API_KEY' for LangChain to pick it up

#from opensearchpy import OpenSearch

# Current OpenSearch password: <redacted> -- keep credentials in .env, never in the notebook
# cd desktop/opensearch-2.17.1

# Import this to simulate the LLM we use
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage

from IPython.display import Markdown, display

In [45]:
# Gemini model connection.
# The API key is read from the environment (a missing GOOGLE_API_KEY raises
# KeyError here); `model` is the handle later cells use to generate text.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

In [2]:
# Import data for retrieval docs: one DataFrame per cleaned fandom JSON export.
all_entries = [
    pd.read_json(json_path)
    for json_path in (
        "Cleaned-Jsons/fandom_abilities.json",
        "Cleaned-Jsons/fandom_maps.json",
        "Cleaned-Jsons/fandom_players.json",  # change player id to player gamer tag
        "Cleaned-Jsons/fandom_teams.json",
        "Cleaned-Jsons/fandom_tournaments.json",
        "Cleaned-Jsons/fandom_weapons.json",
    )
]

# Per-topic names bound to the very same DataFrame objects held in all_entries.
(
    abilities_entries,
    maps_entries,
    players_entries,
    teams_entries,
    tournaments_entries,
    weapons_entries,
) = all_entries

In [33]:
# List every collection's column labels to spot the ones worth renaming.
print(*(entries.columns for entries in all_entries), sep="\n")

Index(['title', 'summary', 'stats', 'description', 'agent', 'friendlyfire',
       'creds', 'uses', 'fulleffects', 'health', 'restock', 'points',
       'mapobjects', 'function', 'uses_per_round', 'credits'],
      dtype='object')
Index(['title', 'features', 'trivia', 'location', 'elements', 'sites', 'added',
       'codename', 'rotation', 'teleporters', 'coordinates'],
      dtype='object')
Index(['biography', 'trivia', 'tournament results', 'id', 'is_retired', 'name',
       'pronoun', 'country', 'residency', 'birth_date_year', 'role', 'stream',
       'facebook', 'twitter', 'instagram', 'youtube', 'native_name'],
      dtype='object')
Index(['name', 'history', 'player_roster', 'org_counrtry', 'country', 'region',
       'headcoach', 'owner', 'is_disbanded', 'website', 'facebook', 'twitter',
       'instagram', 'weibo', 'stream', 'youtube', 'discord', 'created',
       'date_disbanded', 'tiktok', 'reddit', 'twitch'],
      dtype='object')
Index(['name', 'overview', 'participants', 'r

In [34]:
# Give the generic key column of each collection a descriptive label.
# The renames are done in place on purpose: all_entries holds these same
# DataFrame objects, so the new labels are what the corpus-building loop sees.
column_relabels = (
    (abilities_entries, {'title': 'ability name'}),      # Change abilities columns
    (maps_entries, {'title': 'map name'}),               # Change maps columns
    (players_entries, {'id': 'player'}),                 # Change players columns
    (teams_entries, {'name': 'team name'}),              # Change teams columns
    (tournaments_entries, {'name': 'tournament name', 'sdate': 'start date'}),  # Change tournaments columns
    (weapons_entries, {'title': 'weapon'}),              # Change weapons columns
)

for entries, relabel in column_relabels:
    entries.rename(columns=relabel, inplace=True)

In [35]:
# Total number of rows across all collections (one corpus document per row).
total_rows = sum(map(len, all_entries))
total_rows

2025

In [36]:
# Corpus of consolidated string entries (filled by the conversion loop below)
docs = []


def rowToString(row):
    """Flatten one row into a single 'column: value | column: value' string.

    Args:
        row: Any mapping-like object exposing ``.items()`` that yields
            (column name, value) pairs, e.g. a pandas Series passed by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        str: One ``"name: value"`` fragment per item, in iteration order,
        joined with ``' | '``. Values are formatted with their default
        string form; an empty row yields an empty string.
    """
    return ' | '.join(f"{label}: {value}" for label, value in row.items())

# Convert every row of every collection to text and append it to the corpus,
# keeping the collection order of all_entries.
for entries in all_entries:
    row_texts = entries.apply(rowToString, axis=1)
    docs.extend(row_texts.tolist())

# Spot-check a single converted document
print(docs[2000])


tournament name: VCT 2024 - Americas League Kickoff | overview: 
==
*Group Stage:
**All matches are Best of 3
**Three Double-Elimination format (GSL) groups
**Top team from each group will advance to the Playoffs
**Second place team from each group advances to the Play-In
<br>
*Play-In:
**All matches are Best of 3
**Three team Round-Robin group
**Winning team will advance to the Playoffs
<br>
*Playoffs:
**Four team Single-Elimination bracket
**Semi-Finals are Best of 3
**Grand Final is Best of 5
**Top 2 teams qualify to [[VCT/2024_Season/Masters/Madrid|Masters Madrid]]

==
{{TournamentResults|prize=yes|prizeunit=USD|totalprize=|prize_ref=|pointstitle=Points |points=yes
|{{TournamentResults/Line|place=1 |prize=|points=3|team=}}
|{{TournamentResults/Line|place=2 |prize=|points=|team=}}
|{{TournamentResults/Line|place=3-4 |prize=|points=|team=}}
|{{TournamentResults/Line|place=3-4 |prize=|points=|team=}}
|{{TournamentResults/Line|place=5 |prize=|points=|team= |hide=true}}
|{{TournamentRes

In [38]:
# Process text data into embeddings

# Sentence-embedding model; reused later to embed the query.
text_embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode the whole corpus, then move the resulting tensor to host memory as a
# NumPy array, which is what the FAISS index is fed below.
data_embeddings = (
    text_embedder.encode(docs, convert_to_tensor=True)
    .cpu()
    .detach()
    .numpy()
)

# Flat L2 index sized to the embedding width; vectors are added in docs order,
# so a FAISS result id can be used directly as an index into docs.
embeddings_size = data_embeddings.shape[1]
index = faiss.IndexFlatL2(embeddings_size)
index.add(data_embeddings)

In [52]:
# Query
query = "What team does TenZ play for?"  # Prompt
# Embed the query with the same model that embedded the corpus.
question_embedding = text_embedder.encode([query], convert_to_tensor=True).cpu().detach().numpy()

# Search corpus embeddings for relevant segments
top_k = 10  # Number of segments to retrieve
distances, indices = index.search(question_embedding, top_k)

# Retrieve the top-k segments. FAISS pads the id array with -1 when the index
# holds fewer than top_k vectors; skip those ids, otherwise docs[-1] would
# silently return the last document as if it were a match.
similar_segment = [docs[i] for i in indices[0] if i >= 0]

context = similar_segment  # All retrieved segments (a list), not only the best one
print(context)

# NOTE(review): this prompt asks for a summary of the retrieved list and never
# includes `query` itself -- confirm that is intended for this retrieval check.
prompt = f"""
Summarize who or what is in this list for names and topics: {context}
"""

response = model.generate_content(prompt)
display(Markdown(response.text))

['biography:  | trivia:  | tournament results:  | player: TenZ | is_retired: No | name: Tyson Ngo | pronoun: He | country: Canada | residency: North America | birth_date_year: 2001 | role: Player | stream: https://www.twitch.tv/tenz | facebook:  | twitter: TenZOfficial | instagram: tenzofficial | youtube: https://www.youtube.com/channel/UCckPYr9b_iVucz8ID1Q67sw | native_name: ', 'team name: 9z Academy | history:  | player_roster: \n==\n{{TeamMembersCurrent}}\n\n==\n{{TeamMembersFormer}} | org_counrtry: Argentina | country: Argentina | region: LAS | headcoach:  | owner: Francisco "\'\'\'Frankkaster\'\'\'" Postiglione | is_disbanded: yes | website: http://9z.gg | facebook: https://www.facebook.com/9zteam | twitter: 9zTeam | instagram:  | weibo:  | stream:  | youtube: https://www.youtube.com/9zteam | discord:  | created: Main Team 2018-08-08 <br> Sister Division 2020-10-23 | date_disbanded: Sister Division 2021-01-01\n<!-- Remember to create or edit a CargoConcept for sister teams if appl

The list contains information about Valorant players, teams, and coaches:

* **Players:** 
    * TenZ (Tyson Ngo) - Canadian Valorant player
    * nitr0 (Nick Cannella) - American Valorant player
    * YuZ (Brandon Cyr) - Canadian Valorant player
    * Aricune (Leon Neziri) - German Valorant coach
* **Teams:**
    * 9z Academy - Disbanded Valorant team from Argentina
    * 9z Team - Argentinian Valorant team
    * INFINITY - Mexican Valorant team
    * Arctic Gaming México - Disbanded Valorant team from Mexico
    * TNL Esports - South Korean Valorant team
    * Pro Hub Gaming - Disbanded Valorant team from a region not specified
* **Other:**
    *  The list includes information about each entity, such as their name, country, region, role, social media links, date of creation and disbandment, and other relevant details.

Overall, the list provides data about individuals and teams involved in the Valorant esports scene. 
