In [42]:
import json
import os

import faiss
import pandas as pd

In [2]:
# Source document for the retrieval demo: a plain-text Messi biography kept inline
# as one string. Section titles ("Early life", "Barcelona era", ...) are ordinary
# lines of the text; paragraphs are separated by blank lines.
data = """Lionel Andrés Messi is an Argentine professional footballer widely regarded as one of the greatest players in the sport's history. Renowned for his dribbling, playmaking, and goal-scoring consistency, he has defined an era in club and international football with unprecedented records and sustained excellence.

Early life
Born on 24 June 1987 in Rosario, Argentina, Messi grew up in a footballing household and joined Newell's Old Boys youth ranks.

Diagnosed with a growth hormone deficiency, he moved to Barcelona at 13 after the club offered to cover his medical treatment.

His youth development at La Masia refined his balance, vision, and close control, shaping the foundation for his trademark low center of gravity and elite technical execution.

Barcelona era
Senior debut at 17 in 2004; became Barcelona's all-time top scorer with a staggering goal tally and assist count across all competitions.

Under coaches Frank Rijkaard and Pep Guardiola, Messi evolved from a right-sided forward into a false nine, then into a free-roaming playmaker-forward hybrid.

Formed historic partnerships with Xavi, Iniesta, Neymar, and Suarez, driving Barcelona to domestic and European dominance.

Key achievements included multiple La Liga titles, UEFA Champions League triumphs, and the 2011 peak-era side often cited as one of the greatest club teams ever.

The 2012 calendar year yielded a world-record 91 goals, illustrating his capacity for consistent, multi-competition output.

PSG chapter
Joined Paris Saint-Germain in 2021, adapting to a new league and tactical environment while linking with Kylian Mbappé and Neymar.

Contributed as a chance creator and scorer, adding Ligue 1 titles and honing a deeper playmaking role that emphasized progressive passing, through-balls, and set-piece craft.

Inter Miami impact
Moved to Inter Miami in 2023 and immediately transformed the club's trajectory, winning the Leagues Cup 2023 and energizing MLS viewership and global attention.

Operated as a roaming 10/false nine, orchestrating play, finishing chances, and elevating teammates through tempo control and final-third decision-making.

Argentina journey
Debuted for the senior national team in 2005, facing early scrutiny over international finals defeats but showing unwavering leadership and resilience.

Captained Argentina to Copa América 2021, ending a long national drought; then won the Finalissima 2022 against Italy with a dominant display.

Crowned FIFA World Cup champion in 2022, producing decisive goals and assists, and receiving the Golden Ball as the tournament's best player.

Added the Copa América 2024 title, cementing a legacy of sustained international success across multiple cycles.

Playing style
Dribbling: Signature close control, rapid hip feints, and body swerves create separation in tight spaces and at speed.

Vision: Elite spatial awareness and timing enable defense-splitting passes and intelligent overloads between lines.

Finishing: Precise left-footed strikes, low-driven finishes, and curling shots from the edge of the box.

Set pieces: Prolific free-kick specialist with a variety of trajectories and wall-bypassing techniques.

Off-ball intelligence: Drift-and-receive patterns that create passing triangles, interior overloads, and progressive passing lanes.

Press resistance: Ability to receive under pressure, turn defenders, and retain possession in small pockets.

Leadership: Calmer, example-led captaincy built on consistency, clutch performances, and raising collective tempo.

Records and milestones
Most Ballon d'Or awards with a record total, reflecting dominance across different tactical eras and competitions.

Most goals in a calendar year with 91 in 2012, a modern benchmark for sustained match-to-match scoring.

All-time top scorer for Barcelona and La Liga, with records for goals and assists that spanned over a decade of league play.

Multiple European Golden Shoes and Champions League scoring titles, showcasing repeatable output against elite opposition.

Two World Cup Golden Balls (2014, 2022), underlining top-tier influence across separate tournament cycles.

Club and country career surpassing 800 total senior goals and a massive assist count, spanning domestic leagues, international play, and continental club competitions."""

In [3]:
# Trim leading/trailing whitespace from the raw text; interior newlines are kept.
clean_data = data.strip()

In [7]:
max_char = 800  # maximum number of characters in one chunk
overlap = 100   # characters repeated at the start of the next chunk


def chunk_text(text, max_char, overlap):
    """Split ``text`` into fixed-width character windows that overlap.

    Consecutive windows start ``max_char - overlap`` characters apart, so the
    last ``overlap`` characters of one chunk are repeated at the start of the
    next. Windows are cut purely by character position, so words and sentences
    may be split at the boundaries.

    Args:
        text: String to split.
        max_char: Maximum length of each chunk; must be positive.
        overlap: Characters shared by neighbouring chunks; must satisfy
            ``0 <= overlap < max_char``.

    Returns:
        List of chunk strings in document order (empty for empty ``text``).

    Raises:
        ValueError: if the sizes would produce a non-advancing window. The
            plain ``while`` loop this replaces never terminated in that case.
    """
    if max_char <= 0:
        raise ValueError("max_char must be positive")
    if not 0 <= overlap < max_char:
        raise ValueError("overlap must satisfy 0 <= overlap < max_char")

    step = max_char - overlap
    # NOTE(review): when the previous window already reaches the end of the text,
    # the final chunk contains only characters that window also holds.
    return [text[start:start + max_char] for start in range(0, len(text), step)]


chunks = chunk_text(clean_data, max_char, overlap)

In [None]:
import requests
import numpy as np

def generate_embeddings(text, api_key=None, timeout=30):
    """Request an embedding for ``text`` from the euron.one embeddings endpoint.

    Args:
        text: Value sent as the ``input`` field of the JSON payload.
        api_key: Bearer token for the ``Authorization`` header. When omitted,
            the ``EURI_API_KEY`` environment variable is used; if that is not
            set either, the header is sent as a bare ``"Bearer "``.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the notebook indefinitely.

    Returns:
        numpy.ndarray built from ``data[0]["embedding"]`` of the JSON response.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
    """
    if api_key is None:
        # Keep the secret out of the notebook file; read it from the environment.
        api_key = os.environ.get("EURI_API_KEY", "")

    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + api_key
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Surface auth/quota failures as an HTTP error rather than a KeyError on the
    # response body below.
    response.raise_for_status()
    body = response.json()

    return np.array(body['data'][0]['embedding'])

In [8]:
# Show the chunk list so the window boundaries and overlaps can be inspected.
chunks

["Lionel Andrés Messi is an Argentine professional footballer widely regarded as one of the greatest players in the sport's history. Renowned for his dribbling, playmaking, and goal-scoring consistency, he has defined an era in club and international football with unprecedented records and sustained excellence.\n\nEarly life\nBorn on 24 June 1987 in Rosario, Argentina, Messi grew up in a footballing household and joined Newell's Old Boys youth ranks.\n\nDiagnosed with a growth hormone deficiency, he moved to Barcelona at 13 after the club offered to cover his medical treatment.\n\nHis youth development at La Masia refined his balance, vision, and close control, shaping the foundation for his trademark low center of gravity and elite technical execution.\n\nBarcelona era\nSenior debut at 17 in 2004; be",
 "k low center of gravity and elite technical execution.\n\nBarcelona era\nSenior debut at 17 in 2004; became Barcelona's all-time top scorer with a staggering goal tally and assist cou

In [16]:
# Tabulate the chunks: one row per chunk, with ids counting up from 1.
records = [
    {"id": chunk_id, "text": passage}
    for chunk_id, passage in enumerate(chunks, start=1)
]
df = pd.DataFrame(records)
df

Unnamed: 0,id,text
0,1,Lionel Andrés Messi is an Argentine profession...
1,2,k low center of gravity and elite technical ex...
2,3,"d 91 goals, illustrating his capacity for cons..."
3,4,rol and final-third decision-making.\n\nArgent...
4,5,e separation in tight spaces and at speed.\n\n...
5,6,and raising collective tempo.\n\nRecords and m...
6,7,"sive assist count, spanning domestic leagues, ..."


In [31]:
# Embed each chunk in order (one generate_embeddings call per chunk) and cast
# every vector to float32; the final line shows how many vectors were produced.
chunk_vectors = []
for passage in chunks:
    vector = generate_embeddings(passage)
    chunk_vectors.append(vector.astype('float32'))
embeddings = chunk_vectors
len(embeddings)

7

In [18]:
def _embed_float32(text):
    """Return the embedding for ``text`` cast to float32."""
    return generate_embeddings(text).astype('float32')


# Attach one embedding per row of the `text` column.
# NOTE(review): this calls generate_embeddings again for every row instead of
# reusing the vectors already stored in the `embeddings` list.
df['embeddings'] = df['text'].apply(_embed_float32)
df

Unnamed: 0,id,text,embeddings
0,1,Lionel Andrés Messi is an Argentine profession...,"[0.04417627, -0.05338783, 0.0036601236, 0.0227..."
1,2,k low center of gravity and elite technical ex...,"[0.041532047, -0.0297674, 0.017267464, -0.0299..."
2,3,"d 91 goals, illustrating his capacity for cons...","[0.0024025533, -0.03145853, 0.05397969, -0.013..."
3,4,rol and final-third decision-making.\n\nArgent...,"[0.013397696, -0.023585001, 0.044793807, -0.02..."
4,5,e separation in tight spaces and at speed.\n\n...,"[0.04461679, 0.015987191, 0.07603125, -0.01060..."
5,6,and raising collective tempo.\n\nRecords and m...,"[0.0701848, 0.0071828635, 0.079857245, -0.0039..."
6,7,"sive assist count, spanning domestic leagues, ...","[-0.0036275818, 0.007166852, 0.08744238, -0.01..."


In [28]:
# Inspect the stored vectors as a plain Python list (one array per row).
df['embeddings'].to_list()

[array([ 0.04417627, -0.05338783,  0.00366012, ...,  0.01487568,
         0.00291291,  0.02653712], dtype=float32),
 array([ 0.04153205, -0.0297674 ,  0.01726746, ...,  0.01483626,
         0.01801461, -0.0159392 ], dtype=float32),
 array([ 0.00240255, -0.03145853,  0.05397969, ..., -0.00349116,
        -0.01985518, -0.01030845], dtype=float32),
 array([ 0.0133977 , -0.023585  ,  0.04479381, ...,  0.02717457,
         0.00022573,  0.01842815], dtype=float32),
 array([ 0.04461679,  0.01598719,  0.07603125, ...,  0.00848721,
        -0.01555988, -0.00690323], dtype=float32),
 array([0.0701848 , 0.00718286, 0.07985725, ..., 0.00334128, 0.01415131,
        0.02453847], dtype=float32),
 array([-0.00362758,  0.00716685,  0.08744238, ...,  0.01000642,
        -0.0029058 ,  0.03412916], dtype=float32)]

In [21]:
# Rebind `embeddings` to the vectors held in the DataFrame column, replacing the
# list built earlier. The column stores one array per row, so this is presumably
# a 1-D object array of arrays rather than a 2-D matrix — stacked in the next cell.
embeddings = df['embeddings'].to_numpy()

In [33]:
# Stack the per-chunk vectors row-wise into a single 2-D array (one row per chunk).
embeddings = np.vstack(embeddings)
embeddings

array([[ 0.0442039 , -0.05337385,  0.0036959 , ...,  0.0148228 ,
         0.00294399,  0.02653017],
       [ 0.04153205, -0.0297674 ,  0.01726746, ...,  0.01483626,
         0.01801461, -0.0159392 ],
       [ 0.00245114, -0.03145079,  0.05406794, ..., -0.00348713,
        -0.01985029, -0.01036937],
       ...,
       [ 0.04461679,  0.01598719,  0.07603125, ...,  0.00848721,
        -0.01555988, -0.00690323],
       [ 0.07021192,  0.00719267,  0.07983392, ...,  0.00335221,
         0.01415909,  0.02455512],
       [-0.00362758,  0.00716685,  0.08744238, ...,  0.01000642,
        -0.0029058 ,  0.03412916]], dtype=float32)

In [34]:
# Sanity check: confirm `embeddings` is a numpy array before handing it to FAISS.
type(embeddings)

numpy.ndarray

In [35]:
# L2-normalise the rows of `embeddings`. The return value is not captured, so the
# array is expected to be modified in place — re-displaying it after this cell
# shows different values than before.
faiss.normalize_L2(embeddings)

In [None]:
# Flat inner-product index sized to the embedding width. The rows were
# L2-normalised in an earlier cell, so inner product acts as cosine similarity.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatIP(embedding_dim)
index.add(embeddings)

In [49]:
# Output locations: the serialized FAISS index and the JSON Lines chunk metadata.
index_path, meta_path = "index_messi.faiss", "meta_messi.jsonl"

In [41]:
# Persist the in-memory index to `index_path` so it can be reloaded without
# re-embedding the chunks.
faiss.write_index(index, index_path)

In [50]:
# Write chunk metadata as JSON Lines: one {"id", "text"} object per line, in
# chunk order.
# NOTE(review): ids here start at 1, while the positions returned by
# index.search start at 0 — subtract 1 when mapping a hit back to this file.
with open(meta_path, 'w') as meta_file:
    meta_file.writelines(
        json.dumps({"id": chunk_id, "text": passage}) + "\n"
        for chunk_id, passage in enumerate(chunks, start=1)
    )
        

In [51]:
# Embed the question, shape it as a single-row float32 matrix, normalise it the
# same way as the indexed vectors, then fetch the 3 best-scoring chunks.
query = "What is Messi's Playing Style like?"
query_vector = generate_embeddings(query)
q = query_vector.astype("float32").reshape(1, -1)
faiss.normalize_L2(q)
index.search(q, 3)

(array([[0.56445694, 0.5501441 , 0.4915244 ]], dtype=float32),
 array([[0, 1, 4]]))

In [59]:
# Re-run the search with k=7 for the current query vector `q`, to see the full
# score ordering instead of just the top hits.
index.search(q,7)

(array([[0.56445694, 0.5501441 , 0.4915244 , 0.46991688, 0.46381244,
         0.41427535, 0.24485159]], dtype=float32),
 array([[0, 1, 4, 3, 2, 5, 6]]))

In [60]:
# Second question: embed it, reshape to a single-row float32 matrix, normalise,
# and retrieve the 3 highest-scoring chunks. This rebinds `query` and `q`.
query = "What is Most goals scored by Messi in a Calendar year?"
query_vector = generate_embeddings(query)
q = query_vector.astype("float32").reshape(1, -1)
faiss.normalize_L2(q)
index.search(q, 3)

(array([[0.61699533, 0.55438375, 0.53807294]], dtype=float32),
 array([[5, 1, 0]]))