In [None]:
# !pip install pinecone-client
# !pip install sentence_transformers

# Connecting to Pinecone

In [1]:
import itertools
import os

import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer

# Read the credentials from the environment so a real key never has to be
# saved inside the notebook. When the variables are unset, fall back to the
# original placeholder key and region, so existing behaviour is unchanged.
api_key = os.environ.get("PINECONE_API_KEY", "<YOUR API KEY>")
environment = os.environ.get("PINECONE_ENVIRONMENT", "asia-northeast1-gcp")
pinecone.init(api_key=api_key, environment=environment)

# Name of the index used by every later cell.
index_name = "manfye-test"

# Handle used below for the stats / upsert / fetch / query / delete calls.
index = pinecone.Index(index_name=index_name)


  from tqdm.autonotebook import tqdm


In [3]:
# Show the index configuration (name, metric, dimension, pod details, status).
pinecone.describe_index(index_name)


IndexDescription(name='manfye-test', metric='cosine', replicas=1, dimension=300.0, shards=1, pods=1, pod_type='s1.x1', status={'ready': True, 'state': 'Ready'}, metadata_config=None, source_collection='')

In [4]:
# Display the username, user label and project name of the current connection.
pinecone.whoami()


WhoAmIResponse(username='07f62cb', user_label='default', projectname='c716a21')

In [5]:
# Report index statistics: dimension, fullness and per-namespace vector counts.
# NOTE(review): the saved output shows 10 vectors here but 0 in the next stats
# cell, with no visible delete in between — presumably cells were removed or
# run out of order; re-run from a fresh kernel to confirm.
index.describe_index_stats()


{'dimension': 300,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10}

In [2]:
import pandas as pd

# Ten sample support tickets: consecutive ticket numbers paired with the
# complaint text that will be embedded later.
data = dict(
    ticketno=list(range(1001, 1011)),
    complains=[
        'Broken navigation button on the website',
        'Incorrect pricing displayed for a product',
        'Unable to reset password',
        'App crashes on the latest iOS update',
        'Payment processing error during checkout',
        'Wrong product delivered',
        'Delayed response from customer support',
        'Excessive delivery time for an order',
        'Difficulty in finding a specific product',
        'Error in applying a discount coupon',
    ],
)

# A frame built from a dict of lists already carries a fresh 0..n-1 index.
df = pd.DataFrame(data)

In [67]:
# Display the sample complaint tickets.
df

Unnamed: 0,ticketno,complains
0,1001,Broken navigation button on the website
1,1002,Incorrect pricing displayed for a product
2,1003,Unable to reset password
3,1004,App crashes on the latest iOS update
4,1005,Payment processing error during checkout
5,1006,Wrong product delivered
6,1007,Delayed response from customer support
7,1008,Excessive delivery time for an order
8,1009,Difficulty in finding a specific product
9,1010,Error in applying a discount coupon


In [8]:
# Check the index statistics before any vectors are uploaded.
index.describe_index_stats()


{'dimension': 300,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [9]:
# Load the pretrained averaged-GloVe sentence embedding model (the "300d"
# model name matches the index dimension of 300 reported above).
model = SentenceTransformer("average_word_embeddings_glove.6B.300d")

# Encode every complaint in a single batched call instead of one encode()
# call per row: encode() accepts a list of sentences and returns one
# embedding per sentence, in input order. Each embedding is stored as a plain
# Python list so it can be passed to the Pinecone client as-is.
complaint_texts = df["complains"].astype(str).tolist()
df["question_vector"] = model.encode(complaint_texts).tolist()


In [10]:
# Display the tickets again, now including the question_vector column.
df

Unnamed: 0,ticketno,complains,question_vector
0,1001,Broken navigation button on the website,"[-0.2649574875831604, -0.17953598499298096, 0...."
1,1002,Incorrect pricing displayed for a product,"[0.10973000526428223, 0.3845505118370056, 0.12..."
2,1003,Unable to reset password,"[-0.21170000731945038, 0.2875896692276001, -0...."
3,1004,App crashes on the latest iOS update,"[-0.008352995850145817, -0.13370580971240997, ..."
4,1005,Payment processing error during checkout,"[-0.17191250622272491, 0.37106096744537354, 0...."
5,1006,Wrong product delivered,"[0.04049666225910187, 0.22198998928070068, -0...."
6,1007,Delayed response from customer support,"[-0.16759249567985535, -0.19862000644207, 0.04..."
7,1008,Excessive delivery time for an order,"[-0.012894751504063606, -0.08605450391769409, ..."
8,1009,Difficulty in finding a specific product,"[-0.20433326065540314, 0.3896175026893616, 0.1..."
9,1010,Error in applying a discount coupon,"[-0.17083999514579773, 0.1372150033712387, 0.1..."


In [11]:
def chunks(iterable, batch_size=3):
    """Yield successive tuples of at most ``batch_size`` items from ``iterable``.

    Args:
        iterable: Any iterable; it is consumed lazily, one batch at a time.
        batch_size: Maximum number of items per yielded tuple (default 3).

    Yields:
        Tuples of items in their original order. Every tuple holds exactly
        ``batch_size`` items except possibly the last one, which holds the
        remainder. An empty iterable yields nothing.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A batch size of 0 would otherwise end the loop immediately and silently
    # drop every item, so reject it explicitly.
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, batch_size))
        if not chunk:
            # The source iterator is exhausted.
            return
        yield chunk

# Pair each ticket number (as a string ID) with its embedding, then send the
# pairs to the index in small batches.
id_vector_pairs = [
    (str(ticket_id), embedding)
    for ticket_id, embedding in zip(df.ticketno, df.question_vector)
]
for batch in chunks(id_vector_pairs):
    index.upsert(vectors=batch)
    
# Note: upserting overwrites the existing vector when the ID is the same.

In [12]:
# Check the index statistics again after the upsert to see the new vector count.
index.describe_index_stats()


{'dimension': 300,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 10}},
 'total_vector_count': 10}

In [13]:
# Fetch the stored vectors for two ticket IDs.
index.fetch(["1010","1009"])


{'namespace': '',
 'vectors': {'1009': {'id': '1009',
                      'values': [-0.204333261,
                                 0.389617503,
                                 0.149363756,
                                 -0.175599009,
                                 -0.123085007,
                                 0.0021112496,
                                 0.122617498,
                                 0.0110924989,
                                 0.0231095515,
                                 -1.80382502,
                                 -0.152481,
                                 -0.090285,
                                 -0.424822509,
                                 0.0721485168,
                                 -0.169693246,
                                 -0.106597558,
                                 -0.160515,
                                 -0.216137514,
                                 0.0758404,
                                 -0.168303,
                         

In [14]:
# Free-text questions to search for among the stored complaints.
query_questions = ["navigation button"]

# Embed each question with the same model that produced the stored vectors.
query_vectors = []
for question in query_questions:
    query_vectors.append(model.encode(str(question)).tolist())

# Ask the index for the 5 closest stored vectors per query vector.
query_results = index.query(queries=query_vectors, top_k=5)

In [15]:
# Inspect the raw query response (matched IDs and their scores).
query_results

{'results': [{'matches': [{'id': '1001', 'score': 0.813644469, 'values': []},
                          {'id': '1003', 'score': 0.357704908, 'values': []},
                          {'id': '1004', 'score': 0.314247638, 'values': []},
                          {'id': '1002', 'score': 0.298523784, 'values': []},
                          {'id': '1005', 'score': 0.278602391, 'values': []}],
              'namespace': ''}]}

In [16]:
# Extract matches and scores from the results
# Pull the matched IDs and their scores out of the first (and only) query's
# result set.
first_query_matches = query_results['results'][0]['matches']
matches = [hit['id'] for hit in first_query_matches]
scores = [hit['score'] for hit in first_query_matches]

# Two-column frame: one row per match, in the order the index returned them.
matches_df = pd.DataFrame({'id': matches, 'score': scores})


In [17]:
# The match IDs are strings, so the ticket numbers must be cast to str before
# the two frames can be joined on them.
df = df.assign(ticketno=df["ticketno"].astype(str))

# Attach the complaint text (and stored vector) to every match.
pd.merge(matches_df, df, left_on="id", right_on="ticketno")

Unnamed: 0,id,score,ticketno,complains,question_vector
0,1001,0.813644,1001,Broken navigation button on the website,"[-0.2649574875831604, -0.17953598499298096, 0...."
1,1003,0.357705,1003,Unable to reset password,"[-0.21170000731945038, 0.2875896692276001, -0...."
2,1004,0.314248,1004,App crashes on the latest iOS update,"[-0.008352995850145817, -0.13370580971240997, ..."
3,1002,0.298524,1002,Incorrect pricing displayed for a product,"[0.10973000526428223, 0.3845505118370056, 0.12..."
4,1005,0.278602,1005,Payment processing error during checkout,"[-0.17191250622272491, 0.37106096744537354, 0...."


In [18]:
# Delete

In [19]:
# Delete all vectors in the "" namespace of the index.
# NOTE(review): deleteAll is passed as the string 'true'; newer pinecone-client
# releases appear to document delete_all=True instead — confirm against the
# installed client version before changing this call.
index.delete(deleteAll='true', namespace="")

{}