In [1]:
pip install -U pandas pinecone-client sentence-transformers tqdm

Note: you may need to restart the kernel to use updated packages.


In [34]:
from IPython.display import HTML
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
pip install  wget

Note: you may need to restart the kernel to use updated packages.


In [35]:
import pandas as pd

In [36]:
# Load dataset to a pandas dataframe
df = pd.read_csv(
     "gifdata.tsv",
    delimiter="\t",
    names=['pattern','responses']
)
df.head()


Unnamed: 0,pattern,responses
0,pattern,responses
1,Hi,https://media.giphy.com/media/9XeR2SAyL9YixCYN...
2,How are you,https://media.giphy.com/media/kelBe4wdeugFwq5h...
3,Is anyone there?,https://media.giphy.com/media/16pLCtXiRDKCXsj7...
4,Hello,https://media.giphy.com/media/9XeR2SAyL9YixCYN...


In [37]:
print(df)

                               pattern  \
0                              pattern   
1                                   Hi   
2                          How are you   
3                     Is anyone there?   
4                                Hello   
5                             Good day   
6                                  Bye   
7                        See you later   
8                              Goodbye   
9                               Thanks   
10                           Thank you   
11                      That's helpful   
12                          I am bored   
13   What should I do when I am bored?   
14            What do in my free time?   
15                      I am Sad today   
16  My heart is filled with sorrowness   
17                    Finally I did it   
18                       I am so Happy   
19                  I am on cloud nine   
20                           I am down   
21                           depressed   

                                 

In [38]:
df.to_csv('GfG1.csv',index=False)

In [39]:
len(df)

22

In [40]:
# Number of *unique* GIFs in the dataset
len(df["responses"].unique())

14

In [41]:
dupes = df['responses'].value_counts().sort_values(ascending=False)
dupes.head()

https://media.giphy.com/media/WG1YcoKVPwhR30HWP9/giphy.gif    4
https://media.giphy.com/media/1iqHYXsd9KsFOqPIBi/giphy.gif    3
https://media.giphy.com/media/57UCJutzbAWdUrVIyv/giphy.gif    3
https://media.giphy.com/media/9XeR2SAyL9YixCYN0b/giphy.gif    2
responses                                                     1
Name: responses, dtype: int64

In [42]:
dupe_url = "https://giphy.com/gifs/thesimpsons-the-simpsons-3x21-3orif60Klq3hPriMw0"
dupe_df = df[df['responses'] == dupe_url]

# let's take a look at this GIF and it's duplicated descriptions
for _, gif in dupe_df.iterrows():
    HTML(f"<img src={gif['responses']} style='width:120px; height:90px'>")
    print(gif["pattern"])


In [43]:
for _, gif in df[:5].iterrows():
  HTML(f"<img src={gif['responses']} style='width:120px; height:90px'>")
  print(gif["pattern"])

pattern


Hi


How are you


Is anyone there?


Hello


In [44]:
import pinecone

# Connect to pinecone environment
pinecone.init(
    api_key="8d312ebd-f0c3-4e8f-af86-ef544975b895",
    environment="us-west1-gcp"
)

index_name = 'gif-responses'

# check if the gif-search exists
if index_name not in pinecone.list_indexes():
    # create the index if it does not exist
    pinecone.create_index(
        index_name,
        dimension=384,
        metric="cosine"
    )

# Connect to gif-search index we created
index = pinecone.Index(index_name)

In [45]:
from sentence_transformers import SentenceTransformer

In [46]:
# Initialize retriever with SentenceTransformer model 
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
retriever

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
)

In [47]:
from tqdm.auto import tqdm

# we will use batches of 4
batch_size = 4

for i in tqdm(range(0, len(df), batch_size)):
    # find end of batch
    i_end = min(i+batch_size, len(df))
    # extract batch
    batch = df.iloc[i:i_end]
    # generate embeddings for batch
    emb = retriever.encode(batch['pattern'].tolist()).tolist()
    # get metadata
    meta = batch.to_dict(orient='records')

    # create IDs
    ids = [f"{idx}" for idx in range(i, i_end)]
    # add all to upsert list
    to_upsert = list(zip(ids, emb, meta))
    # upsert/insert these records to pinecone
    _ = index.upsert(vectors=to_upsert)

    
# check that we have all vectors in index
index.describe_index_stats()

  0%|          | 0/6 [00:00<?, ?it/s]

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 22}},
 'total_vector_count': 22}

In [48]:
def search_gif(query):
    # Generate embeddings for the query
    xq = retriever.encode(query).tolist()
    # Compute cosine similarity between query and embeddings vectors and return top 10 URls
    xc = index.query(xq, top_k=1,
                    include_metadata=True)
    result = []
    for context in xc['matches']:
        url = context['metadata']['responses']
        result.append(url)
    return result

In [49]:
 xq = retriever.encode("hi").tolist()
 print(xq)
 print(len(xq))


[-0.09047619998455048, 0.04043962433934212, 0.023905672132968903, 0.05894798785448074, -0.022882333025336266, -0.04722011461853981, 0.045047588646411896, 0.015786360949277878, -0.048199474811553955, -0.037794120609760284, -0.01907762512564659, 0.021308820694684982, -0.00468306802213192, -0.04330817982554436, 0.05991475284099579, 0.05910336226224899, -0.02803672105073929, -0.0592183955013752, -0.12440313398838043, -0.035599932074546814, -0.006080515217036009, 0.03242906928062439, -0.03780077025294304, 0.02471090853214264, -0.042724400758743286, -0.04245385900139809, 0.04593567177653313, 0.0986255332827568, -0.049998022615909576, -0.03523582965135574, 0.07083971053361893, 0.03316322714090347, 0.02658836357295513, 0.00017324811778962612, 0.0038816644810140133, 0.03046717867255211, -0.07820263504981995, -0.12037956714630127, 0.018041519448161125, 0.02282908372581005, -0.0017749897670000792, -0.023449871689081192, 0.003058150876313448, 0.024355724453926086, 0.044153954833745956, -0.04010969

In [50]:
 xc = index.query(xq, top_k=1,
                    include_metadata=True)
 print(xc)

{'matches': [{'id': '1',
              'metadata': {'pattern': 'Hi',
                           'responses': 'https://media.giphy.com/media/9XeR2SAyL9YixCYN0b/giphy.gif'},
              'score': 1.0,
              'sparseValues': {},
              'values': []}],
 'namespace': ''}


In [51]:
def display_gif(urls):
    figures = []
    for url in urls:
        figures.append(f'''
            <figure style="margin: 5px !important;">
              <img src="{url}" style="width: 120px; height: 90px" >
            </figure>
        ''')
    return HTML(data=f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {''.join(figures)}
        </div>
    ''')

In [52]:
gifs = search_gif("hi")
display_gif(gifs)

In [53]:
gifs = search_gif("Thank you")
display_gif(gifs)

In [54]:
gifs = search_gif("I am on cloud nine")
display_gif(gifs)

In [55]:
gifs = search_gif("My heart is filled with sorrownes")
display_gif(gifs)

In [56]:
gifs = search_gif("depressed")
display_gif(gifs)


In [57]:
gifs = search_gif("What do in my free time?")
display_gif(gifs)

In [58]:
gifs = search_gif("Finally I did it")
display_gif(gifs)

In [27]:
!pip install --upgrade chatterbot_corpus

Collecting chatterbot_corpus
  Downloading chatterbot_corpus-1.2.0-py2.py3-none-any.whl (117 kB)
     ------------------------------------ 117.3/117.3 kB 524.6 kB/s eta 0:00:00
Collecting PyYAML<4.0,>=3.12
  Downloading PyYAML-3.13.tar.gz (270 kB)
     ------------------------------------ 270.6/270.6 kB 537.9 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: PyYAML
  Building wheel for PyYAML (setup.py): started
  Building wheel for PyYAML (setup.py): finished with status 'done'
  Created wheel for PyYAML: filename=PyYAML-3.13-cp39-cp39-win_amd64.whl size=43096 sha256=8747ca7c134fbdae3b2d3634ede99e8778361b34295a79e9a0c4f169acdaa956
  Stored in directory: c:\users\selva\appdata\local\pip\cache\wheels\81\6e\87\725bed1db7f86e1c7091ef5f4a4f11b0fcf7023c2be4fc29db
Successfully built PyYAML
Installing collected packages: PyYAML, chatterbot_corpus
  Attempting uninstall: PyYAML
    Foun

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 4.24.0 requires pyyaml>=5.1, but you have pyyaml 3.13 which is incompatible.
pinecone-client 2.0.13 requires pyyaml>=5.4, but you have pyyaml 3.13 which is incompatible.
huggingface-hub 0.10.1 requires pyyaml>=5.1, but you have pyyaml 3.13 which is incompatible.


In [32]:
print("you")
query=input("Enter your Query")
gifs = search_gif(query)
display_gif(gifs)

you
Enter your Queryfinally i did it


In [59]:
while(True):
    print("you")
    query=input("Enter your Query:-")
    gifs = search_gif(query)
    print("Bot")
    display_gif(gifs)
    

you
Enter your Query:-hi
Bot


you
Enter your Query:-depressed
Bot


you
Enter your Query:-good morning
Bot


you
Enter your Query:-good evening
Bot


you
Enter your Query:-i am on cloud nine
Bot


you


KeyboardInterrupt: Interrupted by user