### OpenAI Authentication

## Visualizing Embeddings

In [1]:
import openai
import os
import pandas as pd
import numpy as np
from nomic import atlas

In [2]:
openai.api_key = os.getenv('OPENAI_API_KEY')

In [3]:
embedding_cache_file = './data/book_embeddings.csv'
df = pd.read_csv(embedding_cache_file)
df

Unnamed: 0.1,Unnamed: 0,isbn13,title,authors,categories,description,published_year,average_rating,embedding
0,6738,9781932206081,Insights,Frederick Lenz,Spiritual life,"In 1983, when Rama - Dr. Frederick P. Lenz rec...",2003.0,5.00,"[0.0025765232276171446, 0.00796287041157484, 0..."
1,4284,9780738539560,Lake Orion,James E. Ingram;Lori Grove,History,"Orion Township, established in 1835, became a ...",2006.0,5.00,"[0.024689819663763046, -0.019411398097872734, ..."
2,3580,9780567044716,Colossians and Philemon,Robert McL Wilson,Religion,For over one hundred years International Criti...,2005.0,5.00,"[-0.00942917913198471, -0.007408167235553265, ..."
3,4306,9780739844328,Bill Gates,Sara Barton-Wood,Juvenile Nonfiction,"Presents the life of Bill Gates, from his chil...",2001.0,5.00,"[0.011024133302271366, -0.02996673434972763, -..."
4,5398,9780851621814,The Complete Theory Fun Factory,Katie Elliott;Ian Martin,Juvenile Nonfiction,(Boosey & Hawkes Scores/Books). Contains the m...,1996.0,5.00,"[-0.012762249447405338, -0.01543029211461544, ..."
...,...,...,...,...,...,...,...,...,...
1995,4871,9780786809943,The Final Battle,Mary Pope Osborne,Juvenile Fiction,After struggling against the gods and his fate...,2005.0,4.08,"[0.009583852253854275, -0.02656134031713009, -..."
1996,4720,9780765309969,Blade of Fortriu,Juliet Marillier,Fiction,As King Bridei prepares to expel the Gaelic in...,2006.0,4.08,"[-0.011146478354930878, -0.022168338298797607,..."
1997,1701,9780330340199,In Pharaoh's Army,Tobias Wolff,"Authors, American",Having survived the extraordinary childhood re...,1995.0,4.08,"[-0.022927450016140938, -0.03294963017106056, ..."
1998,1066,9780143039853,The Outsiders,S. E. Hinton;Jodi Picoult,Fiction,The struggle of three brothers to stay togethe...,1967.0,4.08,"[0.011317840777337551, -0.025469928979873657, ..."


In [4]:
# converting embeddings: str => numpy array
df['embedding'] = df['embedding'].apply(eval).apply(np.array)

In [5]:
df.shape

(2000, 9)

In [6]:
# [{}, {}, {}, {}, ...]
data = df[['title', 'authors', 'categories']].to_dict('records')
data

[{'title': 'Insights',
  'authors': 'Frederick Lenz',
  'categories': 'Spiritual life'},
 {'title': 'Lake Orion',
  'authors': 'James E. Ingram;Lori Grove',
  'categories': 'History'},
 {'title': 'Colossians and Philemon',
  'authors': 'Robert McL Wilson',
  'categories': 'Religion'},
 {'title': 'Bill Gates',
  'authors': 'Sara Barton-Wood',
  'categories': 'Juvenile Nonfiction'},
 {'title': 'The Complete Theory Fun Factory',
  'authors': 'Katie Elliott;Ian Martin',
  'categories': 'Juvenile Nonfiction'},
 {'title': 'The Diamond Color Meditation',
  'authors': 'John Diamond',
  'categories': 'Health & Fitness'},
 {'title': 'Fanning the Flame',
  'authors': 'Christopher J. H. Wright',
  'categories': 'Religion'},
 {'title': 'Ecuador Nature Guide',
  'authors': 'Christopher D. Jiggins',
  'categories': 'Botanique'},
 {'title': 'The Irish Anatomist',
  'authors': 'Keith Donohue',
  'categories': 'Biography & Autobiography'},
 {'title': 'Existential Meditation',
  'authors': 'Simon Clevela

In [7]:
embeddings = list(df['embedding'])
embeddings

[array([ 0.00257652,  0.00796287,  0.00385201, ..., -0.01382774,
        -0.01036059, -0.05705475]),
 array([ 0.02468982, -0.0194114 ,  0.00897992, ...,  0.0167194 ,
        -0.02210339, -0.02918968]),
 array([-0.00942918, -0.00740817, -0.00071232, ..., -0.00430707,
         0.00086473, -0.02157513]),
 array([ 0.01102413, -0.02996673, -0.01992092, ..., -0.04824618,
        -0.02382106, -0.00564667]),
 array([-0.01276225, -0.01543029, -0.0092407 , ..., -0.03287674,
         0.01590073, -0.02513471]),
 array([ 0.00512913,  0.01929048,  0.0012293 , ..., -0.0074149 ,
        -0.00377267, -0.01430809]),
 array([-0.00383353, -0.02118704, -0.01818113, ..., -0.0016842 ,
        -0.00546228, -0.02237882]),
 array([-0.0128755 ,  0.01283017, -0.00322535, ..., -0.01724074,
        -0.01329001, -0.03049189]),
 array([-0.01705124, -0.00085813,  0.02658349, ..., -0.0042765 ,
         0.0093953 , -0.02799416]),
 array([ 0.02626194,  0.0081066 ,  0.00648594, ...,  0.00237464,
        -0.00548173, -0.03

In [8]:
embeddings = np.array(embeddings)

project = atlas.map_embeddings(
    embeddings=embeddings,
    data=data,
    name='Books'
)

[32m2023-05-14 09:05:57.123[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m_create_project[0m:[36m965[0m - [1mCreating project `Books` in organization `dkim12444`[0m
[32m2023-05-14 09:05:58.873[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_embeddings[0m:[36m100[0m - [1mUploading embeddings to Atlas.[0m
2it [00:12,  6.39s/it]                       
[32m2023-05-14 09:06:11.751[0m | [1mINFO    [0m | [36mnomic.project[0m:[36m_add_data[0m:[36m1577[0m - [1mUpload succeeded.[0m
[32m2023-05-14 09:06:11.752[0m | [1mINFO    [0m | [36mnomic.atlas[0m:[36mmap_embeddings[0m:[36m119[0m - [1mEmbedding upload succeeded.[0m
[32m2023-05-14 09:06:13.441[0m | [1mINFO    [0m | [36mnomic.project[0m:[36mcreate_index[0m:[36m1282[0m - [1mCreated map `Books` in project `Books`: https://atlas.nomic.ai/map/4cc58c10-50ed-4dc4-892d-6772326df289/da754422-3d7f-471f-8179-e1ba251a3a7e[0m
[32m2023-05-14 09:06:13.442[0m | [1mINFO    [0m | [36mnomic.atlas