In [89]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans, AffinityPropagation
from scipy.spatial.distance import cdist, pdist
import numpy as np
import matplotlib.pyplot as plt
from mplcursors import cursor
%matplotlib qt

## Live synchronization with the simulator with ROS

To display live updates in the social situation simulator when hovering a specific datapoint in the figures, you need to have ROS installed on your system.

For graphs displaying 2 situations at the same time, you need to start a second instance of the simulator with the parameter `--scene-topic=/socialsituation2`.

In [90]:
import rospy
from std_msgs.msg import String
rospy.init_node("sss_analysis")
pub = rospy.Publisher("/socialsituation", String, queue_size=1)
pub2 = rospy.Publisher("/socialsituation2", String, queue_size=1)


In [91]:
#df = pd.read_csv("openai-embeddings.csv",index_col=0)
#df = pd.read_csv("llama2-13B-embeddings-base-names.csv",index_col=0)
#idx_embedding = 5

#df = pd.read_csv("situation_1-random-names-egocentric-embeddings.csv",index_col=0)
df = pd.read_csv("data/situation_1/embeddings-openai-stable-names-variations-egocentric.csv",index_col=0)
#df = pd.read_csv("situation_1-random-names-allocentric-embeddings.csv",index_col=0)
idx_embedding = 8

pd.set_option('display.max_colwidth', 50)
df

Unnamed: 0,group,engaged,ts,viewed_by,viewed_by_name,template,desc,short_code,0,1,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,0,1,3.0,{D},George,{A} and {B} are looking at each other; {B} is ...,Alice and Oliver are looking at each other; Ol...,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.009833,-0.01396,...,0.001958,0.002433,0.005423,-0.012253,-0.002549,-0.006706,-0.011069,-0.001882,-0.007362,-0.037269
1,0,1,3.0,{D},George,{C} is not far from me; {C} and I are looking ...,Phoebe is not far from me; Phoebe and I are lo...,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.007374,-0.01525,...,-0.007586,-0.003352,0.000557,-0.013666,-0.008662,-0.005026,-0.010278,0.000187,-0.007271,-0.044616
2,0,1,3.0,{D},George,{B} is talking; {C} and I are looking at each ...,Oliver is talking; Phoebe and I are looking at...,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.011235,-0.016491,...,0.003049,-0.001955,0.005845,-0.014566,-0.008334,-0.004746,-0.015946,0.004094,-0.010532,-0.037795
3,0,1,3.0,{D},George,{C} is not far from me; {B} is talking; {C} an...,Phoebe is not far from me; Oliver is talking; ...,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.009701,-0.01565,...,-0.004568,-0.001739,-0.000321,-0.01299,-0.009347,-0.004558,-0.011609,-0.000811,-0.00848,-0.043197
4,0,1,3.0,{D},George,{B} is talking; {A} and {B} are looking at eac...,Oliver is talking; Alice and Oliver are lookin...,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.013073,-0.013594,...,-0.003246,-0.00709,0.005271,-0.012278,-0.013289,-0.005459,-0.014243,0.001094,-0.005599,-0.041584
5,1,0,2.5,{D},George,{A} and {B} are looking at each other; {A} is ...,Alice and Oliver are looking at each other; Al...,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.008431,-0.008636,...,0.002797,0.00718,0.011754,-0.010029,-0.007302,-0.005563,-0.006265,-0.005001,-0.003651,-0.046376
6,1,0,2.5,{D},George,{C} is not far from me; {C} and I are looking ...,Phoebe is not far from me; Phoebe and I are lo...,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.008945,-0.007589,...,-0.000303,0.002306,0.00259,-0.009363,-0.011038,0.001586,-0.007524,-0.001015,-0.002859,-0.049551
7,1,0,2.5,{D},George,{C} is not far from me; {A} and {B} are lookin...,Phoebe is not far from me; Alice and Oliver ar...,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.006289,-0.009113,...,-4.8e-05,0.004297,0.005352,-0.00921,-0.010093,0.002759,-0.008503,-9.7e-05,-0.001741,-0.049121
8,1,0,2.5,{D},George,{A} is not far from me; {C} is not far from me...,Alice is not far from me; Phoebe is not far fr...,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.00777,-0.007375,...,-0.002983,0.000487,0.003788,-0.00613,-0.012001,-0.006612,-0.004703,-0.006276,-0.000467,-0.045029
9,1,0,2.5,{D},George,{B} is passing by; {C} is not far from me; {A}...,Oliver is passing by; Phoebe is not far from m...,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.013025,-0.01129,...,0.00402,0.00178,0.006007,-0.001916,-0.019045,-0.007335,-0.00222,0.000423,-0.002256,-0.043347


In [92]:
pca_dim=5
pca = PCA(n_components=pca_dim)
PCA_result = pca.fit_transform(df.iloc[:,idx_embedding:])
pca_df = pd.DataFrame(PCA_result,index=df.index)
pca_df

Unnamed: 0,0,1,2,3,4
0,-0.019409,-0.02671,0.053117,0.018842,-0.055503
1,-0.07053,-0.029333,0.073071,0.041824,-0.053447
2,-0.026246,-0.050289,0.038233,-0.026342,-0.070938
3,-0.07083,-0.02658,0.064063,0.028895,-0.061359
4,-0.02779,-0.068456,0.047922,-0.013852,-0.068803
5,-0.000602,0.076423,0.062193,-0.003376,-0.07595
6,-0.067981,0.061236,0.103364,0.00013,-0.026256
7,-0.077422,0.071769,0.093021,0.00115,-0.038344
8,-0.003055,0.046382,0.096698,0.029625,-0.05871
9,-0.023338,0.022757,0.103761,-0.079904,-0.062715


In [93]:
PCA_values = np.arange(pca.n_components_) + 1
plt.plot(PCA_values, pca.explained_variance_ratio_, 'o-', linewidth=2, color='blue')
plt.title('Scree Plot')
plt.xlabel('Principal Component')
plt.ylabel('Variance Explained')

Text(0, 0.5, 'Variance Explained')

In [94]:
Z = pca_df.iloc[:,:3].pivot_table(index=0, columns=1, values=2).T.values
Z = np.nan_to_num(Z, copy=True, nan=0.0)

In [95]:
X_unique = np.sort(pca_df[0].unique())
Y_unique = np.sort(pca_df[1].unique())
X, Y = np.meshgrid(X_unique, Y_unique)

In [104]:
groups_df = pca_df.groupby(df["group"]).mean()

In [202]:
def labelize(idx):
    pub.publish(df.iloc[idx]["short_code"])
    return "idx=%s, %s, t=%s\n- " % (idx,df.iloc[idx]["viewed_by_name"],df.iloc[idx]["ts"])  + \
           "\n- ".join(df.iloc[idx]["desc"].split(";"))


plt.figure(figsize=(25,25))

#for i in range(pca_df.shape[0]):
#    plt.annotate(df["group"].tolist()[i], (pca_df[0].tolist()[i], pca_df[1].tolist()[i]))
plt.scatter(pca_df[0], pca_df[1],c=df["group"]%10,cmap="tab10")

c=cursor(hover=True)
c.connect(
    "add", lambda sel: sel.annotation.set_text(labelize(sel.index)))

plt.scatter(groups_df[0], groups_df[1],c=groups_df.index%10,cmap="tab10", s=200)
for i in range(groups_df.shape[0]):
    plt.annotate(groups_df.index[i], (groups_df[0].tolist()[i], groups_df[1].tolist()[i]))
    


## Similarity between different social situations

In [215]:
def label(coord, df):
    d1,d2=coord
    desc1=df.iloc[d1]
    desc2=df.iloc[d2]
    
    pub.publish(desc1["short_code"])
    pub2.publish(desc2["short_code"])
    
    return "➡️ %s, %s, t=%s\n- " % (d1, desc1["viewed_by_name"],desc1["ts"])  + \
           "\n- ".join(desc1["desc"].split(";")) + \
           "\n⬇️ %s, %s, t=%s\n- " % (d2, desc2["viewed_by_name"],desc2["ts"]) + \
           "\n- ".join(desc2["desc"].split(";"))

def plot_similarity_matrix(df, embeddings_idx=0):
    
    similarity = cosine_similarity(df.iloc[:,embeddings_idx:])

    fig, ax = plt.subplots(figsize=(20,20))
    cax = ax.matshow(similarity, interpolation='nearest')
    ax.grid(True)
    plt.title('Social scenes similarity matrix')
    plt.xticks(range(len(group_embeddings)));
    plt.yticks(range(len(group_embeddings)));
    fig.colorbar(cax, ticks=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, .75,.8,.85,.90,.95,1])
    c=cursor(hover=True)
    c.connect(
        "add", lambda sel: sel.annotation.set_text(label(sel.index, df)))


In [145]:
group_descs = df.groupby("group")[["desc", "ts", "viewed_by_name", "short_code"]].apply(lambda row:row.iloc[0,:])
group_embeddings = df.groupby("group").mean().iloc[:,2:]
group_df = pd.concat([group_descs,group_embeddings], axis=1)

plot_similarity_matrix(group_df, embeddings_idx=4)

## Assessing the quality of the 'variations' clustering

Probably useful to compare difference sentence embeddings.
Here, results with OpenAI sentence embeddings give a Calinski-Harabasz score of ~250.


See also below how well a simple KNN can assign a variation of a scene to its cluster.

In [114]:
group_pca_df = pca_df
group_pca_df["group"] = df["group"]
group_pca_df = pd.merge(group_pca_df, groups_df, left_on="group", right_index=True, suffixes=["","_group"])
group_pca_df

Unnamed: 0,0,1,2,3,4,group,0_group,1_group,2_group,3_group,4_group
0,-0.019409,-0.02671,0.053117,0.018842,-0.055503,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
1,-0.07053,-0.029333,0.073071,0.041824,-0.053447,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
2,-0.026246,-0.050289,0.038233,-0.026342,-0.070938,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
3,-0.07083,-0.02658,0.064063,0.028895,-0.061359,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
4,-0.02779,-0.068456,0.047922,-0.013852,-0.068803,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
94,-0.070635,-0.040218,0.041311,0.039672,-0.036346,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
95,-0.02202,-0.059768,0.064873,0.02128,-0.052108,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
153,-0.070424,-0.027773,0.08618,0.043912,-0.054498,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
154,-0.055583,-0.057854,0.049059,0.023882,-0.03835,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847
213,-0.030797,-0.045287,0.070719,0.013334,-0.062743,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847


In [115]:


group_pca_df["dist_to_centre"] = ((group_pca_df[[str(x) for x in range(pca_dim)]] - group_pca_df[["%s_group" % x for x in range(pca_dim)]].values)**2).sum(axis=1) ** 0.5
group_pca_df

Unnamed: 0,0,1,2,3,4,group,0_group,1_group,2_group,3_group,4_group,dist_to_centre
0,-0.019409,-0.02671,0.053117,0.018842,-0.055503,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.032661
1,-0.07053,-0.029333,0.073071,0.041824,-0.053447,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.050374
2,-0.026246,-0.050289,0.038233,-0.026342,-0.070938,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.044236
3,-0.07083,-0.02658,0.064063,0.028895,-0.061359,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.041436
4,-0.02779,-0.068456,0.047922,-0.013852,-0.068803,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.036825
94,-0.070635,-0.040218,0.041311,0.039672,-0.036346,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.049371
95,-0.02202,-0.059768,0.064873,0.02128,-0.052108,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.029635
153,-0.070424,-0.027773,0.08618,0.043912,-0.054498,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.058063
154,-0.055583,-0.057854,0.049059,0.023882,-0.03835,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.030484
213,-0.030797,-0.045287,0.070719,0.013334,-0.062743,0,-0.042662,-0.047055,0.054934,0.008681,-0.057847,0.020945


In [116]:
group_dists_df = group_pca_df.groupby("group")["dist_to_centre"].agg([np.mean, np.std])
group_dists_df.plot(kind = "bar", y = "mean", legend = False,
          xerr = "std", title = "Distance to variations' embedding centres")

<matplotlib.axes._subplots.AxesSubplot at 0x7f4e8bac8eb0>

In [117]:
from sklearn.metrics import silhouette_score

#silhouette_score(df.iloc[:,idx_embedding:],group_pca_df["group"],metric='cosine')
silhouette_score(group_pca_df[[str(x) for x in range(pca_dim)]],group_pca_df["group"])

0.19874506716416832

In [118]:
from sklearn.metrics import calinski_harabasz_score
calinski_harabasz_score(group_pca_df[[str(x) for x in range(pca_dim)]],group_pca_df["group"])


189.08061512453693

## Training a classifier to identify which scene a variation belong to

- train a classifier to recognise variations of situations with a set of names
- try to recognise the same variations, but with different names

In [119]:
# train classifier using full embeddings
X = df.iloc[:,idx_embedding:]

# train classifier using PCA
#X = pca_df

y = df["group"]

In [120]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [121]:
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)

KNeighborsClassifier()

### Predicting on a subset of the same data

- openai (text-embedding-ada-002): ~83%
- llama2-13B-4bit quantizied: ~46%

- random names, allocentric, openai: ~17%
- random names, egocentric, openai: ~13%
- stable names, egocentric, openai: ~76%


In [122]:
neigh.score(X_test, y_test)

0.7560975609756098

### Predicting on same scene, different names

In [339]:
df_random_names = pd.read_csv("si",index_col=0)

In [340]:
PCA_result = pca.fit_transform(df_random_names.iloc[:,idx_embedding:])
pca_random_names_df = pd.DataFrame(PCA_result,index=df_random_names.index)

In [341]:
neigh.score(df_random_names.iloc[:,idx_embedding:], df_random_names["group"])
#neigh.score(pca_random_names_df, df_random_names["group"])

0.46190476190476193

In [352]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

conf_matrix=confusion_matrix(df_random_names["group"],neigh.predict(df_random_names.iloc[:,idx_embedding:]))

In [None]:
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap=plt.cm.Blues, alpha=1)
#for i in range(conf_matrix.shape[0]):
#    for j in range(conf_matrix.shape[1]):
#        ax.text(x=j, y=i,s=conf_matrix[i, j], va='center', ha='center', size='xx-large')


c=cursor(hover=True)

plt.xlabel('Predictions', fontsize=18)
plt.ylabel('Actuals', fontsize=18)
plt.title('Confusion Matrix', fontsize=18)


=> Accuracy: ~45% with 51 classes

## Different scene: computing similarity of situations

In [261]:
sit2 = pd.read_csv("data/situation_lfe_1/embeddings-openai-stable-names-no-variations-egocentric.csv",index_col=0)

group_descs = df.groupby("group")[["desc", "ts", "viewed_by_name", "short_code"]].apply(lambda row:row.iloc[0,:])
group_embeddings = df.groupby("group").mean().iloc[:,2:]
sit1_groups = pd.concat([group_descs,group_embeddings], axis=1)

sit2 = sit2[sit1_groups.columns]


In [262]:
#together=pd.concat([sit1_groups, sit2]).reset_index(drop=True)
together=sit1_groups.append(sit2)
plot_similarity_matrix(together, embeddings_idx=4)


  together=sit1_groups.append(sit2)


## Training a classifier to recognise engagement


In [82]:
y = df["engaged"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [83]:
neigh.score(X_test, y_test)

0.7926829268292683

In [84]:
neigh.score(df_random_names.iloc[:,idx_embedding:], df_random_names["engaged"])

NameError: name 'df_random_names' is not defined

### TODO: compare to visual social engagement from hri_engagement

## Clustering social situations

Automatic clustering.
Scene with engagement are plotted with a star. Black dots indicate the 'central situation', eg the know social situation the closest to the clusters' centers.

In [27]:
X = df.iloc[:,idx_embedding:]

clustering = KMeans(n_clusters=6, random_state=1, init='k-means++')
#clustering = AffinityPropagation(random_state=0)

label = clustering.fit_predict(X)
cluster_centers=clustering.cluster_centers_

In [268]:
def closest_situations(embeddings, df, idx_embedding=idx_embedding):
    """ Find closest point from a list of points. """
    return np.array([cdist([emb], df.iloc[:,idx_embedding:],metric="cosine").argmin() for emb in embeddings])

In [167]:
central_situations = closest_situations(cluster_centers, df)

In [30]:
def labelize(idx):
    idx=central_situations[idx]
    return "idx=%s, %s, t=%s\n- " % (idx,df.iloc[idx]["viewed_by"],df.iloc[idx]["ts"])  + \
           "\n- ".join(df.iloc[idx]["desc"].split(";"))

plt.clf()

plt.scatter(pca_df.iloc[central_situations][0], pca_df.iloc[central_situations][1], s=200,c="black")



c=cursor(hover=True)
c.connect(
    "add", lambda sel: sel.annotation.set_text(labelize(sel.index))
)

plt.scatter(pca_df[df["engaged"] == 1][0], pca_df[df["engaged"] == 1][2],c=label[df["engaged"] == 1],cmap="tab10", marker="*")
plt.scatter(pca_df[df["engaged"] == 0][0], pca_df[df["engaged"] == 0][2],c=label[df["engaged"] == 0],cmap="tab10", marker="o")




<matplotlib.collections.PathCollection at 0x7f4ee85cedf0>

In [156]:
df.iloc[central_situations]["desc"]

1390    Jane is talking; Will and Joe are looking at e...
988     Emily and Jane are looking at each other; Will...
884     Will and Joe are looking at each other; Emily ...
709     Joe is talking; Violet is talking; Joe is clos...
706     Will is talking; Joe is talking; Jane is not f...
555     Jane is walking towards Emily; Joe is not far ...
Name: desc, dtype: object

In [168]:
central_situations

array([1390,  988,  884,  709,  706,  555])

# Evolution of social situations

In [324]:
situations = pd.read_csv("situation_1.csv")
situations

Unnamed: 0,engaged,viewed_by,actual_ts,t-0.0,t-0.5,t-1.0,t-1.5,t-2.0,t-2.5,t-3.0
0,1,Joe,3.0,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Will is walking towards Joe; Emily is not far ...,Will is walking towards Joe; Emily is not far ...,Emily is looking at Joe; Will is walking towar...,Emily is looking at Joe; Will is walking towar...
1,1,Emily,3.0,Jane is not far from Emily; Emily and Jane are...,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Joe is looking at Will; Joe is not far from Emily,Joe is looking at Will; Jane is looking at Wil...,Joe is not far from Emily,Joe is not far from Emily
2,1,Will,3.0,Will and Joe are looking at each other; Violet...,Will and Joe are looking at each other; Violet...,Will and Joe are looking at each other; Joe is...,Joe is looking at Will; Joe is not far from Will,Joe is not far from Will; Jane is walking towa...,Joe is not far from Will; Jane is walking towa...,Joe is looking at Will; Joe is not far from Will
3,0,Violet,3.0,no one is around,no one is around,no one is around,no one is around,no one is around,no one is around,no one is around
4,1,Jane,3.0,Emily and Jane are looking at each other; Emil...,Will is close to Jane; Emily and Jane are look...,Emily and Jane are looking at each other; Emil...,no one is around,Will is not far from Jane,Joe is looking at Will; Emily is looking at Jo...,Joe is looking at Will; Emily is looking at Joe
5,1,Joe,3.5,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Will is walking towards Joe; Emily is not far ...,Will is walking towards Joe; Emily is not far ...,Emily is looking at Joe; Will is walking towar...
6,1,Emily,3.5,Will and Joe are looking at each other; Jane i...,Jane is not far from Emily; Emily and Jane are...,Will and Joe are looking at each other; Emily ...,Will and Joe are looking at each other; Emily ...,Joe is looking at Will; Joe is not far from Emily,Joe is looking at Will; Jane is looking at Wil...,Joe is not far from Emily
7,1,Will,3.5,Will and Joe are looking at each other; Violet...,Will and Joe are looking at each other; Violet...,Will and Joe are looking at each other; Violet...,Will and Joe are looking at each other; Joe is...,Joe is looking at Will; Joe is not far from Will,Joe is not far from Will; Jane is walking towa...,Joe is not far from Will; Jane is walking towa...
8,0,Violet,3.5,no one is around,no one is around,no one is around,no one is around,no one is around,no one is around,no one is around
9,1,Jane,3.5,Emily is looking at Jane; Emily is not far fro...,Emily and Jane are looking at each other; Emil...,Will is close to Jane; Emily and Jane are look...,Emily and Jane are looking at each other; Emil...,no one is around,Will is not far from Jane,Joe is looking at Will; Emily is looking at Jo...


In [325]:
# also reverse the order: the 1st row of resulting df is [time_window]s before the timestamp of the last row


sit1=situations.iloc[0:1,3:].transpose().iloc[::-1].rename(columns={0:"desc"})
sit1=pd.merge(sit1, df, on="desc")

sit1



Unnamed: 0,desc,group,engaged,viewed_by,ts,0,1,2,3,4,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,Emily is looking at Joe; Will is walking towar...,4,0,Joe,0.5,-0.024636,-0.003566,0.00913,0.019873,-0.022481,...,0.00036,0.001633,0.002648,-0.008682,-0.015021,-0.017844,-0.002546,-0.014605,-0.009873,-0.034982
1,Emily is looking at Joe; Will is walking towar...,4,0,Joe,0.5,-0.024636,-0.003566,0.00913,0.019873,-0.022481,...,0.00036,0.001633,0.002648,-0.008682,-0.015021,-0.017844,-0.002546,-0.014605,-0.009873,-0.034982
2,Will is walking towards Joe; Emily is not far ...,3,0,Joe,1.5,-0.019964,-0.004463,0.011468,0.032493,-0.019559,...,-0.005573,0.008635,0.007567,-0.006069,-0.017321,-0.008376,-0.005721,-0.02076,-0.017814,-0.026146
3,Will is walking towards Joe; Emily is not far ...,3,0,Joe,1.5,-0.019964,-0.004463,0.011468,0.032493,-0.019559,...,-0.005573,0.008635,0.007567,-0.006069,-0.017321,-0.008376,-0.005721,-0.02076,-0.017814,-0.026146
4,Will and Joe are looking at each other; Emily ...,2,0,Joe,2.0,-0.026705,-0.004962,0.011093,0.023816,-0.021518,...,-0.00425,0.004789,0.002239,-0.007954,-0.013494,-0.01228,-0.00114,-0.015394,-0.018128,-0.032919
5,Will and Joe are looking at each other; Emily ...,1,0,Joe,2.5,-0.028806,-0.003917,0.011296,0.027101,-0.027695,...,0.002139,0.003814,0.003468,-0.006194,-0.007285,-0.007473,-0.001318,-0.018278,-0.016547,-0.041
6,Will and Joe are looking at each other; Emily ...,0,1,Joe,3.0,-0.032531,-0.005525,0.018274,0.023091,-0.020927,...,-0.002622,0.001248,0.000222,-0.012498,-0.009794,-0.0132,-0.00435,-0.016999,-0.01848,-0.035158


In [326]:
pd.DataFrame(cosine_similarity(sit1.iloc[:,idx_embedding:], sit1.iloc[:,idx_embedding:]))

Unnamed: 0,0,1,2,3,4,5,6
0,1.0,1.0,0.984643,0.984643,0.983437,0.970087,0.952164
1,1.0,1.0,0.984643,0.984643,0.983437,0.970087,0.952164
2,0.984643,0.984643,1.0,1.0,0.977532,0.960564,0.944359
3,0.984643,0.984643,1.0,1.0,0.977532,0.960564,0.944359
4,0.983437,0.983437,0.977532,0.977532,1.0,0.987556,0.977508
5,0.970087,0.970087,0.960564,0.960564,0.987556,1.0,0.981355
6,0.952164,0.952164,0.944359,0.944359,0.977508,0.981355,1.0


In [327]:
def plot_embedding_path(embeddings):
    #pca = PCA(n_components=2)
    #PCA_result = pca.fit_transform(embeddings)
    #pca_df = pd.DataFrame(PCA_result)
    plt.plot(pca_df[0], pca_df[1],'-o',color="blue")
    for i in range(pca_df.shape[0]):
        plt.annotate(i, (pca_df[0].tolist()[i], pca_df[1].tolist()[i]))


for i in range(len(situations[situations["engaged"] == 1])):

    sit=situations.iloc[i:i+1,3:].transpose().iloc[::-1].rename(columns={i:"desc"})
    sit=pd.merge(sit, df, on="desc")
    plot_embedding_path(sit1.iloc[:,idx_embedding:])


  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]


  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]


  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]


  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]
  ndim = x[:, None].ndim
  x = x[:, np.newaxis]
  y = y[:, np.newaxis]


In [264]:
pca_df.loc[df['desc'] == sit1.iloc[0]["desc"]]

Unnamed: 0,0,1,2,3,4
59,-0.083003,-0.007743,-0.038595,-0.00997,0.215661


# Querying the models from natural language prompts


In [188]:
from langchain.embeddings import OpenAIEmbeddings
embeddings_model = OpenAIEmbeddings()

In [263]:
desc = "No one is close to me"
emb = embeddings_model.embed_query(desc)


ValueError: XA and XB must have the same number of columns (i.e. feature dimension.)

In [269]:
sit = closest_situations([emb],together, idx_embedding=4)
print(sit)
pub.publish(together.iloc[sit]["short_code"].values[0])

[15]


In [266]:
together

Unnamed: 0,desc,ts,viewed_by_name,short_code,0,1,2,3,4,5,...,1526,1527,1528,1529,1530,1531,1532,1533,1534,1535
0,Alice and Oliver are looking at each other; Ol...,3.0,George,eJxtkN1KxDAQhV+l5LoNmSSTH++98QH0opSli3G3GFvYra...,-0.011136,-0.014125,0.005995,-0.013961,-0.011234,0.03928,...,-0.001562,-0.002217,0.003478,-0.01156,-0.008945,-0.006309,-0.009988,0.001129,-0.006959,-0.040515
1,Alice and Oliver are looking at each other; Al...,2.5,George,eJxdks9OwzAMxl8lyrmL7MT5x50LDwCHapo6EbZqoZVYNZ...,-0.009275,-0.009785,0.003105,-0.003694,-0.015898,0.035067,...,0.001531,0.003827,0.007852,-0.007181,-0.011696,-0.003671,-0.005547,-0.001318,-0.002179,-0.046816
2,Alice and Oliver are looking at each other; Al...,2.0,George,eJxtks9OwzAMxl8lyrmNYsf5x50LDwCHapo6EbpqpZVYNZ...,-0.007565,-0.010636,0.006688,-0.004459,-0.010645,0.03662,...,-0.007124,0.002291,0.007509,-0.007278,-0.012609,-0.008869,-0.005123,0.002237,-0.003917,-0.040813
3,Alice is not far from me; Oliver is not far fr...,1.5,George,eJxtUsFOwzAM/ZUq5zayXadOuHPhA+AwTVMnwlZRWolVgw...,-0.000792,-0.005676,-0.001429,0.000556,-0.015814,0.027833,...,0.001126,0.00179,0.003399,-0.003042,-0.009881,-0.009346,-0.00549,-0.003642,-0.004249,-0.027254
4,Alice is looking at me; Alice is not far from ...,0.5,George,eJxlks1OwzAMx1+lynmLHOfDLncuPAAcpmnqRNiqlVZi1W...,-0.009057,-0.00426,0.001473,-0.003629,-0.013473,0.028177,...,0.002127,0.001772,0.005767,-0.001981,-0.008515,-0.015675,0.000142,-0.003655,-0.000566,-0.030024
5,Alice and I are looking at each other; Alice i...,3.0,Alice,eJxtkd1KxDAQhV+l5LoNmfxN4r03PoBelLJ0Me4WYwu7dX...,-0.014306,-0.000653,0.006817,-0.003357,-0.005761,0.029359,...,-0.000545,0.002888,0.010775,-0.005269,-0.011115,-0.024723,0.002729,-0.017621,-0.004768,-0.026062
6,Alice and Oliver are looking at each other; Al...,2.5,Alice,eJxdkc9OwzAMxl+lyrmL7MT5x50LDwCHapo6EbaK0EqsGk...,-0.011107,-0.010195,0.005654,-0.008799,-0.013474,0.035579,...,-0.004603,0.002096,0.006431,-0.002954,-0.013055,-0.010377,-0.006723,0.00648,-0.002199,-0.042631
7,Alice and Oliver are looking at each other; Ol...,2.0,Alice,eJxtkc9OwzAMxl+lyrmNYsf5x50LDwCHapo6EbpqoZVYNZ...,-0.007238,-0.012752,0.004093,-0.007892,-0.011951,0.036933,...,-0.005623,-0.000717,0.004092,-0.00718,-0.01155,-0.006253,-0.005249,0.005304,-0.004314,-0.040933
8,Alice is looking at Oliver; Alice is not far f...,1.5,Alice,eJxtUcFOwzAM/ZUq5zayXadOuHPhA+AwTVMnwlZRWolVgw...,-0.000235,-0.007583,-0.003381,-0.017734,-0.009213,0.0253,...,0.004812,0.002943,0.006734,-0.005119,-0.005308,-0.007113,-0.008801,-0.001356,-0.000251,-0.03614
9,Alice is looking at Oliver; Oliver is passing ...,1.0,Alice,eJxtUU1PwzAM/StVzm0UO3E+uHPhB8ChmqZOhK1aaCVWDS...,-0.010635,-0.006276,-0.000119,-0.007587,-0.015841,0.034584,...,0.011929,-0.000559,0.000915,-0.005106,-0.012632,-0.003311,-0.003938,0.003016,-0.000767,-0.043664


## Operations on embeddings

In [290]:
emb = together.iloc[131,idx_embedding:] + together.iloc[122,idx_embedding:]
emb

4      -0.029855
5        0.06443
6      -0.006497
7       0.007195
8       0.022101
9      -0.021718
10      0.023271
11      0.043907
12     -0.020658
13     -0.033801
14      0.010253
15     -0.016344
16      0.061417
17      0.021117
18      0.033379
19      -0.03079
20     -0.030807
21      0.004206
22       0.01827
23     -0.004412
24     -0.072739
25     -0.004711
26       0.00388
27     -0.059423
28      0.008153
29     -0.034721
30      0.045253
31      0.010692
32     -0.020997
33     -0.023717
34     -0.045507
35     -0.017513
36     -0.000171
37     -0.011112
38      0.021757
39     -0.013136
40      0.009342
41       -0.0067
42     -0.001739
43     -0.033992
44     -0.048202
45      0.024441
46     -0.048112
47     -0.011133
48     -0.047885
49       0.03113
50      0.019054
51      0.075911
52     -0.040638
53     -0.038605
54     -0.038749
55      0.008271
56      0.023992
57      0.040982
58     -0.036202
59      0.009052
60     -0.008871
61     -0.003651
62     -0.0203

In [291]:
sit = closest_situations([emb],together)
print(sit)
pub.publish(together.iloc[sit]["short_code"].values[0])

[98]


In [289]:
pub.publish(together.iloc[131]["short_code"])

In [286]:
pub.publish(together.iloc[122]["short_code"])