# Pandas - Apply function on multiple columns in a row

* [Find euclidean / cosine distance between a tensor and all tensors stored in a column of dataframe efficiently](https://stackoverflow.com/a/67658411/4281353)

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd

2022-02-23 17:44:33.982166: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-23 17:44:33.982220: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Two sample 2-D vectors used throughout the notebook.
A = np.array([0.8, 0.9])
B = np.array([1.0, 0.0])

# Euclidean norms, then the unit-length version of each vector.
EA, EB = np.linalg.norm(A), np.linalg.norm(B)
NA, NB = A / EA, B / EB

# Cosine similarity = dot product of the two unit vectors.
COS_A_B = np.dot(NA, NB)
COS_A_B

0.6643638388299198

In [3]:
C = np.array([0.5, 0.3])

# Build the frame in a single constructor call instead of appending one row
# at a time with df.loc[i] = ...; each 'enc_rep' cell holds a whole vector.
df = pd.DataFrame({
    'ID': [1, 2, 3],
    'enc_rep': [A, B, C],
})
df

Unnamed: 0,ID,enc_rep
0,1,"[0.8, 0.9]"
1,2,"[1.0, 0.0]"
2,3,"[0.5, 0.3]"


In [19]:
# Cross join of the encoding column with itself: every vector is paired with
# every vector (itself included). The clashing column names come out as
# enc_rep_x / enc_rep_y.
cartesian_df = df[['enc_rep']].merge(df[['enc_rep']], how='cross')
cartesian_df

Unnamed: 0,enc_rep_x,enc_rep_y
0,"[0.8, 0.9]","[0.8, 0.9]"
1,"[0.8, 0.9]","[1.0, 0.0]"
2,"[0.8, 0.9]","[0.5, 0.3]"
3,"[1.0, 0.0]","[0.8, 0.9]"
4,"[1.0, 0.0]","[1.0, 0.0]"
5,"[1.0, 0.0]","[0.5, 0.3]"
6,"[0.5, 0.3]","[0.8, 0.9]"
7,"[0.5, 0.3]","[1.0, 0.0]"
8,"[0.5, 0.3]","[0.5, 0.3]"


# Multiply columns

In [24]:
%%time
cartesian_df.apply(lambda row: row['enc_rep_x'] * row['enc_rep_y'], axis=1)

CPU times: user 1.2 ms, sys: 195 µs, total: 1.4 ms
Wall time: 1.42 ms


0    [0.6400000000000001, 0.81]
1                    [0.8, 0.0]
2                   [0.4, 0.27]
3                    [0.8, 0.0]
4                    [1.0, 0.0]
5                    [0.5, 0.0]
6                   [0.4, 0.27]
7                    [0.5, 0.0]
8                  [0.25, 0.09]
dtype: object

# Dot product of vectors (cosine similarity)

In [33]:
def f(x, y):
    """Return the cosine similarity of two vectors.

    Each input is scaled to unit Euclidean length and the dot product of the
    two unit vectors is returned.

    Parameters
    ----------
    x, y : array-like
        The two vectors to compare. Plain Python sequences are accepted and
        converted with ``np.asarray``.

    Returns
    -------
    The dot product of the normalized inputs. The result is ``nan`` when
    either input has zero norm, since the similarity is undefined there.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # A zero norm turns the normalization into 0/0. The nan it yields is the
    # intended "undefined" result, so only that RuntimeWarning is silenced.
    with np.errstate(invalid="ignore"):
        # Normalize the vector length
        nx = x / np.linalg.norm(x)
        ny = y / np.linalg.norm(y)

    # Cosine similarity
    return np.dot(nx, ny)

In [20]:
# Score every (enc_rep_x, enc_rep_y) pair row by row with f and keep the
# result in a new 'cosine' column.
cartesian_df['cosine'] = cartesian_df.apply(
    lambda pair: f(pair['enc_rep_x'], pair['enc_rep_y']),
    axis=1,
)
cartesian_df

Unnamed: 0,enc_rep_x,enc_rep_y,cosine
0,"[0.8, 0.9]","[0.8, 0.9]",1.0
1,"[0.8, 0.9]","[1.0, 0.0]",0.664364
2,"[0.8, 0.9]","[0.5, 0.3]",0.954226
3,"[1.0, 0.0]","[0.8, 0.9]",0.664364
4,"[1.0, 0.0]","[1.0, 0.0]",1.0
5,"[1.0, 0.0]","[0.5, 0.3]",0.857493
6,"[0.5, 0.3]","[0.8, 0.9]",0.954226
7,"[0.5, 0.3]","[1.0, 0.0]",0.857493
8,"[0.5, 0.3]","[0.5, 0.3]",1.0
