# Pandas - Apply function on multiple columns in a row

* [Find euclidean / cosine distance between a tensor and all tensors stored in a column of dataframe efficiently](https://stackoverflow.com/a/67658411/4281353)

In [6]:
import numpy as np
import tensorflow as tf
import pandas as pd

In [5]:
# Two sample 2-D vectors used throughout the notebook.
A = np.array([0.8, 0.9])
B = np.array([1.0, 0.0])

# Euclidean (L2) lengths, then the corresponding unit vectors.
EA, EB = np.linalg.norm(A), np.linalg.norm(B)
NA, NB = A / EA, B / EB

# Cosine similarity is the dot product of the two unit vectors.
COS_A_B = np.dot(NA, NB)
COS_A_B

0.6643638388299198

In [52]:
# Third sample vector; A and B come from the earlier cell.
C = np.array([0.5, 0.3])

# Build the frame in a single constructor call instead of enlarging an empty
# frame one row at a time with ``df.loc[i] = ...``: row-wise enlargement
# re-allocates on every insert and starts from all-object columns.
# Each ``enc_rep`` cell holds a whole numpy vector (object column).
df = pd.DataFrame({
    'ID': [1, 2, 3],
    'enc_rep': [A, B, C],
})
df

Unnamed: 0,ID,enc_rep
0,1,"[0.8, 0.9]"
1,2,"[1.0, 0.0]"
2,3,"[0.5, 0.3]"


In [56]:
# Pair every encoded vector with every other one (including itself).
# A cross join of the column against itself yields len(df) ** 2 rows; pandas
# disambiguates the two identically named columns with the default
# ``_x`` / ``_y`` suffixes.
left_vectors = df[['enc_rep']]
right_vectors = df[['enc_rep']]
cartesian_df = left_vectors.merge(right_vectors, how='cross')
cartesian_df

Unnamed: 0,enc_rep_x,enc_rep_y
0,"[0.8, 0.9]","[0.8, 0.9]"
1,"[0.8, 0.9]","[1.0, 0.0]"
2,"[0.8, 0.9]","[0.5, 0.3]"
3,"[1.0, 0.0]","[0.8, 0.9]"
4,"[1.0, 0.0]","[1.0, 0.0]"
5,"[1.0, 0.0]","[0.5, 0.3]"
6,"[0.5, 0.3]","[0.8, 0.9]"
7,"[0.5, 0.3]","[1.0, 0.0]"
8,"[0.5, 0.3]","[0.5, 0.3]"


In [57]:
def f(x, y):
    """Return the cosine similarity of two vectors.

    Each vector is scaled to unit Euclidean length and the dot product of
    the two unit vectors is returned.

    Parameters
    ----------
    x, y : array-like
        Vectors of equal length (the notebook passes 1-D numpy arrays).

    Returns
    -------
    The dot product of the normalised vectors. No guard is applied for a
    zero-norm input, so the division by a zero norm is left to numpy.
    """
    unit_x, unit_y = (v / np.linalg.norm(v) for v in (x, y))
    return np.dot(unit_x, unit_y)

In [59]:
# Row-wise apply: each row is handed to the lambda as a Series, from which
# both vector columns are pulled and passed to f.
cartesian_df['cosine'] = cartesian_df.apply(
    lambda pair: f(pair['enc_rep_x'], pair['enc_rep_y']),
    axis='columns',
)
cartesian_df

Unnamed: 0,enc_rep_x,enc_rep_y,cosine
0,"[0.8, 0.9]","[0.8, 0.9]",1.0
1,"[0.8, 0.9]","[1.0, 0.0]",0.664364
2,"[0.8, 0.9]","[0.5, 0.3]",0.954226
3,"[1.0, 0.0]","[0.8, 0.9]",0.664364
4,"[1.0, 0.0]","[1.0, 0.0]",1.0
5,"[1.0, 0.0]","[0.5, 0.3]",0.857493
6,"[0.5, 0.3]","[0.8, 0.9]",0.954226
7,"[0.5, 0.3]","[1.0, 0.0]",0.857493
8,"[0.5, 0.3]","[0.5, 0.3]",1.0
