# Decision analysis

Analyse the data from the Large Language Model outputs. 

In [1]:
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd

In [2]:
df=pd.read_csv("/its/home/drs25/Tactile_Language_Model/data/allModelOutputs_test.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,ID,text,gemma,llamas,gpt,mistral,deepseek,gemmaspeed,llamasspeed,gptspeed,deepseekspeed,mistralspeed
0,0,54930,sphere shape; force 0.16270586405201262 newton...,speed action: Reduce. leg spread action: Incr...,,,Based on the tactile sensor readings; I would...,,0.588896,0,0,0,1.274835
1,1,74565,Texture bearing_small; force 17.71733324237945...,speed action: Reduce; leg spread action: Incr...,,,Based on the tactile sensor reading; I would ...,,0.588896,0,0,0,1.274835
2,2,11581,sphere shape; force 0.27560676996615735 newton...,Speed action: Reduce. Leg spread action: Incr...,,,In a hypothetical scenario as a quadruped rob...,,0.588896,0,0,0,1.274835
3,3,34275,sphere shape; force 0.14560190826965339 newton...,Speed action: Decrease; Leg spread action: Inc...,,,Based on the tactile sensor readings; I would...,,0.588896,0,0,0,1.274835
4,4,81554,Texture bowl_small_plastic; force 27.760143478...,speed action: Reduce; leg spread action: Incre...,,,In response to the tactile sensor reading; I ...,,0.588896,0,0,0,1.274835


In [5]:
print(['gemmaspeed','llamasspeed','deepseekspeed','mistralspeed','gptspeed'])
print(np.average(df[['gemmaspeed','llamasspeed','deepseekspeed','mistralspeed','gptspeed']],axis=0))
print(np.std(df[['gemmaspeed','llamasspeed','deepseekspeed','mistralspeed','gptspeed']],axis=0))

['gemmaspeed', 'llamasspeed', 'deepseekspeed', 'mistralspeed', 'gptspeed']
[0.58889627 0.         0.         1.27483463 0.        ]
gemmaspeed       0.0
llamasspeed      0.0
deepseekspeed    0.0
mistralspeed     0.0
gptspeed         0.0
dtype: float64


## Functional

In [47]:
df_f=pd.read_csv("/its/home/drs25/Tactile_Language_Model/data/FunctionalModelOutputsold.csv")

new_df=pd.DataFrame({})
new_df["gemma"] = df_f["gemma"].str.findall(r'(\w+)\s*\(')
new_df["mistral"] = df_f["mistral"].str.findall(r'(\w+)\s*\(')
new_df["gpt"] = df_f["gpt"].str.findall(r'(\w+)\s*\(')


allowed_funcs = ["increaseSpeed", "slowSpeed","maintainSpeed","widenLegStride","maintainLegSTride","increaseLegStride","lowerBody","maintainBody","IncreaseBody"]

new_df["gemma"] = new_df["gemma"].apply(lambda funcs: [f for f in funcs if f in allowed_funcs])
new_df["mistral"] = new_df["mistral"].apply(lambda funcs: [f for f in funcs if f in allowed_funcs])
new_df["gpt"] = new_df["gpt"].apply(lambda funcs: [f for f in funcs if f in allowed_funcs])

for func in allowed_funcs:
    new_df[f"{func}"] = new_df["gemma"].apply(lambda funcs: funcs.count(func))
    new_df[f"{func}"] += new_df["mistral"].apply(lambda funcs: funcs.count(func))
    new_df[f"{func}"] += new_df["gpt"].apply(lambda funcs: funcs.count(func))

new_df.head()


AttributeError: Can only use .str accessor with string values!

In [46]:
#count percentages
speed_func=["increaseSpeed", "slowSpeed","maintainSpeed"]
strid_func=["widenLegStride","maintainLegSTride","increaseLegStride"]
body_func=["lowerBody","maintainBody","IncreaseBody"]

all_funcs = speed_func + strid_func + body_func
models = ["gemma", "mistral","gpt"]   # <- add your models here


for model in models:
    for func in all_funcs:
        new_df[f"{model}_{func}"] = new_df[model].apply(lambda funcs: funcs.count(func))

new_df["winning_category"] = new_df[["Speed_max","Stride_max","Body_max"]].idxmax(axis=1)

def model_in_winning_vote(row, model_prefix):
    cat = row["winning_category"]
    if cat == "Speed_max":
        return row[[f"{model_prefix}_{f}" for f in speed_func]].sum() > 0
    elif cat == "Stride_max":
        return row[[f"{model_prefix}_{f}" for f in strid_func]].sum() > 0
    elif cat == "Body_max":
        return row[[f"{model_prefix}_{f}" for f in body_func]].sum() > 0
    return False

for model in models:
    new_df[f"{model}_in_win"] = new_df.apply(model_in_winning_vote, axis=1, args=(model,))
for model in models:
    print(model, "win rate:", new_df[f"{model}_in_win"].mean())
new_df.head()

gemma win rate: 1.0
mistral win rate: 1.0


Unnamed: 0,gemma,mistral,increaseSpeed,slowSpeed,maintainSpeed,widenLegStride,maintainLegSTride,increaseLegStride,lowerBody,maintainBody,...,mistral_slowSpeed,mistral_maintainSpeed,mistral_widenLegStride,mistral_maintainLegSTride,mistral_increaseLegStride,mistral_lowerBody,mistral_maintainBody,mistral_IncreaseBody,gemma_in_win,mistral_in_win
0,"[maintainSpeed, maintainLegSTride, maintainBody]","[maintainSpeed, widenLegStride, maintainBody]",0,0,2,1,1,0,0,2,...,0,1,1,0,0,0,1,0,True,True
1,"[maintainSpeed, maintainLegSTride, maintainBody]","[maintainSpeed, lowerBody]",0,0,2,0,1,0,1,1,...,0,1,0,0,0,1,0,0,True,True
2,"[slowSpeed, maintainSpeed, increaseSpeed, wide...","[maintainSpeed, increaseLegStride]",1,1,2,1,1,2,1,1,...,0,1,0,0,1,0,0,0,True,True
3,"[slowSpeed, widenLegStride, maintainBody]","[maintainSpeed, widenLegStride, lowerBody]",0,1,1,2,0,0,1,1,...,0,1,1,0,0,1,0,0,True,True
4,"[maintainSpeed, maintainLegSTride, maintainBody]","[maintainSpeed, widenLegStride, lowerBody]",0,0,2,1,1,0,1,1,...,0,1,1,0,0,1,0,0,True,True


## Cosine analysis

In [15]:
from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

In [19]:
cosine_frame=pd.DataFrame({
    "gemma-gpt":[0 for i in range(len(df['gemma']))],
    "gemma-deepseek":[0 for i in range(len(df['gemma']))],
    "gemma-llamas":[0 for i in range(len(df['gemma']))],
    "gemma-mistral":[0 for i in range(len(df['gemma']))],
    "deepseek-gpt":[0 for i in range(len(df['gemma']))],
    "deepseek-llamas":[0 for i in range(len(df['gemma']))],
    "deepseek-mistral":[0 for i in range(len(df['gemma']))],
    "gpt-llamas":[0 for i in range(len(df['gemma']))],
    "gpt-mistral":[0 for i in range(len(df['gemma']))],
})

keys=list(cosine_frame.keys())
for i in range(len(keys)):
    print(i/len(keys)*100,"%")
    key=keys[i]
    models=key.split("-")
    textA=df[models[0]]
    textB=df[models[1]]
    ar=[]
    for j in range(len(df[models[0]])):
        if not pd.isnull(textA[j]) and not pd.isnull(textB[j]):
            embedding1 = model.encode(textA[j], convert_to_tensor=True)
            embedding2 = model.encode(textB[j], convert_to_tensor=True)
            cosine_sim = util.pytorch_cos_sim(embedding1, embedding2)
            ar.append(cosine_sim.item())
        else:
            ar.append(0)
    cosine_frame[key]=ar
    cosine_frame.to_csv("/its/home/drs25/Tactile_Language_Model/data/Cosine_model_decision.csv")

cosine_frame

0.0 %
11.11111111111111 %
22.22222222222222 %
33.33333333333333 %
44.44444444444444 %
55.55555555555556 %
66.66666666666666 %
77.77777777777779 %
88.88888888888889 %
