# OASIS lookup helper
Provides `get_oasis_record(image_name)`, which looks up an image in the DataFrame loaded from `OASIS.csv` and returns a dict with its `img`, `Valence_mean` and `Arousal_mean` values (or `None` if no row matches).

In [2]:
import pandas as pd
from pathlib import Path
from typing import Optional, Dict, Any

# Utility to load the OASIS CSV once and reuse the DataFrame
def load_oasis(csv_path: "str | Path" = 'OASIS.csv') -> pd.DataFrame:
    """Read the OASIS ratings CSV into a pandas DataFrame.

    The file is parsed with pandas' default settings, so column dtypes are
    inferred (numeric columns come back as numbers, not strings) and the
    usual missing-value markers are converted to NaN.

    Args:
        csv_path: Location of the CSV file; defaults to ``'OASIS.csv'``
            resolved against the current working directory.

    Returns:
        The parsed contents of the file.

    Raises:
        FileNotFoundError: If nothing exists at ``csv_path``.
    """
    path = Path(csv_path)
    # Check up front so the error message names the path the caller passed in.
    if not path.exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")
    return pd.read_csv(path)

# Shared module-level DataFrame used by the cells below; read from the
# default 'OASIS.csv' path via load_oasis().
oasis = load_oasis()


In [3]:
oasis.head()

Unnamed: 0.1,Unnamed: 0,Theme,Category,Source,Valence_mean,Valence_SD,Valence_N,Arousal_mean,Arousal_SD,Arousal_N
0,I1,Acorns 1,Object,Pixabay,4.686275,0.954203,102,2.346535,1.60272,101
1,I2,Acorns 2,Object,Pixabay,4.519608,0.84115,102,2.227723,1.399151,101
2,I3,Acorns 3,Object,Pixabay,4.754902,0.958921,102,2.306931,1.514877,101
3,I4,Alcohol 1,Object,Pixabay,4.685185,1.189111,108,2.865385,1.695555,104
4,I5,Alcohol 2,Object,Pixabay,4.25,1.136686,108,3.0,1.700942,104


In [4]:
oasis[["Valence_mean", "Arousal_mean"]].describe()

Unnamed: 0,Valence_mean,Arousal_mean
count,900.0,900.0
mean,4.331249,3.668098
std,1.227583,0.837789
min,1.107843,1.693069
25%,3.485149,3.182692
50%,4.519608,3.725464
75%,5.333333,4.27319
max,6.490196,5.722772


In [5]:
oasis[["Valence_mean", "Arousal_mean"]].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 900 entries, 0 to 899
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Valence_mean  900 non-null    float64
 1   Arousal_mean  900 non-null    float64
dtypes: float64(2)
memory usage: 14.2 KB


In [3]:
oasis.sort_values('Valence_mean', ascending=True).head()

Unnamed: 0.1,Unnamed: 0,Theme,Category,Source,Valence_mean,Valence_SD,Valence_N,Arousal_mean,Arousal_SD,Arousal_N
495,I496,Miserable pose 3,Person,Wikipedia,1.107843,0.419968,102,5.148515,2.342589,101
286,I287,Dummy 1,Object,Flickr/Christopher Michel,1.157407,0.549631,108,5.201923,2.378626,104
207,I208,Dead bodies 1,Person,Wikipedia,1.19802,0.735116,101,4.514563,2.484714,103
208,I209,Dead bodies 2,Person,Wikipedia,1.247525,0.698655,101,4.475728,2.371583,103
451,I452,KKK rally 2,Person,Wikipedia,1.254902,0.713189,102,4.980198,2.107037,101


In [4]:
oasis.sort_values('Arousal_mean', ascending=True).head()

Unnamed: 0.1,Unnamed: 0,Theme,Category,Source,Valence_mean,Valence_SD,Valence_N,Arousal_mean,Arousal_SD,Arousal_N
859,I860,Wall 2,Object,Pixabay,4.029412,0.49664,102,1.693069,1.238891,101
858,I859,Wall 1,Object,Pixabay,4.078431,0.591797,102,1.80198,1.233043,101
94,I95,Bricks 1,Object,Pixabay,4.156863,0.714006,102,1.811881,1.222398,101
181,I182,Cotton swabs 3,Object,Pixabay,4.127451,0.608058,102,1.811881,1.246699,101
860,I861,Wall 3,Object,Pixabay,4.058824,0.462955,102,1.811881,1.301636,101


In [5]:
def get_oasis_record(
    image_name: str, df: Optional[pd.DataFrame] = None
) -> Optional[Dict[str, Any]]:
    """Look up an image and return its theme and mean ratings.

    A row matches when any of its non-numeric cells, compared as text, equals
    ``image_name`` exactly. Numeric columns (ratings, SDs, counts) are not
    searched, so a query such as ``'102'`` cannot accidentally hit a count.

    Args:
        image_name: Exact text to search for.
        df: Frame to search. When omitted, the notebook-level ``oasis``
            DataFrame is used; it is looked up at call time so a reloaded
            ``oasis`` is picked up without redefining this function.

    Returns:
        ``{"img": ..., "Valence_mean": ..., "Arousal_mean": ...}`` built from
        the first matching row, or ``None`` when no row matches.

    Raises:
        KeyError: If a row matches but ``df`` lacks the ``Theme``,
            ``Valence_mean`` or ``Arousal_mean`` column.
    """
    if df is None:
        # A `df=oasis` default would be frozen at definition time.
        df = oasis

    # Stringify only the text-like columns so mixed/object cells compare cleanly.
    text_cells = df.select_dtypes(exclude="number").astype(str)
    mask = text_cells.eq(image_name).any(axis=1)

    matches = df[mask]
    if len(matches) == 0:
        return None

    first_row = matches.iloc[0].to_dict()
    return {
        "img": first_row["Theme"],
        "Valence_mean": first_row["Valence_mean"],
        "Arousal_mean": first_row["Arousal_mean"],
    }

# Example lookup. NOTE: despite its name, `df` holds the value returned by
# get_oasis_record (a plain dict, or None on no match), not a DataFrame.
df = get_oasis_record('Wall 3')
df

{'img': 'Wall 3',
 'Valence_mean': 4.05882352941176,
 'Arousal_mean': 1.81188118811881}

In [6]:
# Image themes for each category, four per group
high_valence = [
    "Dog 6",
    "Lake 9",
    "Rainbow 2",
    "Sunset 3",
]
low_valence = [
    "Miserable pose 3",
    "Tumor 1",
    "Fire 9",
    "Cockroach 1",
]
high_arousal = [
    "Explosion 5",
    "Parachuting 4",
    "Snake 4",
    "Lava 1",
]
low_arousal = [
    "Wall 2",
    "Cotton swabs 3",
    "Office supplies 2",
    "Socks 1",
]

# Category name -> list of themes (the same list objects as above)
oasis_categories = dict(
    high_valence=high_valence,
    low_valence=low_valence,
    high_arousal=high_arousal,
    low_arousal=low_arousal,
)

# Show the mapping as the cell output
oasis_categories

{'high_valence': ['Dog 6', 'Lake 9', 'Rainbow 2', 'Sunset 3'],
 'low_valence': ['Miserable pose 3', 'Tumor 1', 'Fire 9', 'Cockroach 1'],
 'high_arousal': ['Explosion 5', 'Parachuting 4', 'Snake 4', 'Lava 1'],
 'low_arousal': ['Wall 2', 'Cotton swabs 3', 'Office supplies 2', 'Socks 1']}