In [448]:
# Install missing plotting libraries
%pip install matplotlib seaborn

# Import relevant libaries
import pandas as pd
import os
from pathlib import Path
import re
import ast

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Users\Admin\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [449]:
# Load the participants_rows.csv file
data_dir = Path('google_exports')
df_surveys_final = pd.read_csv(data_dir / 'df_surveys_with_timing_filtered.csv')

df_surveys_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 55 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   participant_id              52 non-null     object 
 1   background_submitted_at     52 non-null     object 
 2   q2_gender                   52 non-null     object 
 3   q1_age_group                52 non-null     object 
 4   q3_education                52 non-null     object 
 5   q5_nationality              52 non-null     object 
 6   q7_ai_familiarity           52 non-null     int64  
 7   q8_attention_check          52 non-null     int64  
 8   q10_additional_info         52 non-null     object 
 9   q4_employment_status        52 non-null     object 
 10  q6_country_residence        52 non-null     object 
 11  q9_ai_usage_frequency       52 non-null     object 
 12  q11_response                52 non-null     object 
 13  instruction_submitted_at    52 non-nu

## Data Cleaning and Inconsistencies Handling for surveys datasets

In [450]:
def clean_nationality(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    # --- Manual fixes for common misspellings ---
    fixes = {
        "autrian": "austrian",
        "grrek": "greek",
        "kazakh ": "kazakh",
        "latvian": "latvian",
        "swiss": "switzerland",
        "de": "germany",
        "DE": "Germany",
        "usa": "usa",
        "us": "usa"
    }
    if text in fixes:
        text = fixes[text]

    # --- Mapping from nationality to country ---
    mapping = {
        "uk": "UK",
        "british": "UK",
        "united kingdom": "UK",
        "scottish": "UK",

        "german": "Germany",
        "deutschland": "Germany",
        "DE": "Germany",

        "switzerland": "Switzerland",

        "vietnamese": "Vietnam",
        "vietnam": "Vietnam",

        "vietnamese-german": "Germany",

        "kazakh": "Kazakhstan",
        "kazakhstan": "Kazakhstan",

        "latvian": "Latvia",
        "latvia": "Latvia",

        "italian": "Italy",
        "italy": "Italy",

        "czech": "Czech Republic",
        "czechia": "Czech Republic",

        "libyan": "Libya",
        "libya": "Libya",

        "austrian": "Austria",
        "austria": "Austria",

        "american": "USA",
        "usa": "USA",
        "us": "USA",
        "united states": "USA",
        "america": "USA",

        "china": "China",
        "chinese": "China",

        "french": "France",
        "france": "France",

        "sri lankan": "Sri Lanka",
        "sri lanka": "Sri Lanka",

        "ukrainian": "Ukraine",
        "ukraine": "Ukraine",

        "taiwan": "Taiwan",
        "taiwanese": "Taiwan",

        "dutch": "Netherlands",
        "netherlands": "Netherlands",

        "lithuanian": "Lithuania",
        "lithuania": "Lithuania",

        "hungarian": "Hungary",
        "hungary": "Hungary",

        "swedish": "Sweden",
        "sweden": "Sweden",

        "turkish": "Turkey",
        "turkey": "Turkey",

        "indian": "India",
        "india": "India",

        "filipino": "Philippines",
        "philippines": "Philippines",

        "brazilian": "Brazil",
        "brazil": "Brazil",

        "colombian": "Colombia",
        "colombia": "Colombia",

        "greek": "Greece",
        "greece": "Greece",
        
        "Russian": "Russia",
        
        "singaporean": "Singapore",
        "slovak": "Slovakia",
    }

    # Exact match
    if text in mapping:
        return mapping[text]

    # Handle composite nationalities → pick first recognized nationality
    parts = [p.strip() for p in text.replace("-", " ").split()]
    for p in parts:
        if p in mapping:
            return mapping[p]

    return None


# -----------------------------------------
# 1. Create cleaned column
# -----------------------------------------
cleaned_series = df_surveys_final["q5_nationality"].apply(clean_nationality)

# -----------------------------------------
# 2. Insert cleaned column directly after original
# -----------------------------------------
insert_pos = df_surveys_final.columns.get_loc("q5_nationality") + 1

df_surveys_final.insert(insert_pos, "q5_nationality_sd", cleaned_series)


In [451]:
df_surveys_final[["q5_nationality","q5_nationality_sd"]]

Unnamed: 0,q5_nationality,q5_nationality_sd
0,UK,UK
1,German,Germany
2,United States,USA
3,Russian,
4,Swiss,Switzerland
5,Vietnamese-german,Germany
6,British,UK
7,Kazakh,Kazakhstan
8,Vietnamese,Vietnam
9,Latvian,Latvia


In [452]:
mask_russian = df_surveys_final["q5_nationality"].astype(str).str.strip().str.lower().eq("russian")
df_surveys_final.loc[mask_russian, "q5_nationality_sd"] = "Russia"

mask_de = df_surveys_final["q5_nationality"].astype(str).str.strip().str.lower().eq("de")
df_surveys_final.loc[mask_de, "q5_nationality_sd"] = "Germany"

In [453]:
# -----------------------------------------
# 0. Rename original column
# -----------------------------------------
if "q6_country_residence" in df_surveys_final.columns:
    df_surveys_final.rename(
        columns={"q6_country_residence": "q6_residence"},
        inplace=True
    )

col = "q6_residence"   # new original column name


# -----------------------------------------
# 1. Country cleaning function
# -----------------------------------------

def clean_country(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    country_map = {
        # UK & variants
        "uk": "UK",
        "united kingdom": "UK",
        "england": "UK",
        "scotland": "UK",
        "britain": "UK",

        # USA
        "usa": "USA",
        "united states": "USA",
        "united states of america": "USA",
        "america": "USA",

        # Germany
        "germany": "Germany",
        "de": "Germany",
        "DE": "Germany",

        # Switzerland
        "switzerland": "Switzerland",

        # Czech Republic
        "czech republic": "Czech Republic",
        "czechia": "Czech Republic",

        # Netherlands
        "netherlands": "Netherlands",
        "the netherlands": "Netherlands",

        # Italy
        "italy": "Italy",

        # France
        "france": "France",

        # Russia
        "russia": "Russia",
        "Russia": "Russia",

        # China
        "china": "China",

        # Vietnam
        "vietnam": "Vietnam",
        "ho chi minh city": "Vietnam",

        # Taiwan
        "taiwan": "Taiwan",

        # Latvia
        "latvia": "Latvia",

        # Lithuania
        "lithuania": "Lithuania",

        # Singapore
        "singapore": "Singapore",

        # Slovakia
        "slovakia": "Slovakia",

        # Hungary
        "hungary": "Hungary",

        # Sweden
        "sweden": "Sweden",

        # Greece
        "greece": "Greece",

        # India
        "india": "India",

        # Philippines
        "philippines": "Philippines",

        # Canada
        "canada": "Canada",
    }

    if text in country_map:
        return country_map[text]

    return text.title()


# -----------------------------------------
# 2. Create cleaned column
# -----------------------------------------
cleaned_series = df_surveys_final[col].apply(clean_country)


# -----------------------------------------
# 3. Insert cleaned column directly after original
# -----------------------------------------
insert_pos = df_surveys_final.columns.get_loc(col) + 1

df_surveys_final.insert(insert_pos, "q6_residence_sd", cleaned_series)


In [454]:
df_surveys_final[["q6_residence","q6_residence_sd"]]

Unnamed: 0,q6_residence,q6_residence_sd
0,UK,UK
1,Germany,Germany
2,United States,USA
3,Russia,Russia
4,Switzerland,Switzerland
5,Germany,Germany
6,Uk,UK
7,Germany,Germany
8,Vietnam,Vietnam
9,Latvia,Latvia


### q10_search_tools

In [455]:
# -----------------------------------------
# RENAME ORIGINAL COLUMN
# -----------------------------------------
df_surveys_final.rename(columns={"q10_additional_info": "q10_search_tools"}, inplace=True)

# Use the new column name from now on
col = "q10_search_tools"

# -----------------------------------------
# Helper functions (unchanged)
# -----------------------------------------

def clean_item(item):
    if not item or pd.isna(item):
        return None

    item = item.strip()

    corrections = {
        r"\bgoogle\b": "Google",
        r"\bamazon\b": "Amazon",
        r"\bshoppe\b": "Shopee",
        r"\bshopee\b": "Shopee",
        r"\bwalmart\b": "Walmart",
        r"\bshein\b": "Shein",
        r"\btiktok\b": "TikTok",
        r"chat ?gpt": "ChatGPT",
    }
    for pattern, replacement in corrections.items():
        item = re.sub(pattern, replacement, item, flags=re.IGNORECASE)

    if re.search(r"price\s*compar", item, flags=re.IGNORECASE):
        return "Price comparison sites"

    if item.isalpha() and item.lower() not in [
        "google", "amazon", "shopee", "etsy", "ebay", "vinted",
        "facebook", "tiktok", "walmart", "shein", "taobao"
    ]:
        item = item.capitalize()

    return item


def split_items(text):
    if pd.isna(text):
        return []

    parts = re.split(r"[,/]| and | or ", str(text), flags=re.IGNORECASE)

    cleaned = []
    for p in parts:
        p = clean_item(p)
        if p:
            cleaned.append(p)

    unique = []
    for x in cleaned:
        if x not in unique:
            unique.append(x)

    return unique

# -----------------------------------------
# PROCESS COLUMN (new columns follow original)
# -----------------------------------------

df_surveys_final["splitted"] = df_surveys_final[col].apply(split_items)
max_len = df_surveys_final["splitted"].apply(len).max()

# Insert new columns immediately after renamed column
insert_pos = df_surveys_final.columns.get_loc(col) + 1

new_cols = {}
for i in range(1, max_len + 1):
    cname = f"q10_tool_{i}"
    new_cols[cname] = df_surveys_final["splitted"].apply(
        lambda x: x[i-1] if len(x) >= i else None
    )

for idx, (cname, series) in enumerate(new_cols.items()):
    df_surveys_final.insert(insert_pos + idx, cname, series)

df_surveys_final.drop(columns=["splitted"], inplace=True)

# -----------------------------------------
# MANUAL FIXES
# -----------------------------------------

updates = {
    16: ("RedNote", "Taobao"),
    29: ("Amazon", "Shein"),
}

for idx, (t1, t2) in updates.items():
    if idx in df_surveys_final.index:
        df_surveys_final.at[idx, "q10_tool_1"] = t1
        df_surveys_final.at[idx, "q10_tool_2"] = t2

if "expanded_df" in globals() and expanded_df is not None:
    for idx, (t1, t2) in updates.items():
        if idx in expanded_df.index:
            expanded_df.at[idx, "q10_tool_1"] = t1
            expanded_df.at[idx, "q10_tool_2"] = t2


### Column 14: q12_smartphone_model

In [456]:
# -----------------------------------------
# 0. Rename original column
# -----------------------------------------

if "q12_smartphone_model" in df_surveys_final.columns:
    df_surveys_final.rename(columns={"q12_smartphone_model": "q12_brand_model"}, inplace=True)

col = "q12_brand_model"   # renamed original column


# -----------------------------------------
# 1. Normalization helpers (for brand extraction)
# -----------------------------------------

def clean_text(text):
    if pd.isna(text):
        return None
    text = text.strip()
    text = re.sub(r",\s*\(", " (", text)
    text = re.sub(r"\s+", " ", text)
    return text

def normalize_iphone(model):
    model = re.sub(r"iphone", "Apple iPhone", model, flags=re.IGNORECASE)
    return re.sub(
        r"apple iphone\s*(\d+)\s*(pro max|pro|plus|max|air|e)?",
        lambda m: "Apple iPhone " + m.group(1) + (" " + m.group(2).title() if m.group(2) else ""),
        model,
        flags=re.IGNORECASE
    ).strip()

def normalize_samsung(model):
    model = re.sub(r"(samsung\s*galaxy|galaxy|samsung)", "Samsung Galaxy", model, flags=re.IGNORECASE)
    return re.sub(
        r"samsung galaxy\s*(s?\d+[a-z]*\s*(ultra|fe|max)?)",
        lambda m: "Samsung Galaxy " + m.group(1).upper().replace("ULTRA", "Ultra").replace("FE", "FE"),
        model,
        flags=re.IGNORECASE
    ).strip()

def normalize_pixel(model):
    return re.sub(r"(google pixel|pixel)", "Google Pixel", model, flags=re.IGNORECASE).strip()

def normalize_xiaomi(model):
    model = re.sub(r"\bxiaomi\b", "Xiaomi", model, flags=re.IGNORECASE)
    model = re.sub(r"\bredmi\b", "Redmi", model, flags=re.IGNORECASE)
    return model.strip()

def normalize_for_brand(text):
    """Normalize only enough to help brand extraction—not full cleaning."""
    if pd.isna(text):
        return None

    text = clean_text(text)
    temp = text.lower()

    if "iphone" in temp or temp.startswith("apple"):
        return normalize_iphone(text)
    if "samsung" in temp or "galaxy" in temp:
        return normalize_samsung(text)
    if "pixel" in temp:
        return normalize_pixel(text)
    if "xiaomi" in temp or "redmi" in temp:
        return normalize_xiaomi(text)

    return text.strip()


# -----------------------------------------
# 2. Brand extraction
# -----------------------------------------

def extract_brand(text):
    if pd.isna(text):
        return None

    t = text.lower().strip()

    brand_map = {
        r"\bapple\b": "Apple",
        r"\biphone\b": "Apple",
        r"\bsamsung\b": "Samsung",
        r"\bgalaxy\b": "Samsung",
        r"\bgoogle pixel\b": "Google",
        r"\bpixel\b": "Google",
        r"\bxiaomi\b": "Xiaomi",
        r"\bredmi\b": "Redmi",
        r"\brealme\b": "Realme",
        r"\bnuu\b": "NUU",
        r"\bsony\b": "Sony",
        r"\bmotorola\b": "Motorola",
        r"\boppo\b": "Oppo",
        r"\bvivo\b": "Vivo",
        r"\bhuawei\b": "Huawei",
        r"\boneplus\b": "OnePlus",
    }

    for pattern, brand in brand_map.items():
        if re.search(pattern, t, flags=re.IGNORECASE):
            return brand

    return text.split()[0].capitalize()


# -----------------------------------------
# 3. Create ONLY q12_brand_only column
# -----------------------------------------

df_surveys_final["q12_brand_only"] = df_surveys_final[col].apply(
    lambda x: extract_brand(normalize_for_brand(x))
)


# -----------------------------------------
# 4. Insert q12_brand_only DIRECTLY after q12_brand_model
# -----------------------------------------

insert_pos = df_surveys_final.columns.get_loc("q12_brand_model") + 1

# Pop + insert ensures correct placement
series = df_surveys_final.pop("q12_brand_only")
df_surveys_final.insert(insert_pos, "q12_brand_only", series)


### Column 15: q13_storage_capacity

In [457]:
col = "q13_storage_capacity"   # original column name

# -----------------------------------------
# Standardization Function (unchanged)
# -----------------------------------------

def standardize_storage(value):
    if pd.isna(value):
        return None

    text = str(value).lower()

    # Remove irrelevant characters
    text = text.replace(",", " ")

    # Find patterns like 64, 128gb, 256 gb, 512-gb, 1tb etc.
    match = re.search(r"(\d+)\s*(gb|tb)?", text, flags=re.IGNORECASE)

    if not match:
        return None

    num = match.group(1)
    unit = match.group(2)

    # Default unit
    if unit is None:
        unit = "GB"

    unit = unit.upper()

    return f"{num} {unit}"


# -----------------------------------------
# Split multi-storage responses
# -----------------------------------------

def split_storage(text):
    """Split responses into separate standardized values."""
    if pd.isna(text):
        return []

    parts = re.split(r"[,/]| and | or |;", str(text), flags=re.IGNORECASE)

    cleaned = []
    for p in parts:
        s = standardize_storage(p)
        if s:
            cleaned.append(s)

    # Remove duplicates, keep order
    seen = set()
    final = []
    for x in cleaned:
        if x not in seen:
            seen.add(x)
            final.append(x)

    return final


# -----------------------------------------
# Apply and INSERT new columns appropriately
# -----------------------------------------

# Temporary split list
df_surveys_final["q13_split"] = df_surveys_final[col].apply(split_storage)

# Determine maximum number of storage entries
max_len = df_surveys_final["q13_split"].apply(len).max()

# Insert new columns AFTER the original column
insert_pos = df_surveys_final.columns.get_loc(col) + 1

# Build columns first
new_cols = {}

for i in range(1, max_len + 1):
    colname = f"q13_storage_{i}_sd"
    new_cols[colname] = df_surveys_final["q13_split"].apply(
        lambda x: x[i-1] if len(x) >= i else None
    )

# Insert each new column in correct order
for idx, (name, series) in enumerate(new_cols.items()):
    df_surveys_final.insert(insert_pos + idx, name, series)

# Remove helper column
df_surveys_final.drop(columns=["q13_split"], inplace=True)


In [458]:
df_surveys_final


Unnamed: 0,participant_id,background_submitted_at,q2_gender,q1_age_group,q3_education,q5_nationality,q5_nationality_sd,q7_ai_familiarity,q8_attention_check,q10_search_tools,...,q37_results_useful,q38_attention_check,q39_contradictory_handling,q40_purchase_likelihood,q41_time_spent,q42_future_usage_feedback,session_start_time,session_end_time,session_duration_ms,record_created_at
0,09e6255b-ed66-4eea-a314-104a08130ac0,2025-10-31 22:11:56.864775+00:00,female,35-44,doctorate,UK,UK,7,1,"Google, Ebay, Etsy, ViaLibri",...,5.0,3,"[""additional_sources"",""own_judgment""]",4.0,6-10,"I liked the more minimal interface, and I like...",2025-10-31 22:10:58.834+00,2025-10-31 22:18:19.43+00,440596.0,2025-10-31 22:18:19.588138+00
1,15ef74b6-a61a-474c-b855-696b20ce58fb,2025-10-24 14:12:35.527412+00:00,male,55-and-above,master,German,Germany,1,1,Google,...,5.0,3,"[""no_contradictions""]",5.0,3-5,Maybe,2025-10-24 14:11:51.631+00,2025-10-24 14:24:48.07+00,776439.0,2025-10-24 14:24:48.231503+00
2,1f0df1be-a1ea-4080-90c3-230fe9e35174,2025-11-10 03:25:51.855943+00:00,female,18-24,bachelor,United States,USA,6,1,Google,...,6.0,3,"[""additional_sources""]",3.0,3-5,"Yes, it showed me relevant results for what I ...",2025-11-10 03:25:14.283+00,2025-11-10 03:26:06.698+00,52415.0,2025-11-10 03:26:07.101629+00
3,2afa9961-1844-49e8-80fc-444466532f46,2025-10-24 17:08:04.105300+00:00,female,25-34,master,Russian,Russia,6,6,Google,...,7.0,3,"[""most_detailed"",""own_judgment""]",7.0,less-than-2,"Yes, the tool was transparent and easy to use,...",2025-10-24 17:06:39.799+00,2025-10-24 17:10:49.029+00,249230.0,2025-10-24 17:10:49.328167+00
4,2d8dd1db-9d38-49e0-bf03-5b4735523d27,2025-10-29 09:54:21.050268+00:00,female,18-24,high-school,Swiss,Switzerland,7,4,"Google, tiktok, official website",...,5.0,3,"[""additional_sources"",""own_judgment""]",4.0,more-than-10,"No, AI didn’t show the classical bar abovd",2025-10-29 09:52:50.793+00,2025-10-29 09:54:46.465+00,115672.0,2025-10-29 09:54:46.569185+00
5,2e912156-c7b7-4268-8420-128a859c4876,2025-10-26 15:45:12.277452+00:00,female,25-34,master,Vietnamese-german,Germany,3,3,Google,...,2.0,3,"[""additional_sources"",""own_judgment""]",3.0,more-than-10,"Yes, easy to use",2025-10-26 15:44:29.794+00,2025-10-26 15:45:39.02+00,69226.0,2025-10-26 15:45:39.157648+00
6,3409f5be-93f2-44f4-8edb-910e95126257,2025-11-10 07:23:32.649361+00:00,female,25-34,bachelor,British,UK,5,1,Google,...,5.0,3,"[""first_result""]",5.0,less-than-2,Its easy to use,2025-11-10 07:22:57.74+00,2025-11-10 07:23:56.719+00,58979.0,2025-11-10 07:23:56.945929+00
7,35f58cd5-b9a5-4132-a94d-8fcad2800a59,2025-10-24 19:13:03.589983+00:00,female,35-44,bachelor,Kazakh,Kazakhstan,6,1,Google,...,4.0,3,"[""no_contradictions""]",3.0,3-5,I don’t think so. It is not visually appealing,2025-10-24 19:11:36.971+00,2025-10-24 19:14:27.424+00,170453.0,2025-10-24 19:14:27.541556+00
8,3beee5db-499b-4741-b3ea-72c6f17ffb86,2025-10-24 08:55:18.954744+00:00,female,25-34,bachelor,Vietnamese,Vietnam,6,1,Shoppe,...,5.0,3,"[""additional_sources"",""own_judgment""]",3.0,6-10,My experience with this version of Google Sear...,2025-10-24 08:47:58.199+00,2025-10-24 09:02:52.01+00,893811.0,2025-10-24 09:02:52.809048+00
9,403feef5-647d-4a34-a15a-c4bdc29fb2c2,2025-11-09 14:56:32.243330+00:00,female,18-24,high-school,Latvian,Latvia,5,1,Google,...,6.0,3,"[""additional_sources"",""most_detailed""]",6.0,3-5,"Yes, it works well for me",2025-11-09 14:41:44.625+00,2025-11-09 14:58:58.615+00,1033990.0,2025-11-09 14:58:58.735264+00


### Column 16: q14_color

In [459]:
col = "q14_color"   # original column

# -----------------------------------------
# 1. Cleaning + splitting function
# -----------------------------------------

def clean_and_split_colors(text):
    if pd.isna(text):
        return []

    text = str(text).strip()

    # Remove prefixes like "Color - "
    text = re.sub(r"color[\s:-]*", "", text, flags=re.IGNORECASE)

    # Handle "I don't know"
    if re.search(r"i\s*don'?t\s*know", text, flags=re.IGNORECASE):
        return []

    # Normalize separators
    text = re.sub(r"[/&|-]+", ",", text)
    text = re.sub(r"\band\b", ",", text, flags=re.IGNORECASE)

    parts = [p.strip() for p in text.split(",") if p.strip()]

    cleaned = []
    for p in parts:
        words = p.split()
        cleaned_words = [w.capitalize() for w in words]
        cleaned.append(" ".join(cleaned_words))

    return cleaned


# -----------------------------------------
# 2. Create split list column
# -----------------------------------------

df_surveys_final["q14_split"] = df_surveys_final[col].apply(clean_and_split_colors)


# -----------------------------------------
# 3. Determine max number of colors
# -----------------------------------------

max_len = df_surveys_final["q14_split"].apply(len).max()


# -----------------------------------------
# 4. Insert new columns *immediately after original column*
# -----------------------------------------

insert_pos = df_surveys_final.columns.get_loc(col) + 1

for i in range(1, max_len + 1):
    new_col = f"q14_color_{i}_sd"
    df_surveys_final.insert(
        insert_pos + (i - 1),
        new_col,
        df_surveys_final["q14_split"].apply(lambda lst: lst[i-1] if len(lst) >= i else None)
    )


# -----------------------------------------
# 5. Remove helper column (keep original)
# -----------------------------------------

df_surveys_final.drop(columns=["q14_split"], inplace=True)


### Column 17: q15_lowest_price

In [460]:
df = df_surveys_final
old_col = "q15_lowest_price"

# -----------------------------------------
# 0. Rename original column (KEEP original)
# -----------------------------------------

df.rename(columns={old_col: "q15_price"}, inplace=True)
col = "q15_price"   # use new name going forward


# -----------------------------------------
# 1. Conversion constants
# -----------------------------------------

USD_TO_EUR = 0.866
GBP_TO_EUR = 1.17


# -----------------------------------------
# 2. Parsing function
# -----------------------------------------

def parse_price_to_eur(val):
    if pd.isna(val):
        return None

    text = str(val).strip()

    if re.search(r"did not find a price", text, flags=re.IGNORECASE):
        return None

    clean = (
        text.replace("€", "")
            .replace("eur", "")
            .replace("euros", "")
            .replace("euro", "")
            .replace(",", ".")
            .strip()
    )

    # GBP
    if text.startswith("£"):
        try:
            num = float(clean)
            return round(num * GBP_TO_EUR, 2)
        except:
            return None

    # USD
    if text.startswith("$") or "usd" in text.lower():
        try:
            num = float(re.sub(r"[^\d.]", "", clean))
            return round(num * USD_TO_EUR, 2)
        except:
            return None

    # EUR or plain numbers
    number_only = re.sub(r"[^\d.]", "", clean)
    try:
        return round(float(number_only), 2)
    except:
        return None


# -----------------------------------------
# 3. Assign price range
# -----------------------------------------

def assign_price_range(eur_val):
    if eur_val is None:
        return None
    if eur_val < 150:
        return "Under €150"
    elif eur_val < 300:
        return "€150-299"
    elif eur_val < 450:
        return "€300-449"
    elif eur_val < 600:
        return "€450-599"
    elif eur_val < 800:
        return "€600-799"
    else:
        return "Over €800"


# -----------------------------------------
# 4. Create new standardized columns
# -----------------------------------------

df["q15_price_euro"] = df[col].apply(parse_price_to_eur)

# Format as decimal-comma style
df["q15_price_euro"] = df["q15_price_euro"].apply(
    lambda x: f"{x:.2f}".replace(".", ",") if pd.notna(x) else None
)

df["q15_price_range"] = df["q15_price_euro"].apply(
    lambda v: assign_price_range(float(v.replace(",", "."))) if v not in (None, "") else None
)


# -----------------------------------------
# 5. Insert new columns right after ORIGINAL column (q15_price)
# -----------------------------------------

insert_pos = df.columns.get_loc("q15_price") + 1

for new_col in ["q15_price_euro", "q15_price_range"]:
    df.insert(insert_pos, new_col, df.pop(new_col))
    insert_pos += 1  # ensure order stays correct


# -----------------------------------------
# 6. DO NOT drop original column
# (requirement: KEEP q15_price)
# -----------------------------------------


### Column 20: q18_smartphone_features

In [461]:
# -----------------------------------------
# 0. Rename original column (KEEP it)
# -----------------------------------------
if "q18_smartphone_features" in df_surveys_final.columns:
    df_surveys_final.rename(
        columns={"q18_smartphone_features": "q18_important_features"},
        inplace=True
    )

col = "q18_important_features"


# -----------------------------------------
# 1. Helper: Parse list-like strings
# -----------------------------------------
def parse_feature_list(x):
    if pd.isna(x):
        return []
    try:
        lst = ast.literal_eval(x)
    except:
        x = x.strip().lstrip("[").rstrip("]")
        lst = [i.strip().strip('"').strip("'") for i in x.split(",") if i.strip()]
    return [item.capitalize() for item in lst]


# -----------------------------------------
# 2. Convert rows to parsed lists
# -----------------------------------------
df_surveys_final["parsed_features"] = df_surveys_final[col].apply(parse_feature_list)


# -----------------------------------------
# 3. Insert new q18_feature_* columns AFTER original column
# -----------------------------------------
insert_pos = df_surveys_final.columns.get_loc(col) + 1

for i in range(1, 4):  # maximum 3 features
    df_surveys_final.insert(
        insert_pos + (i - 1),
        f"q18_feature_{i}",
        df_surveys_final["parsed_features"].apply(lambda lst: lst[i-1] if len(lst) >= i else None)
    )


# -----------------------------------------
# 4. Remove helper column (keep original)
# -----------------------------------------
df_surveys_final.drop(columns=["parsed_features"], inplace=True)


### Column 46: q39_contradictory_handling

In [462]:
df = df_surveys_final
old_col = "q39_contradictory_handling"      # your existing column
new_col = "q39_contradictory_info"          # renamed column

# ------------------------------------------------
# 1. Parse list-like strings into actual lists
# ------------------------------------------------
def parse_list(x):
    """Parse list-like strings; capitalize items."""
    if pd.isna(x):
        return []
    try:
        lst = ast.literal_eval(x)
    except:
        # Fallback parser for non-standard formats
        x = x.strip().lstrip("[").rstrip("]")
        lst = [i.strip().strip('"').strip("'") for i in x.split(",") if i.strip()]
    return [item.capitalize() for item in lst]


# ------------------------------------------------
# 2. Parse into lists
# ------------------------------------------------
df["parsed_temp"] = df[old_col].apply(parse_list)

# ------------------------------------------------
# 3. Determine how many output columns to create
# ------------------------------------------------
max_len = df["parsed_temp"].apply(len).max()

# ------------------------------------------------
# 4. Insert q39_info_1, q39_info_2, ...
# ------------------------------------------------
insert_pos = df.columns.get_loc(old_col)

for i in range(1, max_len + 1):
    df.insert(
        insert_pos + (i - 0),   # keep the original column in place
        f"q39_info_{i}",
        df["parsed_temp"].apply(lambda lst: lst[i-1] if len(lst) >= i else None)
    )

# ------------------------------------------------
# 5. Rename original column (do NOT delete it)
# ------------------------------------------------
df.rename(columns={old_col: new_col}, inplace=True)

# Remove temp helper
df.drop(columns=["parsed_temp"], inplace=True)

# ------------------------------------------------
# 6. Map short codes → full descriptive text
# ------------------------------------------------
contradiction_map = {
    "First_result": "I trusted the first result I clicked on",
    "Additional_sources": "I searched for and compared additional sources",
    "Most_detailed": "I chose the result that seemed most detailed or complete",
    "Own_judgment": "I relied on my own knowledge or judgment",
    "No_contradictions": "I did not find any contradictions",
    "Other": "Other",
    None: None
}

# Apply mapping
for col in [f"q39_info_{i}" for i in range(1, max_len + 1)]:
    df[col] = df[col].map(contradiction_map)

In [463]:
df_surveys_final["q1_age_group"]

0            35-44
1     55-and-above
2            18-24
3            25-34
4            18-24
5            25-34
6            25-34
7            35-44
8            25-34
9            18-24
10           18-24
11           25-34
12           18-24
13        under-18
14           18-24
15        under-18
16           18-24
17           35-44
18           25-34
19           18-24
20           25-34
21           25-34
22           25-34
23        under-18
24        under-18
25    55-and-above
26           25-34
27           18-24
28           25-34
29           18-24
30           18-24
31           18-24
32           18-24
33           35-44
34           25-34
35           18-24
36           35-44
37           18-24
38           25-34
39           25-34
40           25-34
41           25-34
42           25-34
43           18-24
44           18-24
45           35-44
46           35-44
47           25-34
48    55-and-above
49           25-34
50           18-24
51           18-24
Name: q1_age

In [464]:
# --- Clean Age Group (only map values that appear in the dataset) ---
def clean_age_group(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    mapping = {
        "under-18": "Under 18",
        "18-24": "18-24",
        "25-34": "25-34",
        "35-44": "35-44",
        "55-and-above": "55 and above",
    }

    return mapping.get(text, None)


# --- Apply to dataframe ---
df_surveys_final["q1_age_group"] = df_surveys_final["q1_age_group"].apply(clean_age_group)

# Optional: check unique values
print(df_surveys_final["q1_age_group"].unique())


['35-44' '55 and above' '18-24' '25-34' 'Under 18']


In [465]:
df_surveys_final["q2_gender"]

0                female
1                  male
2                female
3                female
4                female
5                female
6                female
7                female
8                female
9                female
10               female
11               female
12               female
13               female
14               female
15               female
16               female
17               female
18                 male
19               female
20                 male
21               female
22                 male
23                 male
24               female
25               female
26               female
27               female
28                 male
29               female
30                 male
31               female
32                 male
33               female
34               female
35                 male
36               female
37               female
38    prefer-not-to-say
39                 male
40               female
41              

In [466]:
# --- Clean Gender (only map values that appear in dataset) ---
def clean_gender(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    mapping = {
        "female": "Female",
        "male": "Male",
        "prefer-not-to-say": "Prefer not to say",
    }

    return mapping.get(text, None)


# --- Apply to dataframe ---
df_surveys_final["q2_gender"] = df_surveys_final["q2_gender"].apply(clean_gender)

In [467]:
# --- Clean mapping function (only maps values in dataset) ---
def clean_education(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    # Only mapping based on values that appear in your dataset
    mapping = {
        "high-school": "High school or below",
        "bachelor": "Bachelor's degree",
        "master": "Master's degree",
        "doctorate": "Doctorate / PhD",
    }

    return mapping.get(text, None)


# --- Apply to dataframe ---
df_surveys_final["q3_education"] = (
    df_surveys_final["q3_education"].apply(clean_education)
)

# Optional: check unique values after cleaning
print(df_surveys_final["q3_education"].unique())


['Doctorate / PhD' "Master's degree" "Bachelor's degree"
 'High school or below']


In [468]:
def clean_employment_status(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    # Student
    if any(x in text for x in ["student", "studying", "pupil"]):
        return "Student"

    # Employed (full-time or part-time)
    if any(x in text for x in ["employed", "working", "full-time", "part-time", "employee", "work"]):
        return "Employed"

    # Self-employed
    if any(x in text for x in ["self-employed", "self employed", "entrepreneur", "freelance"]):
        return "Self-employed"

    # Unemployed
    if any(x in text for x in ["unemployed", "jobless", "not working"]):
        return "Unemployed"

    # Everything else → Other
    return "Other"


# ✔ Apply directly to df_surveys_final
df_surveys_final["q4_employment_status"] = df_surveys_final["q4_employment_status"].apply(clean_employment_status)


In [469]:
# --- Clean mapping function ---
def clean_ai_usage(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    mapping = {
        "0-times": "0 times",
        "1-2-times": "1-2 times",
        "3-5-times": "3-5 times",
        "6-10-times": "6-10 times",
        "more-than-10": "More than 10 times"
    }

    return mapping.get(text, None)


# --- Apply to dataframe ---
df_surveys_final["q9_ai_usage_frequency"] = (
    df_surveys_final["q9_ai_usage_frequency"].apply(clean_ai_usage)
)

In [470]:
def clean_q11(val):
    if pd.isna(val):
        return None

    text = str(val).strip().lower()

    # Normalization
    text = text.replace("–", "-").replace("—", "-").replace("€", "")

    # ----- Under €150 -----
    if "under" in text or "<150" in text or "below" in text or "under 150" in text:
        return "Under €150"

    # ----- €150–299 -----
    if "150-299" in text or ("150" in text and "299" in text):
        return "€150-299"

    # ----- €300–449 -----
    if "300-449" in text or ("300" in text and "449" in text):
        return "€300-449"

    # ----- €450–599 -----
    if "450-599" in text or ("450" in text and "599" in text):
        return "€450-599"

    # ----- €600–799 -----
    if "600-799" in text or ("600" in text and "799" in text):
        return "€600-799"

    # ----- Over €800 -----
    if "over" in text or "above" in text or "800" in text or ">800" in text:
        return "Over €800"

    # ----- Not sure -----
    if "not sure" in text or "unsure" in text or "dont know" in text:
        return "Not Sure"

    return None


# ✔ Apply directly to df_surveys_final
df_surveys_final["q11_response"] = df_surveys_final["q11_response"].apply(clean_q11)


In [471]:
# Columns to remove
cols_to_drop = [
    "created_at",
    "session_id_bg_inst_search",
    "session_id_postsurvey",
    "record_created_at"
]

df_surveys_final = df_surveys_final.drop(
    columns=[c for c in cols_to_drop if c in df_surveys_final.columns],
    errors="ignore"
)

# Extract all q-columns
q_cols = [col for col in df_surveys_final.columns if col.startswith("q")]

# Sort q-columns by their question number (q1 → q42)
q_cols_sorted = sorted(q_cols, key=lambda x: int(x.split("_")[0][1:]))

# Move q1 to immediately after participant_id
if "q1_age_group" in q_cols_sorted:
    q_cols_sorted.remove("q1_age_group")
    q_cols_sorted.insert(0, "q1_age_group")

# Final column order
ordered_cols = (
    ["participant_id"] +
    q_cols_sorted +
    [
        "background_submitted_at",
        "instruction_submitted_at",
        "postsurvey_submitted_at",
        "ip_address",
        "device_type",
        "results_submitted_at",
        "session_start_time",
        "session_end_time",
        "session_duration_ms"
    ]
)

# Keep only existing columns
ordered_cols = [c for c in ordered_cols if c in df_surveys_final.columns]

# Apply final ordering
df_surveys_final = df_surveys_final[ordered_cols]


In [473]:
# Batch rename columns in dataframe
renames = {
    "q26_tool_clear_interaction": "q26_tool_clear",
    "q28_tool_provides_info": "q28_tool_info_provided",
    "q29_tool_helps_complete": "q29_tool_easier_task",
    "q32_tool_hard_focus": "q32_tool_difficult_focus",
    "q34_results_trustworthy": "q34_results_trust",
    "q41_time_spent": "q41_duration", 
    "q42_future_usage_feedback": "q42_comments", 
    "q11_response": "q11_budget", 
    "q7_ai_familiarity": "q7_chatbot_familiarity",
    "q8_attention_check": "q8_data_quality",
    "q9_ai_usage_frequency": "q9_chatbot_usage",
    'q16_website_link': 'q16_website',
    "q13_storage_capacity": "q13_storage",
    "q4_employment_status": "q4_employment",
    "q38_attention_check": "q38_attention",
}

for old, new in renames.items():
    if old in df_surveys_final.columns:
        df_surveys_final.rename(columns={old: new}, inplace=True)

    # Update any in-memory lists/dicts that reference the old name
    if "ordered_cols" in globals():
        ordered_cols = [new if c == old else c for c in ordered_cols]
    if "q_cols" in globals():
        q_cols = [new if c == old else c for c in q_cols]
    if "q_cols_sorted" in globals():
        q_cols_sorted = [new if c == old else c for c in q_cols_sorted]

    if "rename_dict" in globals() and old in rename_dict:
        rename_dict[new] = rename_dict.pop(old)

In [474]:
df_surveys_final.columns

Index(['participant_id', 'q1_age_group', 'q2_gender', 'q3_education',
       'q4_employment', 'q5_nationality', 'q5_nationality_sd', 'q6_residence',
       'q6_residence_sd', 'q7_chatbot_familiarity', 'q8_data_quality',
       'q9_chatbot_usage', 'q10_search_tools', 'q10_tool_1', 'q10_tool_2',
       'q10_tool_3', 'q10_tool_4', 'q11_budget', 'q12_brand_model',
       'q12_brand_only', 'q13_storage', 'q13_storage_1_sd', 'q13_storage_2_sd',
       'q14_color', 'q14_color_1_sd', 'q15_price', 'q15_price_euro',
       'q15_price_range', 'q16_website', 'q17_price_importance',
       'q18_important_features', 'q18_feature_1', 'q18_feature_2',
       'q18_feature_3', 'q19_task_easy', 'q20_task_quick', 'q21_task_familiar',
       'q22_tool_reliable', 'q23_tool_practical', 'q24_tool_like',
       'q25_tool_easy_use', 'q26_tool_clear', 'q27_tool_control',
       'q28_tool_info_provided', 'q29_tool_easier_task', 'q30_tool_useful',
       'q31_tool_too_much_info', 'q32_tool_difficult_focus',
      

In [475]:
df_surveys_final

Unnamed: 0,participant_id,q1_age_group,q2_gender,q3_education,q4_employment,q5_nationality,q5_nationality_sd,q6_residence,q6_residence_sd,q7_chatbot_familiarity,...,q42_comments,background_submitted_at,instruction_submitted_at,postsurvey_submitted_at,ip_address,device_type,results_submitted_at,session_start_time,session_end_time,session_duration_ms
0,09e6255b-ed66-4eea-a314-104a08130ac0,35-44,Female,Doctorate / PhD,Employed,UK,UK,UK,UK,7,...,"I liked the more minimal interface, and I like...",2025-10-31 22:11:56.864775+00:00,2025-10-31 22:12:41.977243+00:00,2025-10-31 22:22:26.969691+00:00,129.67.117.187,desktop,2025-10-31 22:19:44.042285+00:00,2025-10-31 22:10:58.834+00,2025-10-31 22:18:19.43+00,440596.0
1,15ef74b6-a61a-474c-b855-696b20ce58fb,55 and above,Male,Master's degree,Employed,German,Germany,Germany,Germany,1,...,Maybe,2025-10-24 14:12:35.527412+00:00,2025-10-24 14:21:30.862457+00:00,2025-10-24 14:28:43.167002+00:00,213.146.69.174,desktop,2025-10-24 14:27:11.483904+00:00,2025-10-24 14:11:51.631+00,2025-10-24 14:24:48.07+00,776439.0
2,1f0df1be-a1ea-4080-90c3-230fe9e35174,18-24,Female,Bachelor's degree,Student,United States,USA,United States,USA,6,...,"Yes, it showed me relevant results for what I ...",2025-11-10 03:25:51.855943+00:00,2025-11-10 03:26:25.769685+00:00,2025-11-10 03:29:58.830293+00:00,108.2.105.241,desktop,2025-11-10 03:28:20.935272+00:00,2025-11-10 03:25:14.283+00,2025-11-10 03:26:06.698+00,52415.0
3,2afa9961-1844-49e8-80fc-444466532f46,25-34,Female,Master's degree,Employed,Russian,Russia,Russia,Russia,6,...,"Yes, the tool was transparent and easy to use,...",2025-10-24 17:08:04.105300+00:00,2025-10-24 17:10:42.739044+00:00,2025-10-24 17:15:27.907607+00:00,2a00:1370:8180:f390:10f5:bcf9:8f0:be3f,mobile,2025-10-24 17:12:41.450159+00:00,2025-10-24 17:06:39.799+00,2025-10-24 17:10:49.029+00,249230.0
4,2d8dd1db-9d38-49e0-bf03-5b4735523d27,18-24,Female,High school or below,Student,Swiss,Switzerland,Switzerland,Switzerland,7,...,"No, AI didn’t show the classical bar abovd",2025-10-29 09:54:21.050268+00:00,2025-10-29 09:54:41.702835+00:00,2025-10-29 10:01:23.058400+00:00,172.225.188.246,mobile,2025-10-29 09:58:09.817211+00:00,2025-10-29 09:52:50.793+00,2025-10-29 09:54:46.465+00,115672.0
5,2e912156-c7b7-4268-8420-128a859c4876,25-34,Female,Master's degree,Employed,Vietnamese-german,Germany,Germany,Germany,3,...,"Yes, easy to use",2025-10-26 15:45:12.277452+00:00,2025-10-26 15:45:16.132619+00:00,2025-10-26 15:48:06.054724+00:00,84.176.238.16,mobile,2025-10-26 15:46:09.863472+00:00,2025-10-26 15:44:29.794+00,2025-10-26 15:45:39.02+00,69226.0
6,3409f5be-93f2-44f4-8edb-910e95126257,25-34,Female,Bachelor's degree,Employed,British,UK,Uk,UK,5,...,Its easy to use,2025-11-10 07:23:32.649361+00:00,2025-11-10 07:23:42.671639+00:00,2025-11-10 07:25:31.551284+00:00,82.132.245.134,mobile,2025-11-10 07:24:45.804281+00:00,2025-11-10 07:22:57.74+00,2025-11-10 07:23:56.719+00,58979.0
7,35f58cd5-b9a5-4132-a94d-8fcad2800a59,35-44,Female,Bachelor's degree,Student,Kazakh,Kazakhstan,Germany,Germany,6,...,I don’t think so. It is not visually appealing,2025-10-24 19:13:03.589983+00:00,2025-10-24 19:13:54.201872+00:00,2025-10-24 19:22:36.379595+00:00,46.5.2.111,mobile,2025-10-24 19:19:39.938857+00:00,2025-10-24 19:11:36.971+00,2025-10-24 19:14:27.424+00,170453.0
8,3beee5db-499b-4741-b3ea-72c6f17ffb86,25-34,Female,Bachelor's degree,Employed,Vietnamese,Vietnam,Vietnam,Vietnam,6,...,My experience with this version of Google Sear...,2025-10-24 08:55:18.954744+00:00,2025-10-24 08:59:09.367312+00:00,2025-10-24 09:19:59.596176+00:00,104.30.161.158,desktop,2025-10-24 09:04:36.488336+00:00,2025-10-24 08:47:58.199+00,2025-10-24 09:02:52.01+00,893811.0
9,403feef5-647d-4a34-a15a-c4bdc29fb2c2,18-24,Female,High school or below,Student,Latvian,Latvia,Latvia,Latvia,5,...,"Yes, it works well for me",2025-11-09 14:56:32.243330+00:00,2025-11-09 14:57:54.747882+00:00,2025-11-09 15:30:19.979275+00:00,77.38.136.74,desktop,2025-11-09 15:16:04.986421+00:00,2025-11-09 14:41:44.625+00,2025-11-09 14:58:58.615+00,1033990.0


In [480]:
df_surveys_final.loc[6, "q15_price_euro"] = "159,80"
df_surveys_final.loc[6, "q15_price_range"] = "€150–299"

display(df_surveys_final.loc[[7], ["participant_id", "q15_price", "q15_price_euro", "q15_price_range"]])

Unnamed: 0,participant_id,q15_price,q15_price_euro,q15_price_range
7,35f58cd5-b9a5-4132-a94d-8fcad2800a59,606€,60600,€600-799


In [485]:
cols = ["q19_task_easy", "q20_task_quick", "q21_task_familiar"]
likert_cols = [
    "q19_task_easy", "q20_task_quick", "q21_task_familiar", "q22_tool_reliable",
    "q23_tool_practical", "q24_tool_like", "q25_tool_easy_use", "q26_tool_clear",
    "q27_tool_control", "q28_tool_info_provided", "q29_tool_easier_task",
    "q30_tool_useful", "q31_tool_too_much_info", "q32_tool_difficult_focus",
    "q33_results_accurate", "q34_results_trust", "q35_results_complete",
    "q36_results_relevant", "q37_results_useful"
]

other_cols = [c for c in likert_cols if c not in cols]
row_means_other = df_surveys_final[other_cols].mean(axis=1)

for col in cols:
    col_mask = df_surveys_final[col].isna()
    df_surveys_final.loc[col_mask, col] = row_means_other[col_mask]

mask_missing = df_surveys_final[cols].isna().any(axis=1)
missing_rows = df_surveys_final.loc[mask_missing, ["participant_id"] + cols]

print(f"{missing_rows.shape[0]} rows still have at least one missing value in {cols}:")
display(missing_rows)

print("\nMissing counts per column after imputation:")
print(df_surveys_final[cols].isna().sum())

0 rows still have at least one missing value in ['q19_task_easy', 'q20_task_quick', 'q21_task_familiar']:


Unnamed: 0,participant_id,q19_task_easy,q20_task_quick,q21_task_familiar



Missing counts per column after imputation:
q19_task_easy        0
q20_task_quick       0
q21_task_familiar    0
dtype: int64


In [489]:
target_rows = [33, 39, 40, 48]
row_means_subset = df_surveys_final.loc[target_rows, likert_cols].mean(axis=1)

for idx, mean_val in row_means_subset.items():
    for col in ["q32_tool_difficult_focus", "q37_results_useful"]:
        if pd.isna(df_surveys_final.at[idx, col]):
            df_surveys_final.at[idx, col] = mean_val

cols = ["q32_tool_difficult_focus", "q37_results_useful"]
print("Missing counts after imputation:")
print(df_surveys_final[cols].isna().sum())

display(df_surveys_final.loc[target_rows, ["participant_id", *cols]])

Missing counts after imputation:
q32_tool_difficult_focus    0
q37_results_useful          0
dtype: int64


Unnamed: 0,participant_id,q32_tool_difficult_focus,q37_results_useful
33,a8f43d95-44c7-4673-ba52-c8d904b1c1d4,6.0,6.166667
39,c03cfb72-188d-4677-ab8d-eacd1ea933d7,5.0,5.388889
40,c1f0ba55-5c73-4fea-9787-42583c2823aa,7.0,7.0
48,e98964ae-fbfd-42ee-9e6e-20b332d1a03d,5.166667,6.0


In [496]:
df_surveys_final

Unnamed: 0,participant_id,q1_age_group,q2_gender,q3_education,q4_employment,q5_nationality,q5_nationality_sd,q6_residence,q6_residence_sd,q7_chatbot_familiarity,...,q42_comments,background_submitted_at,instruction_submitted_at,postsurvey_submitted_at,ip_address,device_type,results_submitted_at,session_start_time,session_end_time,session_duration_ms
0,09e6255b-ed66-4eea-a314-104a08130ac0,35-44,Female,Doctorate / PhD,Employed,UK,UK,UK,UK,7,...,"I liked the more minimal interface, and I like...",2025-10-31 22:11:56.864775+00:00,2025-10-31 22:12:41.977243+00:00,2025-10-31 22:22:26.969691+00:00,129.67.117.187,desktop,2025-10-31 22:19:44.042285+00:00,2025-10-31 22:10:58.834+00,2025-10-31 22:18:19.43+00,440596.0
1,15ef74b6-a61a-474c-b855-696b20ce58fb,55 and above,Male,Master's degree,Employed,German,Germany,Germany,Germany,1,...,Maybe,2025-10-24 14:12:35.527412+00:00,2025-10-24 14:21:30.862457+00:00,2025-10-24 14:28:43.167002+00:00,213.146.69.174,desktop,2025-10-24 14:27:11.483904+00:00,2025-10-24 14:11:51.631+00,2025-10-24 14:24:48.07+00,776439.0
2,1f0df1be-a1ea-4080-90c3-230fe9e35174,18-24,Female,Bachelor's degree,Student,United States,USA,United States,USA,6,...,"Yes, it showed me relevant results for what I ...",2025-11-10 03:25:51.855943+00:00,2025-11-10 03:26:25.769685+00:00,2025-11-10 03:29:58.830293+00:00,108.2.105.241,desktop,2025-11-10 03:28:20.935272+00:00,2025-11-10 03:25:14.283+00,2025-11-10 03:26:06.698+00,52415.0
3,2afa9961-1844-49e8-80fc-444466532f46,25-34,Female,Master's degree,Employed,Russian,Russia,Russia,Russia,6,...,"Yes, the tool was transparent and easy to use,...",2025-10-24 17:08:04.105300+00:00,2025-10-24 17:10:42.739044+00:00,2025-10-24 17:15:27.907607+00:00,2a00:1370:8180:f390:10f5:bcf9:8f0:be3f,mobile,2025-10-24 17:12:41.450159+00:00,2025-10-24 17:06:39.799+00,2025-10-24 17:10:49.029+00,249230.0
4,2d8dd1db-9d38-49e0-bf03-5b4735523d27,18-24,Female,High school or below,Student,Swiss,Switzerland,Switzerland,Switzerland,7,...,"No, AI didn’t show the classical bar abovd",2025-10-29 09:54:21.050268+00:00,2025-10-29 09:54:41.702835+00:00,2025-10-29 10:01:23.058400+00:00,172.225.188.246,mobile,2025-10-29 09:58:09.817211+00:00,2025-10-29 09:52:50.793+00,2025-10-29 09:54:46.465+00,115672.0
5,2e912156-c7b7-4268-8420-128a859c4876,25-34,Female,Master's degree,Employed,Vietnamese-german,Germany,Germany,Germany,3,...,"Yes, easy to use",2025-10-26 15:45:12.277452+00:00,2025-10-26 15:45:16.132619+00:00,2025-10-26 15:48:06.054724+00:00,84.176.238.16,mobile,2025-10-26 15:46:09.863472+00:00,2025-10-26 15:44:29.794+00,2025-10-26 15:45:39.02+00,69226.0
6,3409f5be-93f2-44f4-8edb-910e95126257,25-34,Female,Bachelor's degree,Employed,British,UK,Uk,UK,5,...,Its easy to use,2025-11-10 07:23:32.649361+00:00,2025-11-10 07:23:42.671639+00:00,2025-11-10 07:25:31.551284+00:00,82.132.245.134,mobile,2025-11-10 07:24:45.804281+00:00,2025-11-10 07:22:57.74+00,2025-11-10 07:23:56.719+00,58979.0
7,35f58cd5-b9a5-4132-a94d-8fcad2800a59,35-44,Female,Bachelor's degree,Student,Kazakh,Kazakhstan,Germany,Germany,6,...,I don’t think so. It is not visually appealing,2025-10-24 19:13:03.589983+00:00,2025-10-24 19:13:54.201872+00:00,2025-10-24 19:22:36.379595+00:00,46.5.2.111,mobile,2025-10-24 19:19:39.938857+00:00,2025-10-24 19:11:36.971+00,2025-10-24 19:14:27.424+00,170453.0
8,3beee5db-499b-4741-b3ea-72c6f17ffb86,25-34,Female,Bachelor's degree,Employed,Vietnamese,Vietnam,Vietnam,Vietnam,6,...,My experience with this version of Google Sear...,2025-10-24 08:55:18.954744+00:00,2025-10-24 08:59:09.367312+00:00,2025-10-24 09:19:59.596176+00:00,104.30.161.158,desktop,2025-10-24 09:04:36.488336+00:00,2025-10-24 08:47:58.199+00,2025-10-24 09:02:52.01+00,893811.0
9,403feef5-647d-4a34-a15a-c4bdc29fb2c2,18-24,Female,High school or below,Student,Latvian,Latvia,Latvia,Latvia,5,...,"Yes, it works well for me",2025-11-09 14:56:32.243330+00:00,2025-11-09 14:57:54.747882+00:00,2025-11-09 15:30:19.979275+00:00,77.38.136.74,desktop,2025-11-09 15:16:04.986421+00:00,2025-11-09 14:41:44.625+00,2025-11-09 14:58:58.615+00,1033990.0
