In [285]:
import os
import pandas as pd
from pathlib import Path

In [286]:
ROOT = Path('../../../')
ROOT.resolve()

PosixPath('/Users/tazminchiles/Documents/GitHub/northumbria-culture-connect')

In [287]:
DATA_PATH = os.path.join(ROOT, 'raw', 'case-studies', 'centre_for_life')
F2F = os.path.join(DATA_PATH, 'F2F Survey Data Apr-Aug.xlsx')
ONLINE = os.path.join(DATA_PATH, 'Online Survey Data Apr-Aug.csv')
TABLET = os.path.join(DATA_PATH, 'Tablet Data Apr-Aug.csv')


OUT_PATH = os.path.join(ROOT, 'src', 'case-study', 'centre-for-life', '_data', 'satisfaction_scores')

f2f = pd.read_excel(F2F)
online = pd.read_csv(ONLINE)
tablet = pd.read_csv(TABLET)

In [288]:
zone_scores_mapping = {
    'Wow Zone (on the ground floor next to Life Café)': 'wow_zone',
    'Wow Zone (on the ground floor next to Life Cafe)': 'wow_zone',
    'Space Zone (on the ground floor next to the Planetarium)': 'space_zone',
    'Brain Zone (on the ground floor, near the entrance and exit)': 'brain_zone',
    'Hello World (on the upper floor)': 'hello_world',
    'Experiment Zone (7+ area on the ground floor)': 'experiment_zone',
    'Science Now! Hub (on the ground floor, currently has information and activities about mushrooms)': 'science_now',
    'Making Studio': 'making_studio',
    'Live science show in the theatre': 'live_science_show',
    'Planetarium Shows': 'planetarium_shows',
    'Play Zone and pop-up play areas (upper floor aimed at under 7s)': 'play_zone'
}

excluded_values = ['Unknown', 'Did not visit']

custom_order = ['Very satisfied', 'Somewhat satisfied', 'Neither satisfied nor dissatisfied', 'Somewhat dissatisfied', 'Very dissatisfied']

tablet_order = [
    'Extremely satisfied', 
    'Quite satisfied', 
    'Neither satisfied nor unsatisfied', 
    'Quite unsatisfied', 
    'Extremely unsatisfied'
]

order_mapping = {
    tablet_order[i]: custom_order[i] for i in range(len(tablet_order))
}


In [289]:
def clean_column_names(data):
    data.columns = (
        data.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_", regex=True)
        .str.replace(":", "_", regex=True)
    )
    return data

def handle_missing_values(data):
    categorical_cols = data.select_dtypes(include="object").columns
    data[categorical_cols] = data[categorical_cols].fillna("Unknown")
    
    numerical_cols = data.select_dtypes(include="number").columns
    data[numerical_cols] = data[numerical_cols].fillna(0)
    
    return data

def convert_dates(data, date_column):
    if date_column in data.columns:
        data[date_column] = pd.to_datetime(data[date_column], errors='coerce')
    return data

def convert_to_multi_level_header(data):
    first_row = data.columns.fillna('') 
    second_row = data.iloc[0].fillna('')  
    
    new_header = pd.MultiIndex.from_tuples([(first_row[i], second_row[i]) for i in range(len(first_row))])

    data = data[1:]
    data.columns = new_header

    return data

def clean_data(data, date_column="id_enddate"):
    data = handle_missing_values(data)
    data = clean_column_names(data)
    data = convert_to_multi_level_header(data)
    data = convert_dates(data, date_column)
    return data

In [290]:
online_data = clean_data(online)
f2f_data = clean_data(f2f)
table_data = clean_data(tablet)


  new_header = pd.MultiIndex.from_tuples([(first_row[i], second_row[i]) for i in range(len(first_row))])
  new_header = pd.MultiIndex.from_tuples([(first_row[i], second_row[i]) for i in range(len(first_row))])
  new_header = pd.MultiIndex.from_tuples([(first_row[i], second_row[i]) for i in range(len(first_row))])


Summaries

In [298]:
summaries = pd.DataFrame({
    'total_online': [len(online_data)],
    'total_f2f': [len(f2f_data)],
    'total_tablet': [len(tablet)]
})

summaries.to_csv(os.path.join(OUT_PATH, 'summaries.csv'), index=False)

print(summaries)

   total_online  total_f2f  total_tablet
0           626        594            77


Satisfaction Scores by Area / Exhibition

In [291]:
zone_scores = online_data[['how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area)',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).1',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).2',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).3',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).4',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).5',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).6',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).7',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).8',
                                    'how_satisfied_or_dissatisfied_were_you_with_the_following_areas_of_life_science_centre?_(select_one_rating_per_area).9']]


zone_scores.columns = zone_scores.columns.droplevel(0)


counts = zone_scores.apply(pd.Series.value_counts)

counts = counts.reset_index().rename(columns={'index': 'score'})
counts = counts.rename(columns=zone_scores_mapping)

# Now calculate satisfaction scores as a percentage of people who answered and attended each area.

filtered_df = counts[~counts.isin(excluded_values).any(axis=1)]

numeric_columns = filtered_df.columns[1:]

percentage_filtered_df = filtered_df.copy()
for col in numeric_columns:
    total_responses = filtered_df[col].sum()  
    percentage_filtered_df[col] = (filtered_df[col] / total_responses) * 100 

percentage_filtered_df = percentage_filtered_df.round(2)

percentage_filtered_df = percentage_filtered_df.sort_values(by='wow_zone', ascending=False)

percentage_filtered_df.to_csv(os.path.join(OUT_PATH, 'online_percentage_score.csv'), index=False)


Face to Face

In [292]:
f2f_zone_scores = f2f_data[[
    'q4a',
    'q4b',
    'q4c',
    'q4d',
    'q4e',
    'q4f',
    'q4g',
    'q4h',
    'q4i',
    'q4j',
]]

f2f_zone_scores.columns = f2f_zone_scores.columns.droplevel(0)

f2f_zone_scores = f2f_zone_scores.rename(columns=zone_scores_mapping)

counts = f2f_zone_scores.apply(pd.Series.value_counts)

counts = counts.reset_index().rename(columns={'index': 'score'})

# Now calculate satisfaction scores as a percentage of people who answered and attended each area.

filtered_df = counts[~counts.isin(excluded_values).any(axis=1)]

numeric_columns = filtered_df.columns[1:]

percentage_filtered_df = filtered_df.copy()
for col in numeric_columns:
    total_responses = filtered_df[col].sum()  
    percentage_filtered_df[col] = (filtered_df[col] / total_responses) * 100 

percentage_filtered_df = percentage_filtered_df.round(2).fillna(0)
percentage_filtered_df = percentage_filtered_df.sort_values(by='experiment_zone', ascending=False)

percentage_filtered_df['score'] = pd.Categorical(percentage_filtered_df['score'], categories=custom_order, ordered=True)

percentage_filtered_df = percentage_filtered_df.sort_values(by='score')

percentage_filtered_df.to_csv(os.path.join(OUT_PATH, 'f2f_percentage_total.csv'), index=False)



Tablet surveys

In [293]:
tablet_scores = tablet['how_satisfied_are_you_with_visit_today?']

tablet_scores = tablet_scores.replace(order_mapping)

tablet_scores = tablet_scores[~tablet_scores.isin(excluded_values)]

tablet_scores = tablet_scores.drop(index=tablet_scores.index[0])

tablet_counts = tablet_scores.value_counts()

tablet_counts = tablet_counts.reset_index().rename(columns={'how_satisfied_are_you_with_visit_today?': 'score'})

numeric_columns = tablet_counts.columns[1:]

percentage_filtered_df = tablet_counts.copy()
for col in numeric_columns:
    total_responses = tablet_counts[col].sum()  
    percentage_filtered_df[col] = (tablet_counts[col] / total_responses) * 100 

percentage_filtered_df = percentage_filtered_df.round(2).fillna(0)

percentage_filtered_df['score'] = percentage_filtered_df['score'].astype(str)

percentage_filtered_df['score'] = pd.Categorical(percentage_filtered_df['score'], categories=custom_order, ordered=True)

percentage_filtered_df = percentage_filtered_df.sort_values(by='score')

percentage_filtered_df.to_csv(os.path.join(OUT_PATH, 'tablet_percentage_total.csv'), index=False)
