In [51]:
import pandas as pd

# Compare NPS per business unit between the 2022 and 2023 survey waves,
# restricted to respondents that appear in both files.


def _nps_by_group(frame, group_col, score_col):
    """Return per-group NPS (in percent) and the number of scored responses.

    Scores >= 9 count as promoters and scores < 7 as detractors.  Rows whose
    score is missing are excluded from both the NPS denominator and the
    respondent count, so the two figures always describe the same answers.

    Parameters
    ----------
    frame : pd.DataFrame
        Data holding ``group_col`` and ``score_col``.
    group_col : str
        Column to group by (rows with a missing group key are dropped by groupby).
    score_col : str
        Column holding the numeric survey score.

    Returns
    -------
    pd.DataFrame
        Indexed by group, with columns ``'nps'`` (float) and ``'respondents'`` (int).
    """
    scores = frame[score_col]
    # +1 for a promoter, -1 for a detractor, 0 for a passive; missing scores stay
    # missing so that mean()/count() below skip them.
    signed = ((scores >= 9).astype(int) - (scores < 7).astype(int)).where(scores.notna())
    stats = signed.groupby(frame[group_col]).agg(['mean', 'count'])
    return pd.DataFrame({'nps': stats['mean'] * 100, 'respondents': stats['count']})


# Read the CSV files
df_2022 = pd.read_csv('survey_results/survey_2022.csv', encoding='utf-8')
df_2023 = pd.read_csv('survey_results/survey_2023.csv', encoding='utf-8')

# Keep only the part of 'externalId' before the first underscore
df_2022['externalId'] = df_2022['externalId'].str.split('_').str[0]
df_2023['externalId'] = df_2023['externalId'].str.split('_').str[0]

# Keep the respondents present in both years and rename the score columns for clarity.
# NOTE(review): if an externalId occurs more than once in a file after the suffix is
# stripped, the inner merge produces one row per pairing -- confirm IDs are unique.
df_common = pd.merge(
    df_2022, df_2023, how='inner', on='externalId', suffixes=('_2022', '_2023')
).rename(columns={'Score_2022': 'Score 2022', 'Score_2023': 'Score 2023'})

# Both years are grouped by the 2023 business unit of each respondent.
stats_2022 = _nps_by_group(df_common, 'business unit_2023', 'Score 2022')
stats_2023 = _nps_by_group(df_common, 'business unit_2023', 'Score 2023')

# NPS score and respondent count for 2022
nps_2022 = stats_2022['nps'].rename_axis('business unit').reset_index(name='NPS score 2022')
respondents_2022 = stats_2022['respondents'].rename_axis('business unit').reset_index(name='Common Respondents 2022')

# NPS score and respondent count for 2023
nps_2023 = stats_2023['nps'].rename_axis('business unit').reset_index(name='NPS score 2023')
respondents_2023 = stats_2023['respondents'].rename_axis('business unit').reset_index(name='Common Respondents 2023')

# Merge the NPS scores for 2022 and 2023, as well as the respondent counts
nps_comparison = pd.merge(
    pd.merge(nps_2022, nps_2023, on='business unit'),
    pd.merge(respondents_2022, respondents_2023, on='business unit'),
    on='business unit',
)

# Calculate the change in NPS scores
nps_comparison['change'] = nps_comparison['NPS score 2023'] - nps_comparison['NPS score 2022']

# Save the result to a CSV file
nps_comparison.to_csv('nps_comparison.csv', index=False)


In [52]:
import pandas as pd

def _read_wave(csv_path):
    """Read one survey CSV and cut 'externalId' down to the text before its first underscore."""
    wave = pd.read_csv(csv_path, encoding='utf-8')
    wave['externalId'] = wave['externalId'].str.split('_').str[0]
    return wave


# Load both survey waves with normalised respondent IDs
df_2022 = _read_wave('survey_results/survey_2022.csv')
df_2023 = _read_wave('survey_results/survey_2023.csv')

# Respondents present in both waves; overlapping columns get a year suffix,
# and the two score columns are then given friendlier names.
df_common = (
    df_2022
    .merge(df_2023, how='inner', on='externalId', suffixes=('_2022', '_2023'))
    .rename(columns={'Score_2022': 'Score 2022', 'Score_2023': 'Score 2023'})
)

# Function to calculate NPS components
def nps_components(x):
    """Count promoters, passives and detractors in a score Series and derive the NPS.

    Parameters
    ----------
    x : pd.Series
        Numeric survey scores.  Missing values fall into no category and are
        therefore ignored.

    Returns
    -------
    pd.Series
        Indexed ``['Promoters', 'Passives', 'Detractors', 'NPS']``.  The first
        three entries are counts (scores >= 9, 7 <= score < 9, score < 7).
        ``'NPS'`` is the Net Promoter Score in percent,
        ``(promoters - detractors) / answered * 100``, or NaN when no score
        was given at all.
    """
    promoters = (x >= 9).sum()
    passives = ((x >= 7) & (x < 9)).sum()
    detractors = (x < 7).sum()

    # NPS is a percentage of the answered responses, not a raw head-count difference.
    answered = promoters + passives + detractors
    nps_score = (promoters - detractors) / answered * 100 if answered else float('nan')

    return pd.Series(
        [promoters, passives, detractors, nps_score],
        index=['Promoters', 'Passives', 'Detractors', 'NPS'],
    )

# NPS components over all common respondents, one Series per year
nps_2022 = nps_components(df_common['Score 2022']).set_axis(
    ['Promoters 2022', 'Passives 2022', 'Detractors 2022', 'NPS 2022']
)
nps_2023 = nps_components(df_common['Score 2023']).set_axis(
    ['Promoters 2023', 'Passives 2023', 'Detractors 2023', 'NPS 2023']
)

# Number of categorised responses per year (promoters + passives + detractors)
total_2022 = nps_2022['Promoters 2022'] + nps_2022['Passives 2022'] + nps_2022['Detractors 2022']
total_2023 = nps_2023['Promoters 2023'] + nps_2023['Passives 2023'] + nps_2023['Detractors 2023']

# Each category as a fraction of that year's total, appended after the counts
shares_2022 = pd.Series({
    'Promoters share 2022': nps_2022['Promoters 2022'] / total_2022,
    'Passives share 2022': nps_2022['Passives 2022'] / total_2022,
    'Detractors share 2022': nps_2022['Detractors 2022'] / total_2022,
})
nps_2022 = pd.concat([nps_2022, shares_2022])

shares_2023 = pd.Series({
    'Promoters share 2023': nps_2023['Promoters 2023'] / total_2023,
    'Passives share 2023': nps_2023['Passives 2023'] / total_2023,
    'Detractors share 2023': nps_2023['Detractors 2023'] / total_2023,
})
nps_2023 = pd.concat([nps_2023, shares_2023])

# Stack the 2022 figures on top of the 2023 figures and write them out
nps_total = pd.concat([nps_2022, nps_2023])
nps_total.to_csv('nps_total.csv')


In [53]:
import pandas as pd

# Compare NPS per division between the 2022 and 2023 survey waves,
# restricted to respondents that appear in both files.


def _nps_by_group(frame, group_col, score_col):
    """Return per-group NPS (in percent) and the number of scored responses.

    Scores >= 9 count as promoters and scores < 7 as detractors.  Rows whose
    score is missing are excluded from both the NPS denominator and the
    respondent count, so the two figures always describe the same answers.

    Parameters
    ----------
    frame : pd.DataFrame
        Data holding ``group_col`` and ``score_col``.
    group_col : str
        Column to group by (rows with a missing group key are dropped by groupby).
    score_col : str
        Column holding the numeric survey score.

    Returns
    -------
    pd.DataFrame
        Indexed by group, with columns ``'nps'`` (float) and ``'respondents'`` (int).
    """
    scores = frame[score_col]
    # +1 for a promoter, -1 for a detractor, 0 for a passive; missing scores stay
    # missing so that mean()/count() below skip them.
    signed = ((scores >= 9).astype(int) - (scores < 7).astype(int)).where(scores.notna())
    stats = signed.groupby(frame[group_col]).agg(['mean', 'count'])
    return pd.DataFrame({'nps': stats['mean'] * 100, 'respondents': stats['count']})


# Read the CSV files
df_2022 = pd.read_csv('survey_results/survey_2022.csv', encoding='utf-8')
df_2023 = pd.read_csv('survey_results/survey_2023.csv', encoding='utf-8')

# Keep only the part of 'externalId' before the first underscore
df_2022['externalId'] = df_2022['externalId'].str.split('_').str[0]
df_2023['externalId'] = df_2023['externalId'].str.split('_').str[0]

# Keep the respondents present in both years and rename the score columns for clarity.
# NOTE(review): if an externalId occurs more than once in a file after the suffix is
# stripped, the inner merge produces one row per pairing -- confirm IDs are unique.
df_common = pd.merge(
    df_2022, df_2023, how='inner', on='externalId', suffixes=('_2022', '_2023')
).rename(columns={'Score_2022': 'Score 2022', 'Score_2023': 'Score 2023'})

# Each year is grouped by the division recorded in that year's survey, so a
# respondent who changed division is counted under a different division per year.
stats_2022 = _nps_by_group(df_common, 'division_2022', 'Score 2022')
stats_2023 = _nps_by_group(df_common, 'division_2023', 'Score 2023')

# NPS score and respondent count for 2022
nps_2022 = stats_2022['nps'].rename_axis('division').reset_index(name='NPS score 2022')
respondents_2022 = stats_2022['respondents'].rename_axis('division').reset_index(name='Common Respondents 2022')

# NPS score and respondent count for 2023
nps_2023 = stats_2023['nps'].rename_axis('division').reset_index(name='NPS score 2023')
respondents_2023 = stats_2023['respondents'].rename_axis('division').reset_index(name='Common Respondents 2023')

# Merge the NPS scores for 2022 and 2023, as well as the respondent counts.
# These are inner joins: a division that appears in only one year is left out.
nps_comparison = pd.merge(
    pd.merge(nps_2022, nps_2023, on='division'),
    pd.merge(respondents_2022, respondents_2023, on='division'),
    on='division',
)

# Calculate the change in NPS scores
nps_comparison['change'] = nps_comparison['NPS score 2023'] - nps_comparison['NPS score 2022']

# Save the result to a CSV file
nps_comparison.to_csv('nps_comparison_division.csv', index=False)


In [64]:
import pandas as pd

# Column the respondents are grouped by: specify 'business unit' or 'division'.
# The same value is used as the merge key for the two yearly results and, with
# spaces replaced by underscores, as part of the output CSV file name.
SPLIT_BY = 'division'

def nps(score):
    """Classify a single survey score into its NPS category.

    Parameters
    ----------
    score : float or int or None
        The survey score.

    Returns
    -------
    str or None
        ``'Promoter'`` for scores >= 9, ``'Passive'`` for scores >= 7,
        ``'Detractor'`` for anything lower, and ``None`` when the score is
        missing (``None`` / NaN).
    """
    # A missing answer is not a detractor: NaN fails every comparison and would
    # otherwise fall through to the last branch.
    if pd.isna(score):
        return None
    if score >= 9:
        return 'Promoter'
    if score >= 7:
        return 'Passive'
    return 'Detractor'

def calculate_nps(df, split_by=None):
    """Calculate the NPS and the respondent count for every group.

    Rows with a missing ``'Score'`` are ignored: they are neither promoters nor
    detractors and are not counted as respondents.  The input frame is not
    modified.

    Parameters
    ----------
    df : pd.DataFrame
        Survey data with a numeric ``'Score'`` column and the grouping column.
    split_by : str, optional
        Column to group by.  Defaults to the module-level ``SPLIT_BY``.

    Returns
    -------
    pd.DataFrame
        Indexed by the grouping column, with columns ``'NPS'`` (float, percent)
        and ``'Respondents'`` (integer count of scored responses).
    """
    if split_by is None:
        split_by = SPLIT_BY

    answered = df.loc[df['Score'].notna(), [split_by, 'Score']]
    # Same 9 / 7 cut-offs as `nps`: +1 promoter, -1 detractor, 0 passive.
    signed = (answered['Score'] >= 9).astype(int) - (answered['Score'] < 7).astype(int)
    grouped = signed.groupby(answered[split_by])

    # Separate columns keep 'Respondents' an integer; packing both values into
    # one Series per group would upcast the count to float (12 -> 12.0).
    return pd.DataFrame({
        'NPS': grouped.mean() * 100,
        'Respondents': grouped.size(),
    })

def format_results(result, year):
    """Collapse the NPS and respondent count into one ``"<nps> (<respondents>)"`` column.

    Parameters
    ----------
    result : pd.DataFrame
        Frame with ``'NPS'`` and ``'Respondents'`` columns.  It is not modified.
    year : int or str
        Used to name the new column ``'<year> NPS'``.

    Returns
    -------
    pd.DataFrame
        A copy of ``result`` without ``'NPS'`` and ``'Respondents'`` and with the
        formatted ``'<year> NPS'`` column appended, e.g. ``'45.5 (12)'``.
    """
    nps_text = result['NPS'].round(1).astype(str)
    # The count may arrive as float (12.0); render it as a whole number so the
    # text reads "(12)" and can be parsed back with a digits-only pattern.
    respondents_text = result['Respondents'].round().astype(int).astype(str)
    formatted = nps_text + ' (' + respondents_text + ')'
    return result.drop(columns=['NPS', 'Respondents']).assign(**{f'{year} NPS': formatted})

def read_and_process(year):
    """Load the survey CSV for ``year`` and return its formatted per-group NPS table."""
    csv_path = f'survey_results/survey_{year}.csv'
    survey = pd.read_csv(csv_path, encoding='utf-8')
    return format_results(calculate_nps(survey), year)

def _parse_nps_cells(cells):
    """Split ``"<nps> (<respondents>)"`` strings into numeric NPS and respondent Series.

    Accepts a leading minus sign on the NPS and a respondent count written
    either as ``12`` or ``12.0``.  Cells that do not match the pattern (such as
    the ``0`` placeholder used for a group missing in one year) are read as
    NPS 0.0 with 0 respondents.
    """
    parts = cells.astype(str).str.extract(
        r"^\s*(-?\d+(?:\.\d+)?)\s*\((\d+)(?:\.\d+)?\)\s*$"
    )
    nps_values = parts[0].astype(float).fillna(0)
    respondents = parts[1].astype(float).fillna(0).astype(int)
    return nps_values, respondents


def main():
    """Build the 2022 vs 2023 NPS comparison per group and write it to CSV.

    The output has one row per value of ``SPLIT_BY`` with the columns
    ``'2022 NPS'``, ``'2023 NPS'`` and ``'Change'``, each formatted as
    ``"<nps> (<respondents>)"``.
    """
    result_2022 = read_and_process(2022)
    result_2023 = read_and_process(2023)

    # Merge the results; groups present in only one year get a 0 placeholder
    result = pd.merge(result_2022, result_2023, on=SPLIT_BY, how='outer').fillna(0)

    # Calculate and format the changes (2023 minus 2022)
    score_2023, count_2023 = _parse_nps_cells(result['2023 NPS'])
    score_2022, count_2022 = _parse_nps_cells(result['2022 NPS'])
    score_change = (score_2023 - score_2022).round(1).astype(str)
    count_change = (count_2023 - count_2022).astype(str)
    result['Change'] = score_change + ' (' + count_change + ')'

    # Save to CSV
    result.to_csv('nps_comparison_total_'+SPLIT_BY.replace(' ', '_')+'.csv', index=True)

if __name__ == "__main__":
    main()
