# Editing the raw data to make it easier to transform and understand

In [32]:
import pandas as pd
import numpy as np

In [33]:
# Read both raw survey exports; the generator yields the 2020 frame first and
# the 2021 frame second, matching the order of the names being unpacked.
df_2020_raw, df_2021_raw = (
    pd.read_csv(csv_path)
    for csv_path in ('raw/2020 Survey Data.csv', 'raw/2021 Survey Data.csv')
)

In [34]:
def replace_with_ohe(df, col_list):
    """Turn "tick-box" columns into integer 0/1 indicator columns, in place.

    Each listed column is expected to hold some value (typically the header
    text) where the respondent selected the option and NaN where they did not.
    After the call the column holds 1 for every non-missing, non-zero cell and
    0 otherwise (hence the "ohe" -- one-hot encoding -- in the name).

    A cell that is already 0 stays 0 and a cell that is already 1 stays 1, so
    running the function again on an encoded column leaves it unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. It is mutated in place.
    col_list : iterable of str
        Names of the columns to encode. A name missing from ``df`` raises
        ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenience.
    """
    for column in col_list:
        values = df[column]
        # NaN != 0 evaluates to True, so missing cells have to be masked out
        # explicitly before the comparison result can be trusted.
        selected = values.notna() & (values != 0)
        # Casting the boolean mask gives every encoded column the same integer
        # dtype, whatever dtype the raw column was read in with.
        df[column] = selected.astype(int)
    return df
        
# Tick-box columns of the 2020 survey, grouped by question prefix. The grouping
# is for readability only: replace_with_ohe handles each column on its own.
ohe_columns_2020 = {
    'orgtype': [
        'orgtype: Charitable trust',
        'orgtype: Incorporated society',
        'orgtype: Voluntary organisation',
        'orgtype: māori',
        'orgtype: faith',
        'orgtype: philanthropic',
    ],
    'region': [
        'region: All of Aotearoa New Zealand',
        'region: All of North Island',
        'region: All of South Island',
        'region: Northland',
        'region: Auckland',
        'region: Waikato',
        'region: Bay of Plenty',
        'region: Gisborne',
        "region: Hawke's Bay",
        'region: Taranaki',
        'region: Manawatu-Wanganui',
        'region: Wellington',
        'region: Tasman-Nelson-Marlborough',
        'region: West Coast',
        'region: Canterbury',
        'region: Otago',
        'region: Southland',
    ],
    'ethnic': [
        'ethnic: General (no ethnic-specific focus)',
        'ethnic: New Zealand European',
        'ethnic: māori',
        'ethnic: Samoan',
        'ethnic: Cook Island Māori',
        'ethnic: Tongan',
        'ethnic: Niuean',
        'ethnic: Chinese',
        'ethnic: Indian',
        'ethnic: MELA',
    ],
    'challenges': [
        'challenges: Ensuring sufficient revenue to maintain viability',
        'challenges: Being able to pay our staff',
        'challenges: Changing our service provision to meet public health criteria',
        'challenges: Meeting the needs of the people we support',
        'challenges: Ensuring our staff and volunteers are well supported',
        'challenges: Developing new service offerings',
        'challenges: Loss of volunteers',
        'challenges: Meeting the levels of work required',
    ],
    'opportunities': [
        'opportunities: Increasing our volunteer base',
        'opportunities: Improving our profile',
        'opportunities: Greater appreciation for and recognition of our work',
        'opportunities: New ways of connecting with funders and/or supporters',
        'opportunities: Working closer with other organisations for a common good',
        'opportunities: New ways of connecting with those using our services',
        'opportunities: New service offerings',
        'opportunities: Faster decision-making',
        'opportunities: Longer contracts or funding agreements',
        'opportunities: More flexibility in contracts or funding agreements',
        'opportunities: Stronger sense of community or common values',
        'opportunities: Increasing our funding',
        'opportunities: Ability to move quickly',
        'opportunities: Better business continuity planning',
        'opportunities: Options around where and how we work',
    ],
}

# Encode every group in place on the 2020 frame.
for question_columns in ohe_columns_2020.values():
    replace_with_ohe(df_2020_raw, question_columns)

# Export the encoded 2020 data to CSV (the frame's index is written as well,
# since to_csv is called with its defaults).
df_2020_raw.to_csv('processed/df_2020_ohe.csv')

In [35]:
from matplotlib import pyplot as plt
def _title_label(text):
    """Title-case ``text`` without capitalising the letter after an in-word apostrophe.

    ``str.title()`` treats an apostrophe as a word boundary, which turns
    "Hawke's Bay" into "Hawke'S Bay"; this helper lowers that letter again.
    """
    chars = list(text.title())
    for pos in range(1, len(chars) - 1):
        if chars[pos] in "'\u2019" and chars[pos - 1].isalpha():
            chars[pos + 1] = chars[pos + 1].lower()
    return "".join(chars)


def bar_chart_categories(df, lst, pth):
    """Save one horizontal bar chart per question, showing the total of each option.

    For every ``[prefix, title]`` entry in ``lst`` the function collects the
    columns of ``df`` whose name starts with ``prefix``, sums each of them, and
    writes a bar chart of those totals to ``f"{pth}{title}.png"``.

    Catch-all columns are left out of the chart: a column is skipped when the
    part of its name after the prefix starts with "other" (any letter case).
    Options that merely mention the word, such as "Working closer with other
    organisations for a common good", are charted.
    NOTE(review): this assumes the catch-all columns are named
    "<prefix>Other ..." -- confirm against the raw column headers.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose option columns can be summed (e.g. 0/1 indicators).
    lst : iterable of [str, str]
        ``[column prefix, chart title]`` pairs. The title is also used as the
        output file name.
    pth : str
        Output location, concatenated directly with the title, so it needs a
        trailing path separator.

    Returns
    -------
    None
    """
    for item in lst:
        prefix, title = item[0], item[1]
        labels = []
        totals = []
        for column in df.columns:
            if not column.startswith(prefix):
                continue
            # Slice the prefix off instead of str.replace(), which would also
            # delete any later occurrence of the same text inside the label.
            label = column[len(prefix):]
            if label.strip().lower().startswith('other'):
                continue
            labels.append(_title_label(label))
            totals.append(df[column].sum())

        fig, ax = plt.subplots()
        hbars = ax.barh(labels, totals, color=(0.2, 0.4, 0.6, 0.6))
        ax.bar_label(hbars)
        fig.patch.set_facecolor('white')
        ax.set_title(f"{title}")
        ax.set_xlabel('No. of Responses')
        ax.set_ylabel('Options (excl. Other)')
        fig.savefig(f"{pth}{title}.png", bbox_inches="tight")
        # Close the figure rather than only clearing it: plt.clf() keeps the
        # figure registered with pyplot, so one leaks per chart and the
        # notebook renders an empty "Figure ... with 0 Axes" for each.
        plt.close(fig)

# [column prefix, chart title] pairs, one per 2020 multiple-choice question.
lst_2020 = [
    ['orgtype: ', 'Organisation Type'],
    ['region: ', 'Region'],
    ['ethnic: ', 'Ethnicity Serviced'],
    ['challenges: ', 'Challenges Encountered'],
    ['opportunities: ', 'Opportunities'],
]

# Writes one PNG per pair into the 2020 assets folder.
bar_chart_categories(df_2020_raw, lst_2020, "../assets/2020 graphs/")

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

In [38]:
# Count the 2021 respondents per organisation type and chart the result.
# Organisation type is a single-choice column in 2021, so it is grouped
# directly instead of going through the one-hot-encoded prefix columns.
org_type_question = 'What type of organisation do you represent? (Please choose from the list below)'
org_2021_group = (
    df_2021_raw
    .groupby(org_type_question)['Respondent ID']
    .count()           # number of non-null respondent IDs per organisation type
    .rename('Count')
    .reset_index()
)

fig, ax = plt.subplots()
hbars = ax.barh(org_2021_group[org_type_question], org_2021_group['Count'],
                color=(0.2, 0.4, 0.6, 0.6))
ax.bar_label(hbars)
ax.set_title("Organisation Type")
ax.set_xlabel('No. of Responses')
# NOTE(review): nothing is filtered out of the groupby above, so "excl. Other"
# only holds if the answers contain no "Other" option -- confirm.
ax.set_ylabel('Options (excl. Other)')
fig.patch.set_facecolor('white')
fig.savefig("../assets/2021 graphs/Organisation Type.png", bbox_inches="tight")
# Close the figure instead of plt.clf(): clearing keeps the figure open, which
# leaks it and makes the notebook render an empty "Figure ... with 0 Axes".
plt.close(fig)


<Figure size 432x288 with 0 Axes>

In [37]:
# Tick-box columns of the 2021 survey, grouped by question prefix. The grouping
# is for readability only: replace_with_ohe handles each column on its own.
ohe_columns_2021 = {
    'Region': [
        "Region: Northland/Te Tai Tokerau",
        "Region: Auckland/Tāmaki-makau-rau",
        "Region: Waikato",
        "Region: Bay of Plenty/Te Moana-a-Toi",
        "Region: Gisborne/Te Tai Rāwhiti",
        "Region: Hawke's Bay/Te Matau-a-Māui",
        "Region: Taranaki",
        "Region: Manawatū-Whanganui",
        "Region: Wellington/Te Whanga-nui-a-Tara",
        "Region: Tasman/Te Tai-o-Aorere",
        "Region: Nelson/Whakatū",
        "Region: Marlborough/Te Tauihu-o-te-waka",
        "Region: West Coast /Te Tai Poutini",
        "Region: Canterbury/Waitaha",
        "Region: Otago/Ōtākou",
        "Region: Southland/Murihiku",
        "Region: All regions of the North Island/Te Ika-a-Māui",
        "Region: All regions of the South Island/Te Wai Pounamu",
        "Region: All regions of Aotearoa/New Zealand",
        "Region: We don't operate in Aotearoa/New Zealand",
    ],
    'Ethnic Groups': [
        "Ethnic Groups: New Zealand European/Pakeha/Kiwi",
        "Ethnic Groups: Māori",
        "Ethnic Groups: Samoan",
        "Ethnic Groups: Cook Islands Māori",
        "Ethnic Groups: Tongan",
        "Ethnic Groups: Niuean",
        "Ethnic Groups: Tokelauan",
        "Ethnic Groups: Fijian",
        "Ethnic Groups: Southeast Asian",
        "Ethnic Groups: Chinese",
        "Ethnic Groups: Indian",
        "Ethnic Groups: Middle Eastern/Latin American/African (MELAA)",
        "Ethnic Groups: Continental European",
    ],
    'Five Main Challenges': [
        "Five Main Challenges: Ensuring sufficient revenue to maintain viability",
        "Five Main Challenges: Being able to pay our staff",
        "Five Main Challenges: Changing our service provision to meet public health criteria",
        "Five Main Challenges: Meeting the needs of the people we support",
        "Five Main Challenges: Ensuring our staff and volunteers are well supported",
        "Five Main Challenges: Developing new service offerings",
        "Five Main Challenges: Loss of volunteers",
        "Five Main Challenges: Meeting levels of work required",
    ],
    'Five Opportunities or Personal Outcomes': [
        "Five Opportunities or Personal Outcomes: Increasing our volunteer base",
        "Five Opportunities or Personal Outcomes: Improving our profile",
        "Five Opportunities or Personal Outcomes: Greater appreciation for and recognition of our work",
        "Five Opportunities or Personal Outcomes: New ways of connecting with funders and/or supporters",
        "Five Opportunities or Personal Outcomes: Working closer with other organisations for a common good",
        "Five Opportunities or Personal Outcomes: Delivering new types of services",
        "Five Opportunities or Personal Outcomes: Faster decision making",
        "Five Opportunities or Personal Outcomes: Longer contracts or funding agreements",
        "Five Opportunities or Personal Outcomes: Being more flexible and adjusting our services",
        "Five Opportunities or Personal Outcomes: Better business continuity planning",
        "Five Opportunities or Personal Outcomes: Options around where and how we work",
        "Five Opportunities or Personal Outcomes: No new opportunities or positive outcomes",
    ],
}

# Encode every group in place on the 2021 frame.
for question_columns in ohe_columns_2021.values():
    replace_with_ohe(df_2021_raw, question_columns)

# Export the encoded 2021 data to CSV (the frame's index is written as well,
# since to_csv is called with its defaults).
df_2021_raw.to_csv('processed/df_2021_ohe.csv')

# [column prefix, chart title] pairs, one per 2021 multiple-choice question.
lst_2021 = [
    ['Region: ', 'Region'],
    ['Ethnic Groups: ', 'Ethnicity Serviced'],
    ['Five Main Challenges: ', 'Five Main Challenges Encountered'],
    ['Five Opportunities or Personal Outcomes: ', 'Five Opportunities or Personal Outcomes'],
]

# Writes one PNG per pair into the 2021 assets folder.
bar_chart_categories(df_2021_raw, lst_2021, "../assets/2021 graphs/")


<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>

<Figure size 432x288 with 0 Axes>