In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import jupyterthemes as jtplot
from geopy.geocoders import Nominatim
import re

%config InlineBackend.figure_format = 'retina'                                    # so you can see plots in HD :) 
#jtplot.style(theme='grade3', context='notebook', ticks=True, grid=True)

In [2]:
df_2020 = pd.read_csv(r'data/raw/huie_surveyData_2020.csv')
df_2021 = pd.read_csv(r'data/raw/huie_surveyData_2021.csv')

print(df_2021.keys())

Index(['row hash', 'your role recoded', 'orgtype: charitable',
       'orgtype: inc soc', 'orgtype: voluntary', 'orgtype: māori',
       'orgtype: faith', 'orgtype: philanthropic', 'orgtype:other',
       'paid staff',
       ...
       'sector changes: Access to information and data in one place',
       'sector changes: Funding to cover salaries and operational costs',
       'sector changes: Other (please specify)', 'important learning',
       'comments', 'share', 'again: Yes, I am willing to be surveyed again',
       'again: Yes, I am willing to be interviewed',
       'again: Yes, I would like to see the survey results',
       'again: No, I do not wish to be contacted'],
      dtype='object', length=133)


In [3]:
print(len(df_2020))
print(len(df_2021))

REGIONS = np.array(
            ["Northland/Te Tai Tokerau"
            "Auckland/Tāmakimakaurau", 
            "Waikato",
            "Bay of Plenty/Te MoanaaToi",
            "Gisborne/Te Tai Rāwhiti",
            "Hawke's Bay/Te Mataua Māui",
            "Taranaki",
            "Manawatū",
            "Whanganui",
            "Wellington/Te WhanganuiaTara",
            "Tasman/Te TaioAorere",
            "Nelson/Whakatū",
            "Marlborough/Te Tauihuotewaka",
            "West Coast /Te Tai Poutini",
            "Canterbury/Waitaha",
            "Otago/Ōtākou",
            "Southland/Murihiku",
            "All regions of the North Island/Te IkaaMāui",
            "All regions of the South Island/Te Wai Pounamu",
            "All regions of Aotearoa/New Zealand"]
)



print(max(REGIONS, key=lambda s: (len(s), s)))

n_spaces = [a.count(' ') for a in REGIONS]
print(n_spaces)

362
961
Northland/Te Tai TokerauAuckland/Tāmakimakaurau
[2, 0, 3, 2, 3, 0, 0, 0, 1, 1, 0, 1, 4, 0, 0, 0, 6, 7, 4]


In [4]:
text = 'The COVID-19 pandemic has had multiple structural/operational/financial impacts upon our tangata whenua, community and voluntary organisations in Aotearoa/New Zealand. That considered, the pandemic may have also had impacts on the wellbeing and emotional/morale status and stress levels of people in these organisations. Can you tell us a short story about your experiences?'

if False:
    print(df_2021[text].values)

In [58]:
# define function to aggregate data based on the columns
def aggregate_regions(df_in, reg_cols):
    '''
    Takes raw df as input and outputs df with aggregated 
    regions for each entry in new location column
    '''
    df_out = df_in.copy()
    
    df_out = df_out.replace(np.nan, '', regex=True)                        # replace nans for easier agreggation
    df_out['location'] = df_out[reg_cols].T.agg(','.join)             # aggregation
    df_out = df_out.replace(',,', '', regex=True)                        # gets rid of double commas
    
    return df_out

 
# define function to get rid of first and last comma
def remove_comma(df_in, which):
    '''
    Inputs df from aggregate_regions output and removes the 
    '''
    df_out = df_in.copy()

    # track what comma to remove (index) based on which input
    ind = 0 if which == 'first'  else -1
    # remove comma with lambda function
    with_comma = [(lambda s: s[ind] == ',')(s) for s in df_in['location']]
    if which == 'first': 
        df_out['location'][with_comma] = df_out['location'][with_comma].map(lambda s: str(s)[1:])
    elif which == 'last':
        df_out['location'][with_comma] = df_out['location'][with_comma].map(lambda s: str(s)[:-1])
        
    return df_out

# call region aggregator
region_cols = ['In which region(s) does your organisation operate (please choose all that apply)'] + [f'Unnamed: {j}' for j in range(14,32+1)]
df_2020 = aggregate_regions(df_2020, region_cols)

# call function to remove comma 
#df_2020 = remove_comma(df_2020, which='first')
#df_2020 = remove_comma(df_2020, which='last')

print(df_2020.loc[5, 'location'])
print(df_2020.loc[2, 'location'])

#[is_comma_first(val) for val in df_2020['location'].values]

Auckland/Tāmaki-makau-rau,Waikato,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-TaraNelson/Whakatū,Canterbury/Waitaha,Otago/Ōtākou
Southland/Murihiku


In [45]:
which = 'last'
ind = 0 if which == 'first'  else -1
        
print(ind)

-1


In [59]:
for i in range(2,30):
    print(df_2020.loc[i, 'location'])

Southland/Murihiku
Otago/Ōtākou
Auckland/Tāmaki-makau-rau
Auckland/Tāmaki-makau-rau,Waikato,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-TaraNelson/Whakatū,Canterbury/Waitaha,Otago/Ōtākou
Taranaki
Wellington/Te Whanga-nui-a-Tara
Otago/Ōtākou
WaikatoAll regions of Aotearoa/New Zealand
Southland/Murihiku
Waikato
Auckland/Tāmaki-makau-rauBay of Plenty/Te Moana-a-ToiManawatū-Whanganui,Wellington/Te Whanga-nui-a-Tara,Canterbury/Waitaha,Otago/Ōtākou
Auckland/Tāmaki-makau-rauCanterbury/Waitaha
Wellington/Te Whanga-nui-a-Tara
All regions of the North Island/Te Ika-a-Māui,All regions of the South Island/Te Wai Pounamu,All regions of Aotearoa/New Zealand
All regions of Aotearoa/New Zealand
Marlborough/Te Tauihu-o-te-waka
Northland/Te Tai Tokerau,Auckland/Tāmaki-makau-rau,Waikato,Bay of Plenty/Te Moana-a-Toi,Gisborne/Te Tai Rāwhiti,Hawke's Bay/Te Matau-a-Māui,Taranaki,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-Tara,Tasman/Te Tai-o-Aorere,Nelson/Whakatū,Marlborough/Te Tauihu-o-te-waka,West Coa

In [21]:
# replace NANs with empty string
df_2020 = df_2020.replace(np.nan, '', regex=True)

# define new columns
df_2020['location'] = np.empty((len(df_2020), 0)).tolist()
df_2020['lat'], df_2020['long'] = np.nan, np.nan


#print(type(df_2021.loc[5]['Unnamed: 20']))

In [28]:
if True:
# define geolcoator to get lats and longs
    geolocator = Nominatim(user_agent="Your_Name")

    df_clean = df_2020.copy()

    for i in range(1, len(df_2020)):
        for key in agg_list:
            if (type(df_2020.loc[i][key]) == str) and (df_2020.loc[i][key] != ''):
                df_clean.loc[i]['location'].append(df_2020.loc[i][key])

        if False:    
            if np.shape(df_clean.loc[i]['location'])[0] == 1:
                #print(df_2020.loc[i]['location'][0])
                #df_2020.loc[i]['location'] = df_2020.loc[i]['location'][0]
                # now append lats and longs
                locator = df_clean.loc[i]['location'][0].split('/')[0] + ' New Zealand'
                geolocation = geolocator.geocode(locator)
                df_clean.loc[i]['lat'], df_clean.loc[i]['long'] = geolocation.latitude, geolocation.longitude
                
            elif len(df_2020.loc[i]['location']) == 0:
                df_clean.loc[i]['location'] = np.nan
            
        #else:
        #    df_2020.loc[i]['location'] = ','.join(df_2020.loc[i]['location'])
    

In [29]:
df_2020.loc[1, 'location']

['Wellington/Te Whanga-nui-a-Tara',
 'Wellington/Te Whanga-nui-a-Tara',
 'Wellington/Te Whanga-nui-a-Tara']

In [61]:
df_2020_grouped = df_2020.groupby('location').count()

df_2020_grouped

Unnamed: 0_level_0,Respondent ID,What is your main role within the organisation?,Other role (please specify),What type of organisation do you represent? (Please choose from the list below),Other (please specify),Approximately how many paid staff and/or contractors work with/for your organisation?,Approximately how many volunteers work with your organisation each month?,What is the average number of hours worked by your organisation's volunteers each month?,What was the approximate annual income / operations budget for your organisation for the last financial year?,"Of the following options, which best describes the focus of your organisation's activities or services?",...,Unnamed: 155,Unnamed: 156,Unnamed: 157,Unnamed: 158,"Can you please give us an example of collaboration, sharing, partnerships and/or strategic decisions your organisation has made with other organisations, groups and/or government departments (since the beginning of the COVID-19 pandemic)?","The COVID-19 pandemic has had multiple structural/operational/financial impacts upon our tangata whenua, community and voluntary organisations in Aotearoa/New Zealand. That considered, the pandemic may have also had impacts on the wellbeing and emotional/morale status and stress levels of people in these organisations. Can you tell us a short story about your experiences?",Is there anything else you would like to share with us or comment on?,We seek your permission to include your anonymised responses in our dataset.,lat,long
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
All regions of Aotearoa/New Zealand,51,51,51,51,51,51,51,51,51,51,...,51,51,51,51,51,51,51,51,51,51
"All regions of the North Island/Te Ika-a-Māui,All regions of the South Island/Te Wai Pounamu,All regions of Aotearoa/New Zealand",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
Auckland/Tāmaki-makau-rau,46,46,46,46,46,46,46,46,46,46,...,46,46,46,46,46,46,46,46,46,46
"Auckland/Tāmaki-makau-rau,All regions of Aotearoa/New Zealand",4,4,4,4,4,4,4,4,4,4,...,4,4,4,4,4,4,4,4,4,4
"Auckland/Tāmaki-makau-rau,Waikato",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
"Auckland/Tāmaki-makau-rau,Waikato,Bay of Plenty/Te Moana-a-Toi",2,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
"Auckland/Tāmaki-makau-rau,Waikato,Bay of Plenty/Te Moana-a-Toi,Wellington/Te Whanga-nui-a-Tara",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
"Auckland/Tāmaki-makau-rau,Waikato,Bay of Plenty/Te Moana-a-ToiHawke's Bay/Te Matau-a-Māui,TaranakiWellington/Te Whanga-nui-a-Tara,Tasman/Te Tai-o-Aorere,Nelson/Whakatū,Canterbury/Waitaha,Otago/Ōtākou,Southland/Murihiku",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
"Auckland/Tāmaki-makau-rau,Waikato,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-Tara,Canterbury/Waitaha",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
"Auckland/Tāmaki-makau-rau,Waikato,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-Tara,Canterbury/Waitaha,Otago/ŌtākouAll regions of Aotearoa/New Zealand",1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


## draw map of Aotearoa

In [None]:
import geopandas as gpd

map_df = gpd.read_file("data/gis/regional-council-2022-clipped-generalised.shx")
map_ref = gpd.read_file("data/gis/regional_council_2022_clipped_csv.csv")

map_ref