In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import jupyterthemes as jtplot
from geopy.geocoders import Nominatim

%config InlineBackend.figure_format = 'retina'                                    # so you can see plots in HD :) 
#jtplot.style(theme='grade3', context='notebook', ticks=True, grid=True)

In [2]:
df_2020 = pd.read_csv(r'data/raw/huie_surveyData_2020.csv')
df_2021 = pd.read_csv(r'data/raw/huie_surveyData_2021.csv')

print(df_2021.keys())

Index(['row hash', 'your role recoded', 'orgtype: charitable',
       'orgtype: inc soc', 'orgtype: voluntary', 'orgtype: māori',
       'orgtype: faith', 'orgtype: philanthropic', 'orgtype:other',
       'paid staff',
       ...
       'sector changes: Access to information and data in one place',
       'sector changes: Funding to cover salaries and operational costs',
       'sector changes: Other (please specify)', 'important learning',
       'comments', 'share', 'again: Yes, I am willing to be surveyed again',
       'again: Yes, I am willing to be interviewed',
       'again: Yes, I would like to see the survey results',
       'again: No, I do not wish to be contacted'],
      dtype='object', length=133)


In [3]:
print(len(df_2020))
print(len(df_2021))

REGIONS = np.array(
            ["Northland/Te Tai Tokerau"
            "Auckland/Tāmakimakaurau", 
            "Waikato",
            "Bay of Plenty/Te MoanaaToi",
            "Gisborne/Te Tai Rāwhiti",
            "Hawke's Bay/Te Mataua Māui",
            "Taranaki",
            "Manawatū",
            "Whanganui",
            "Wellington/Te WhanganuiaTara",
            "Tasman/Te TaioAorere",
            "Nelson/Whakatū",
            "Marlborough/Te Tauihuotewaka",
            "West Coast /Te Tai Poutini",
            "Canterbury/Waitaha",
            "Otago/Ōtākou",
            "Southland/Murihiku",
            "All regions of the North Island/Te IkaaMāui",
            "All regions of the South Island/Te Wai Pounamu",
            "All regions of Aotearoa/New Zealand"]
)



print(max(REGIONS, key=lambda s: (len(s), s)))

n_spaces = [a.count(' ') for a in REGIONS]
print(n_spaces)

362
961
Northland/Te Tai TokerauAuckland/Tāmakimakaurau
[2, 0, 3, 2, 3, 0, 0, 0, 1, 1, 0, 1, 4, 0, 0, 0, 6, 7, 4]


In [4]:
text = 'The COVID-19 pandemic has had multiple structural/operational/financial impacts upon our tangata whenua, community and voluntary organisations in Aotearoa/New Zealand. That considered, the pandemic may have also had impacts on the wellbeing and emotional/morale status and stress levels of people in these organisations. Can you tell us a short story about your experiences?'

if False:
    print(df_2021[text].values)

In [16]:
# aggregate data based on the columns
df_2020 = df_2020.replace(np.nan, '', regex=True)

agg_list = ['In which region(s) does your organisation operate (please choose all that apply)'] + [f'Unnamed: {j}' for j in range(14,32+1)]

df_2020['location'] = df_2020[agg_list].T.agg(','.join)

df_2020 = df_2020.replace(',,', '', regex=True)
 

df_2020.loc[5, 'location']

',Auckland/Tāmaki-makau-rau,Waikato,Manawatū-Whanganui,Wellington/Te Whanga-nui-a-TaraNelson/Whakatū,Canterbury/Waitaha,Otago/Ōtākou,'

In [18]:
import re
#df_2021 = df_2021.replace(',', '', regex=True)
locs = df_2020['location'].values

df_2020 = df_2020.replace(',,', '', regex=True)


if True:
    for i in range(len(df_2020)):
        if (df_2020.loc[i]['location'] != '') and (df_2020.loc[i]['location'][0] == ',' or df_2020.loc[i]['location'][-1] == ','):
            df_2020.loc[i]['location'] = df_2020.loc[i]['location'].replace(',', '')

        elif df_2020.loc[i]['location'].count(',') > 1:
            df_2020.loc[i]['location'] = df_2020.loc[i]['location'].split(',')

df_2020.loc[6]['location']



'Taranaki'

In [21]:
# replace NANs with empty string
df_2020 = df_2020.replace(np.nan, '', regex=True)

# define new columns
df_2020['location'] = np.empty((len(df_2020), 0)).tolist()
df_2020['lat'], df_2020['long'] = np.nan, np.nan


#print(type(df_2021.loc[5]['Unnamed: 20']))

In [28]:
if True:
# define geolcoator to get lats and longs
    geolocator = Nominatim(user_agent="Your_Name")

    df_clean = df_2020.copy()

    for i in range(1, len(df_2020)):
        for key in agg_list:
            if (type(df_2020.loc[i][key]) == str) and (df_2020.loc[i][key] != ''):
                df_clean.loc[i]['location'].append(df_2020.loc[i][key])

        if False:    
            if np.shape(df_clean.loc[i]['location'])[0] == 1:
                #print(df_2020.loc[i]['location'][0])
                #df_2020.loc[i]['location'] = df_2020.loc[i]['location'][0]
                # now append lats and longs
                locator = df_clean.loc[i]['location'][0].split('/')[0] + ' New Zealand'
                geolocation = geolocator.geocode(locator)
                df_clean.loc[i]['lat'], df_clean.loc[i]['long'] = geolocation.latitude, geolocation.longitude
                
            elif len(df_2020.loc[i]['location']) == 0:
                df_clean.loc[i]['location'] = np.nan
            
        #else:
        #    df_2020.loc[i]['location'] = ','.join(df_2020.loc[i]['location'])
    

In [29]:
df_2020.loc[1, 'location']

['Wellington/Te Whanga-nui-a-Tara',
 'Wellington/Te Whanga-nui-a-Tara',
 'Wellington/Te Whanga-nui-a-Tara']

In [12]:
df_2020_grouped = df_2020.groupby('location').count()

TypeError: unhashable type: 'list'

## draw map of Aotearoa

In [None]:
import geopandas as gpd

map_df = gpd.read_file("data/gis/regional-council-2022-clipped-generalised.shx")
map_ref = gpd.read_file("data/gis/regional_council_2022_clipped_csv.csv")

map_ref