In [117]:
import pandas as pd
import numpy as np

def vcamsLGA(factor, year, sheet = 0):
    path = './raw_data/vcamsLGA/VCAMS_'+factor+'.xlsx'
    df = pd.read_excel(path, sheet_name=sheet)
    df = df[['Year', 'LGA', 'Indicator']]
    # remove aggregates (victoria)
    df = df[~df['LGA'].str.contains('Victoria')]
    df = df[df['Year'] == year]
    df['LGA'] = df['LGA'].replace('\([a-zA-Z]*\)','', regex = True)
    df['LGA'] = df['LGA'].str.strip()
    df = df.set_index('LGA')
    df.loc[df['Indicator'] == 'NDP'] = np.nan
    df['Indicator'] = pd.to_numeric(df['Indicator'])
    df = df.rename({'Indicator': factor}, axis = 1)
    df= df.drop('Year', axis=1)
    return df

def vcamsDHS(name, year=2014, sheet = 0):
    path = './raw_data/vcamsDHS/VCAMS_'+name+'.xlsx'
    df = pd.read_excel(path, sheet_name=sheet)
    df = df[df['Year'] == year]
    df = df.loc[:, ~df.columns.str.match('Unnamed')]
    df = df[~df['DHS AREA'].str.contains('Victoria')]
    df['DHS AREA'] = df['DHS AREA'].replace('Area', '', regex=True)
    df['DHS AREA'] = df['DHS AREA'].str.strip()
    df = df.drop(['RSE', 'Year'], axis=1)
    df = df.set_index('DHS AREA')
    df.loc[df['Indicator'] == 'NDP'] = np.nan
    df['Indicator'] = pd.to_numeric(df['Indicator'], errors='ignore')
    df = df.rename(columns={'Indicator': name})
    return df



In [85]:
depressionDHS = pd.read_csv('./wrangled/depression2018DHS.csv')
depressionDHS['DHS AREA'] = depressionDHS['DHS AREA'].replace('Area', '', regex=True)
depressionDHS['DHS AREA'] = depressionDHS['DHS AREA'].str.strip()
depressionDHS = depressionDHS.set_index('DHS AREA')

In [18]:
vcamsDHS('financial')

Unnamed: 0_level_0,Indicator
DHS AREA,Unnamed: 1_level_1
Barwon Area,0.04709
Bayside Peninsula Area,0.045726
Brimbank Melton,0.040905
Central Highlands Area,0.020097
Goulburn Area,0.047842
Hume Moreland Area,0.022939
Inner Eastern Melbourne Area,0.019938
Inner Gippsland Area,0.067882
Loddon Area,
Mallee,0.040524


In [43]:
names = [s.strip('\n') for s in open('./raw_data/vcamsDHS/names.txt', "r").readlines()

SyntaxError: invalid syntax (<ipython-input-43-37de3441114d>, line 1)

In [57]:
files = {k: int(v) for line in open('./raw_data/vcamsDHS/names.txt', "r") for (k,v) in [line.split()]}

In [90]:
vcams_DHS_data = [vcamsDHS(i, year = files[i]) for i in files.keys()]

In [91]:
DHS_data = depressionDHS.join(vcams_DHS_data)

In [92]:
DHS_data.to_csv('./wrangled/DHSdata.csv')

In [137]:
# LGA stuff
tsratio = pd.read_csv('./wrangled/tsRatio.csv', index_col = 0)
depression = pd.read_csv('./wrangled/depression.csv', index_col = 'LGA')
aedc = pd.read_csv('./wrangled/AEDC.csv', index_col = 'LGA')
LGAfiles = {k: int(v) for line in open('./raw_data/vcamsLGA/names.txt', "r") for (k,v) in [line.split()]}
vcams_LGA_data = [vcamsLGA(i, year = LGAfiles[i]) for i in LGAfiles.keys()]

In [115]:
aedc

Unnamed: 0_level_0,aedcSocial,aedcLanguage,aedcComm,aedcEmotion,aedcHealth
LGA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alpine,12.0,9.6,7.2,9.6,12.8
Ararat,15.3,14.4,8.5,17.8,14.4
Ballarat,8.2,7.0,6.5,8.9,11.0
Banyule,6.2,3.2,4.1,5.7,5.7
Bass Coast,7.4,6.1,8.5,8.3,8.0
...,...,...,...,...,...
Wodonga,9.0,8.0,6.0,11.1,8.2
Wyndham,10.3,7.6,9.7,8.1,8.5
Yarra,9.9,5.4,6.5,8.9,9.4
Yarra Ranges,8.1,6.4,5.6,8.6,7.8


In [132]:
vcams_LGA_data

[              behavourial
 LGA                      
 Alpine           0.038000
 Ararat                NaN
 Ballarat         0.055649
 Banyule          0.032290
 Bass Coast       0.044937
 ...                   ...
 Wodonga          0.056161
 Wyndham          0.049987
 Yarra            0.015712
 Yarra Ranges     0.040445
 Yarriambiack          NaN
 
 [80 rows x 1 columns],
               bullying
 LGA                   
 Alpine           0.202
 Ararat           0.150
 Ballarat         0.165
 Banyule          0.095
 Bass Coast       0.165
 ...                ...
 Wodonga          0.160
 Wyndham          0.184
 Yarra            0.143
 Yarra Ranges     0.150
 Yarriambiack     0.154
 
 [79 rows x 1 columns],
               connectedness
 LGA                        
 Alpine                0.828
 Ararat                0.888
 Ballarat              0.822
 Banyule               0.901
 Bass Coast            0.880
 ...                     ...
 Wodonga               0.852
 Wyndham               0

In [122]:
tsratio

Unnamed: 0,tsRatio
Alpine,0.038530
Ararat,0.038395
Ballarat,0.048917
Banyule,0.059151
Bass Coast,0.055511
...,...
Wodonga,0.064071
Wyndham,0.062797
Yarra,0.084931
Yarra Ranges,0.060639


In [138]:
LGA_data = depression.join(vcams_LGA_data)

In [141]:
LGA_data = LGA_data.join(aedc).join(tsratio)

In [142]:
LGA_data

Unnamed: 0_level_0,Depression Rate,behavourial,bullying,connectedness,familystress,aedcSocial,aedcLanguage,aedcComm,aedcEmotion,aedcHealth,tsRatio
LGA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Alpine,18.2,0.038000,0.202,0.828,0.151789,12.0,9.6,7.2,9.6,12.8,0.038530
Ararat,22.8,,0.150,0.888,0.086064,15.3,14.4,8.5,17.8,14.4,0.038395
Ballarat,15.6,0.055649,0.165,0.822,0.113772,8.2,7.0,6.5,8.9,11.0,0.048917
Banyule,23.5,0.032290,0.095,0.901,0.114071,6.2,3.2,4.1,5.7,5.7,0.059151
Bass Coast,22.4,0.044937,0.165,0.880,0.109201,7.4,6.1,8.5,8.3,8.0,0.055511
...,...,...,...,...,...,...,...,...,...,...,...
Wodonga,23.7,0.056161,0.160,0.852,0.139887,9.0,8.0,6.0,11.1,8.2,0.064071
Wyndham,18.9,0.049987,0.184,0.754,0.104494,10.3,7.6,9.7,8.1,8.5,0.062797
Yarra,21.3,0.015712,0.143,0.827,0.087847,9.9,5.4,6.5,8.9,9.4,0.084931
Yarra Ranges,23.3,0.040445,0.150,0.852,0.122326,8.1,6.4,5.6,8.6,7.8,0.060639


In [143]:
LGA_data.to_csv('./wrangled/LGA_data.csv')