In [57]:
# Dependencies
import pandas as pd
import numpy as np
import json

In [58]:
# Path to the Excel workbook holding the residential property price data,
# given relative to the notebook's working directory
cityPPIdata = "Data/ResidentialPropertyPriceData.xls"

In [59]:
# Load the 'Data1' worksheet of the workbook into a DataFrame
cityPPI_df = pd.read_excel(io=cityPPIdata, sheet_name='Data1')

In [60]:
# Discard the first 15 rows of the sheet (unneeded rows / rows with nulls)
leading_rows = cityPPI_df.index[:15]
cityPPI_df = cityPPI_df.drop(index=leading_rows)

In [61]:
# Keep only the first ten columns (the ones renamed in the next step) and
# drop everything from position 10 onwards, however wide the sheet is.
# Reassigning the result instead of using inplace=True avoids mutating the
# frame behind the reader's back and keeps the step chainable.
cityPPI_df = cityPPI_df.drop(columns=cityPPI_df.columns[10:])

In [62]:
# Give the ten remaining columns readable names, assigned by position
column_labels = [
    'FY',
    'Sydney',
    'Melbourne',
    'Brisbane',
    'Adelaide',
    'Perth',
    'Hobart',
    'Darwin',
    'Canberra',
    'Weighted Average',
]
new_columns = {cityPPI_df.columns[pos]: label
               for pos, label in enumerate(column_labels)}

cityPPI_df.rename(columns=new_columns, inplace=True)

# Summarise dtypes and non-null counts after the rename
cityPPI_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 15 to 78
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   FY                64 non-null     object
 1   Sydney            64 non-null     object
 2   Melbourne         64 non-null     object
 3   Brisbane          64 non-null     object
 4   Adelaide          64 non-null     object
 5   Perth             64 non-null     object
 6   Hobart            64 non-null     object
 7   Darwin            64 non-null     object
 8   Canberra          64 non-null     object
 9   Weighted Average  64 non-null     object
dtypes: object(10)
memory usage: 5.5+ KB


In [63]:
# Parse the period column into datetime values. pd.to_datetime is applied to
# the whole column in one vectorised call rather than once per element
# through .apply.
cityPPI_df['FY'] = pd.to_datetime(cityPPI_df['FY'])

In [64]:
# Keep only the June observations (PPI at the end of the financial year).
# .copy() makes fy_df an independent frame, so later column assignments on it
# do not raise SettingWithCopyWarning and cannot be confused with writes to
# cityPPI_df.
is_june = cityPPI_df['FY'].dt.month == 6
fy_df = cityPPI_df.loc[is_june].copy()

In [65]:
# Replace each date with its financial-year label: months after March map to
# the calendar year, earlier months to the previous year.
# assign() builds a new frame instead of writing a column into the filtered
# slice of cityPPI_df, which avoids SettingWithCopyWarning.
fy_df = fy_df.assign(
    FY=fy_df['FY'].map(lambda x: x.year if x.month > 3 else x.year - 1)
)
fy_df

Unnamed: 0,FY,Sydney,Melbourne,Brisbane,Adelaide,Perth,Hobart,Darwin,Canberra,Weighted Average
16,2005,82.6,63.2,73.3,69.3,62.6,76.9,54.7,69.9,72.2
20,2006,82.3,67.2,77.5,73.4,86.4,83.4,67.1,74.3,77.0
24,2007,85.2,76.0,89.0,81.9,97.7,89.6,74.1,82.0,84.1
28,2008,87.3,85.5,101.3,93.8,97.6,95.2,78.5,87.7,90.0
32,2009,87.5,86.9,98.8,95.8,95.1,96.4,89.0,88.7,90.2
36,2010,101.4,106.2,107.0,104.7,106.6,103.6,100.9,101.6,104.3
40,2011,101.4,103.5,102.5,101.0,100.2,102.9,96.5,101.9,102.0
44,2012,101.4,99.3,99.9,99.6,101.0,98.2,104.1,99.5,100.4
48,2013,108.7,102.7,103.2,100.9,110.6,100.0,111.0,101.0,105.7
52,2014,126.0,112.1,110.5,105.6,114.8,104.1,114.7,103.3,116.4


In [66]:
# Set FY as index
idx = ['FY']
final_PPI_df = fy_df.set_index(idx)

In [69]:
# Serialise the table to a JSON string with one object per index entry
d = final_PPI_df.to_json(path_or_buf=None, orient='index')
d

'{"2005":{"Sydney":82.6,"Melbourne":63.2,"Brisbane":73.3,"Adelaide":69.3,"Perth":62.6,"Hobart":76.9,"Darwin":54.7,"Canberra":69.9,"Weighted Average":72.2},"2006":{"Sydney":82.3,"Melbourne":67.2,"Brisbane":77.5,"Adelaide":73.4,"Perth":86.4,"Hobart":83.4,"Darwin":67.1,"Canberra":74.3,"Weighted Average":77},"2007":{"Sydney":85.2,"Melbourne":76,"Brisbane":89,"Adelaide":81.9,"Perth":97.7,"Hobart":89.6,"Darwin":74.1,"Canberra":82,"Weighted Average":84.1},"2008":{"Sydney":87.3,"Melbourne":85.5,"Brisbane":101.3,"Adelaide":93.8,"Perth":97.6,"Hobart":95.2,"Darwin":78.5,"Canberra":87.7,"Weighted Average":90},"2009":{"Sydney":87.5,"Melbourne":86.9,"Brisbane":98.8,"Adelaide":95.8,"Perth":95.1,"Hobart":96.4,"Darwin":89,"Canberra":88.7,"Weighted Average":90.2},"2010":{"Sydney":101.4,"Melbourne":106.2,"Brisbane":107,"Adelaide":104.7,"Perth":106.6,"Hobart":103.6,"Darwin":100.9,"Canberra":101.6,"Weighted Average":104.3},"2011":{"Sydney":101.4,"Melbourne":103.5,"Brisbane":102.5,"Adelaide":101,"Perth":100

In [70]:
# Parse the JSON string back into nested dicts and print the result
data = json.loads(d)
print(data)

{'2005': {'Sydney': 82.6, 'Melbourne': 63.2, 'Brisbane': 73.3, 'Adelaide': 69.3, 'Perth': 62.6, 'Hobart': 76.9, 'Darwin': 54.7, 'Canberra': 69.9, 'Weighted Average': 72.2}, '2006': {'Sydney': 82.3, 'Melbourne': 67.2, 'Brisbane': 77.5, 'Adelaide': 73.4, 'Perth': 86.4, 'Hobart': 83.4, 'Darwin': 67.1, 'Canberra': 74.3, 'Weighted Average': 77}, '2007': {'Sydney': 85.2, 'Melbourne': 76, 'Brisbane': 89, 'Adelaide': 81.9, 'Perth': 97.7, 'Hobart': 89.6, 'Darwin': 74.1, 'Canberra': 82, 'Weighted Average': 84.1}, '2008': {'Sydney': 87.3, 'Melbourne': 85.5, 'Brisbane': 101.3, 'Adelaide': 93.8, 'Perth': 97.6, 'Hobart': 95.2, 'Darwin': 78.5, 'Canberra': 87.7, 'Weighted Average': 90}, '2009': {'Sydney': 87.5, 'Melbourne': 86.9, 'Brisbane': 98.8, 'Adelaide': 95.8, 'Perth': 95.1, 'Hobart': 96.4, 'Darwin': 89, 'Canberra': 88.7, 'Weighted Average': 90.2}, '2010': {'Sydney': 101.4, 'Melbourne': 106.2, 'Brisbane': 107, 'Adelaide': 104.7, 'Perth': 106.6, 'Hobart': 103.6, 'Darwin': 100.9, 'Canberra': 101.6,

In [71]:
# Write the table to disk as a JSON array of row records.
# The target is given relative to the working directory, like the input
# workbook path, instead of a machine-specific absolute path.
# The result is not assigned: to_json returns None when it is given a path.
# NOTE(review): orient="records" omits the index, so the FY labels are not
# written to the file. Confirm that consumers of the file do not need them.
final_PPI_df.to_json("Data/PPI_data.json", orient="records")

In [49]:
# Build one record per column of the table: 'data' holds that column's values
# in row order. Note that the 'FY' key carries the column name (e.g. a city),
# not a year.
json_data = [
    {'data': list(column.values), 'FY': label}
    for label, column in final_PPI_df.items()
]

In [50]:
# Display the list of per-column records
json_data

[{'data': [82.6,
   82.3,
   85.2,
   87.3,
   87.5,
   101.4,
   101.4,
   101.4,
   108.7,
   126,
   149.8,
   155.2,
   176.6,
   169.8,
   153.5,
   165.9],
  'FY': 'Sydney'},
 {'data': [63.2,
   67.2,
   76,
   85.5,
   86.9,
   106.2,
   103.5,
   99.3,
   102.7,
   112.1,
   120.8,
   130.7,
   148.7,
   152.1,
   137.9,
   150],
  'FY': 'Melbourne'},
 {'data': [73.3,
   77.5,
   89,
   101.3,
   98.8,
   107,
   102.5,
   99.9,
   103.2,
   110.5,
   113.7,
   118.6,
   122.1,
   124.2,
   120.9,
   123.7],
  'FY': 'Brisbane'},
 {'data': [69.3,
   73.4,
   81.9,
   93.8,
   95.8,
   104.7,
   101,
   99.6,
   100.9,
   105.6,
   108.5,
   112.3,
   117.9,
   120.4,
   120.3,
   121.1],
  'FY': 'Adelaide'},
 {'data': [62.6,
   86.4,
   97.7,
   97.6,
   95.1,
   106.6,
   100.2,
   101,
   110.6,
   114.8,
   113.4,
   108,
   104.7,
   103.8,
   99.7,
   99.5],
  'FY': 'Perth'},
 {'data': [76.9,
   83.4,
   89.6,
   95.2,
   96.4,
   103.6,
   102.9,
   98.2,
   100,
   104.1,