# NSW Domain CSV datetime converter

## Import libraries and dependencies

In [50]:
# Import pandas and pathlib (used below), plus numpy and os
import pandas as pd
from pathlib import Path
import numpy as np
import os

## Create a Path to the File Using Pathlib & Read it into a Pandas DataFrame

In [51]:
# Build the location of the input CSV with the pathlib library.
# Alternative inputs previously listed here as commented-out paths:
#   Resources/data.csv, Resources/data_25082021.csv, datav2_23082021.csv
csv_path = Path("Resources") / "clean_suburbs_49.csv"

# Read the CSV at that path into a DataFrame; the bare name on the last line
# renders the frame as the cell output.
df = pd.read_csv(csv_path)
df

Unnamed: 0,year,month,suburb,medianSoldPrice,numberSold,highestSoldPrice,numberSaleListing,highestSaleListingPrice,lowestSaleListingPrice,auctionNumberAuctioned,auctionNumberSold,medianRentListingPrice
0,2011,8,Randwick,1422000,40,5450000,81,8000000.0,775000.0,32.0,20.0,950.0
1,2011,11,Randwick,1417000,49,3660000,97,6000000.0,700000.0,47.0,23.0,880.0
2,2012,2,Randwick,1520000,24,6200000,62,5850000.0,570000.0,10.0,5.0,975.0
3,2012,5,Randwick,1555000,44,3325000,77,3590000.0,570000.0,30.0,17.0,850.0
4,2012,8,Randwick,1385000,27,3050000,57,4250000.0,650000.0,19.0,11.0,825.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1955,2020,6,Woy Woy,630000,46,1100000,159,1750000.0,350000.0,8.0,3.0,415.0
1956,2020,9,Woy Woy,645000,67,2000000,118,1600000.0,375000.0,8.0,4.0,410.0
1957,2020,12,Woy Woy,660000,68,1510000,103,4500000.0,60000.0,7.0,4.0,450.0
1958,2021,3,Woy Woy,750000,80,1710000,97,4500000.0,60000.0,8.0,6.0,450.0


In [52]:
# List the distinct values found in the 'month' column.
df["month"].unique()

array([ 8, 11,  2,  5,  9, 12,  3,  6], dtype=int64)

In [53]:
# Count how many rows each suburb contributes.
df["suburb"].value_counts()

Frenchs Forest      40
Byron Bay           40
Katoomba            40
Cromer              40
Casino              40
Baulkham Hills      40
Murwillumbah        40
Epping              40
Lennox Head         40
Plumpton            40
Alexandria          40
Lane Cove           40
Pottsville          40
Forster             40
Quakers Hill        40
Alstonville         40
Allambie Heights    40
Yamba               40
Kellyville          40
Marsfield           40
St Andrews          40
Pennant Hills       40
Paddington          40
Woy Woy             40
Yagoona             40
Castle Hill         40
Bundanoon           40
Mosman              40
Marrickville        40
Mortdale            40
Pymble              40
Warners Bay         40
Fairfield West      40
Merewether          40
Fletcher            40
Maryland            40
Hornsby             40
Shortland           40
Thornleigh          40
Ballina             40
Bathurst            40
Banora Point        40
Campbelltown        40
Cooranbong 

In [54]:
# Summarise the frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1960 entries, 0 to 1959
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   year                     1960 non-null   int64  
 1   month                    1960 non-null   int64  
 2   suburb                   1960 non-null   object 
 3   medianSoldPrice          1960 non-null   int64  
 4   numberSold               1960 non-null   int64  
 5   highestSoldPrice         1960 non-null   int64  
 6   numberSaleListing        1960 non-null   int64  
 7   highestSaleListingPrice  1957 non-null   float64
 8   lowestSaleListingPrice   1955 non-null   float64
 9   auctionNumberAuctioned   1720 non-null   float64
 10  auctionNumberSold        1521 non-null   float64
 11  medianRentListingPrice   1943 non-null   float64
dtypes: float64(5), int64(6), object(1)
memory usage: 183.9+ KB


In [55]:
# Preview the first five rows before the date conversion.
df.head(n=5)

Unnamed: 0,year,month,suburb,medianSoldPrice,numberSold,highestSoldPrice,numberSaleListing,highestSaleListingPrice,lowestSaleListingPrice,auctionNumberAuctioned,auctionNumberSold,medianRentListingPrice
0,2011,8,Randwick,1422000,40,5450000,81,8000000.0,775000.0,32.0,20.0,950.0
1,2011,11,Randwick,1417000,49,3660000,97,6000000.0,700000.0,47.0,23.0,880.0
2,2012,2,Randwick,1520000,24,6200000,62,5850000.0,570000.0,10.0,5.0,975.0
3,2012,5,Randwick,1555000,44,3325000,77,3590000.0,570000.0,30.0,17.0,850.0
4,2012,8,Randwick,1385000,27,3050000,57,4250000.0,650000.0,19.0,11.0,825.0


## Convert 'year' and 'month' columns to a combined quarterly 'date' column (also used as the index)

In [56]:
# Build the quarterly 'date' index from the numeric 'year' / 'month' columns.
#
# The guard makes this cell safe to re-run: once 'year' and 'month' have been
# dropped, a second execution is skipped instead of raising KeyError.
if {'year', 'month'}.issubset(df.columns):
    # First day of each reported month, assembled from the integer components
    # rather than parsed back out of a non-zero-padded string such as '2011-8-1'.
    month_start = pd.to_datetime(df[['year', 'month']].assign(day=1))
    # Calendar quarter containing that month (e.g. 2011-08 -> 2011Q3).
    quarter = month_start.dt.to_period('Q')

    df = (
        df.assign(
            # Helper columns are kept, in the same order as before, so the
            # layout of the frame (and of the CSV written from it) is unchanged.
            ymd=df['year'].astype(str) + '-' + df['month'].astype(str) + '-1',
            ymd_dt=month_start,
            qtr=quarter,
            # Timestamp of the first day of the quarter (2011Q3 -> 2011-07-01).
            date=quarter.dt.to_timestamp(),
        )
        # Produce a new frame rather than mutating the loaded one in place.
        .drop(columns=['year', 'month'])
        # NOTE(review): 'date' stays as a column *and* becomes the index, exactly
        # as before, so a CSV written with the index has 'date' in its header
        # twice. Confirm whether downstream readers need the column before
        # switching to drop=True.
        .set_index('date', drop=False)
    )
elif 'date' not in df.columns:
    # Neither the source columns nor the converted column exist: the input is
    # not what this notebook expects, so fail loudly.
    raise KeyError("expected 'year' and 'month' columns to build the 'date' index")


In [60]:
# Preview the first five rows after the date conversion.
df.head(n=5)

Unnamed: 0_level_0,suburb,medianSoldPrice,numberSold,highestSoldPrice,numberSaleListing,highestSaleListingPrice,lowestSaleListingPrice,auctionNumberAuctioned,auctionNumberSold,medianRentListingPrice,ymd,ymd_dt,qtr,date
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2011-07-01,Randwick,1422000,40,5450000,81,8000000.0,775000.0,32.0,20.0,950.0,2011-8-1,2011-08-01,2011Q3,2011-07-01
2011-10-01,Randwick,1417000,49,3660000,97,6000000.0,700000.0,47.0,23.0,880.0,2011-11-1,2011-11-01,2011Q4,2011-10-01
2012-01-01,Randwick,1520000,24,6200000,62,5850000.0,570000.0,10.0,5.0,975.0,2012-2-1,2012-02-01,2012Q1,2012-01-01
2012-04-01,Randwick,1555000,44,3325000,77,3590000.0,570000.0,30.0,17.0,850.0,2012-5-1,2012-05-01,2012Q2,2012-04-01
2012-07-01,Randwick,1385000,27,3050000,57,4250000.0,650000.0,19.0,11.0,825.0,2012-8-1,2012-08-01,2012Q3,2012-07-01


## Write DataFrame to csv

In [61]:
# Write the converted frame to disk. The index is written as the first CSV
# column because to_csv includes the index by default.
df.to_csv(path_or_buf="Resources/nsw_suburbs_49.csv")

## Done. Move over to suburb_analysis.ipynb to see how pretty this data can get ;)