## Imports

In [3]:
import datetime

import datamate
import requests
from tqdm.notebook import tqdm

## Name For This Data

In [6]:
# Both dataset names share one prefix; only the trailing stage differs.
_dataset = "price-paid-data"
name_raw = f"{_dataset}/raw"
name = f"{_dataset}/derived"

## Load Necessary Configuration

In [7]:
# Look up the datamate key registered for the derived dataset name.
# NOTE(review): the captured output for this cell shows
# key='price-path-data/derived' ("path", not "paid") -- confirm the
# datamate configuration entry is not a typo.
key = datamate.config.get_key_for_name(name=name)

# Data directory and logger are both obtained from that key.
path_data = datamate.config.get_path_data_for_key(key=key)
logger = datamate.logging.get_logger(key=key)

logger.info(f"{key=} loaded for {name=}")
logger.info(f"{path_data=}")

# Same lookup for the raw dataset name, giving its own key and data directory.
key_raw = datamate.config.get_key_for_name(name=name_raw)
path_data_raw = datamate.config.get_path_data_for_key(key=key_raw)

logger.info(f"Using {path_data_raw=} for {key_raw=}")

2023-01-30 17:51:13.059 INFO 1372638304 - <module>: key='price-path-data/derived' loaded for name='price-paid-data/derived'
2023-01-30 17:51:13.060 INFO 1372638304 - <module>: path_data=PosixPath('/media/josie/hdd/data/house-data/price-path-data/derived')
2023-01-30 17:51:13.063 INFO 1372638304 - <module>: Using path_data_raw=PosixPath('/media/josie/hdd/data/house-data/price-paid-data/raw') for key_raw='price-paid-data/raw'


## Data Information

- Primary source of information is https://www.gov.uk/guidance/about-the-price-paid-data

|Data item | Explanation (where appropriate)|
| -- | -- |
|Transaction unique identifier | A reference number which is generated automatically recording each published sale. The number is unique and will change each time a sale is recorded.|
| Price | Sale price stated on the transfer deed.|
| Date of Transfer| Date when the sale was completed, as stated on the transfer deed.|
| Postcode | This is the postcode used at the time of the original transaction. Note that postcodes can be reallocated and these changes are not reflected in the Price Paid Dataset.|
| Property Type | D = Detached, S = Semi-Detached, T = Terraced, F = Flats/Maisonettes, O = Other|
|Old/New | Indicates the age of the property and applies to all price paid transactions, residential and non-residential. Y = a newly built property, N = an established residential building |
|Duration | Relates to the tenure: F = Freehold, L = Leasehold etc.|
|PAON | Primary Addressable Object Name. Typically the house number or name. |
|SAON | Secondary Addressable Object Name. Where a property has been divided into separate units (for example, flats), the PAON (above) will identify the building and a SAON will be specified that identifies the separate unit/flat. |
|Street | |
|Locality | |
|Town/City | |
|District | |
|County | |
| PPD Category Type | Indicates the type of Price Paid transaction. A = Standard Price Paid entry, includes single residential property sold for value. B = Additional Price Paid entry including transfers under a power of sale/repossessions, buy-to-lets (where they can be identified by a Mortgage), transfers to non-private individuals and sales where the property type is classed as ‘Other’.|
|Record Status - monthly file only |Indicates additions, changes and deletions to the records (see guide below). A = Addition, C = Change, D = Delete|


In [None]:
# First year for which a yearly file is requested.
year_earliest = 1995

# Per-year file name; "{year}" is filled in later with str.format.
filename = "pp-{year}.csv"

# Host serving the yearly files.
url_base = "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com"

# Full download URL, still carrying the "{year}" placeholder.
url_template = "/".join((url_base, filename))

# Column names, in the same order as the "Data Information" table.
headers = [
    # Transaction details
    "Transaction unique identifier",
    "Price",
    "Date of Transfer",
    # Property details
    "Postcode",
    "Property Type",
    "Old/New",
    "Duration",
    # Address
    "PAON",
    "SAON",
    "Street",
    "Locality",
    "Town/City",
    "District",
    "County",
    # Record metadata
    "PPD Category Type",
    "Record Status - monthly file only",
]

## Year

In [None]:
# Current calendar year according to the local clock.
year_today = datetime.date.today().year
logger.info(f"{year_today=}")

## Historical Data Pull

In [None]:
# Make sure the target directory exists before anything is written into it.
path_data.mkdir(parents=True, exist_ok=True)

# Write the column names to their own one-line CSV, each name double-quoted.
path_headers = path_data.joinpath("headers.csv")
path_headers.write_text(",".join(f'"{header}"' for header in headers) + "\n")
logger.info(f"Headers dumped to {path_headers=}")

# (connect, read) timeouts in seconds, so a stalled connection raises
# instead of hanging the notebook indefinitely.
request_timeout = (10, 300)
# Bytes per chunk when streaming a response body to disk.
chunk_size = 1024 * 1024

# NOTE(review): files are written under path_data (the "derived" location),
# not path_data_raw -- confirm this is the intended destination.
# range() stops before year_today, so the current year is not fetched here.
for year in tqdm(range(year_earliest, year_today)):
    url = url_template.format(year=year)
    logger.info(f"Reading {url=}")

    path_file = path_data.joinpath(filename.format(year=year))
    # Download into a sibling ".part" file and rename only on success, so an
    # interrupted transfer never leaves a truncated file under the final name.
    path_partial = path_file.with_name(path_file.name + ".part")

    # Stream the body instead of holding it all in memory via response.text;
    # writing bytes also avoids decoding with a guessed charset and
    # re-encoding with the locale default.
    with requests.get(url, stream=True, timeout=request_timeout) as response:
        # Fail loudly on HTTP errors rather than saving an error page as CSV.
        response.raise_for_status()

        logger.info(f"Dumping to {path_file=}")
        with path_partial.open("wb") as file_out:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file_out.write(chunk)

    path_partial.replace(path_file)