# Doodles

Exploring the dataset.

In [None]:
# Data downloaded from:
# https://osdatahub.os.uk/downloads/open/OpenNames
#Your use of OS OpenData is subject to the terms at http://os.uk/opendata/licence.
#
#The copyright acknowledgements to use for the attribution statement are:
#
#Contains OS data © Crown Copyright and database rights 2025.
#
#Contains Royal Mail data © Royal Mail copyright and database right 2025.
#
#Contains National Statistics data © Crown copyright and database right 2025

# Quick extract:
#grep "Isle of Wight" opname_csv_gb/Data/*.csv > IW_os_placenames.csv
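# Includes false positives: it matches the text anywhere in a row, so places
# elsewhere that are merely *named* "Isle of Wight ..." (e.g. "Isle of Wight Hill"
# in Hampshire) are picked up too.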

# Better:
# grep "http://data.ordnancesurvey.co.uk/id/7000000000025469" opname_csv_gb/Data/*.csv > IW_os_placenames.csv


In [None]:
# Read the extracted Open Names rows into a DataFrame
import pandas as pd

df = pd.read_csv(filepath_or_buffer="IW_os_placenames.csv")

# Show the first five rows as a quick sanity check
df.head(n=5)

Unnamed: 0,ID,NAMES_URI,NAME1,NAME1_LANG,NAME2,NAME2_LANG,TYPE,LOCAL_TYPE,GEOMETRY_X,GEOMETRY_Y,...,COUNTY_UNITARY,COUNTY_UNITARY_URI,COUNTY_UNITARY_TYPE,REGION,REGION_URI,COUNTRY,COUNTRY_URI,RELATED_SPATIAL_OBJECT,SAME_AS_DBPEDIA,SAME_AS_GEONAMES
0,SU22.csv:osgb4000000074770417,http://data.ordnancesurvey.co.uk/id/4000000074...,Isle of Wight Hill,,,,landform,Hill Or Mountain,424844,137337,...,Hampshire,http://data.ordnancesurvey.co.uk/id/7000000000...,http://data.ordnancesurvey.co.uk/ontology/admi...,South East,http://data.ordnancesurvey.co.uk/id/7000000000...,England,http://data.ordnancesurvey.co.uk/id/country/en...,,,
1,SU86.csv:osgb4000000074808919,http://data.ordnancesurvey.co.uk/id/4000000074...,Isle of Wight Pond,,,,hydrography,Inland Water,495262,172358,...,Windsor and Maidenhead,http://data.ordnancesurvey.co.uk/id/7000000000...,http://data.ordnancesurvey.co.uk/ontology/admi...,South East,http://data.ordnancesurvey.co.uk/id/7000000000...,England,http://data.ordnancesurvey.co.uk/id/country/en...,,,
2,SZ06.csv:osgb4000000026226540,http://data.ordnancesurvey.co.uk/id/4000000026...,Isle of Wight Road,,,,transportNetwork,Named Road,403311,77620,...,Dorset,http://data.ordnancesurvey.co.uk/id/7000000000...,http://data.ordnancesurvey.co.uk/ontology/admi...,South West,http://data.ordnancesurvey.co.uk/id/7000000000...,England,http://data.ordnancesurvey.co.uk/id/country/en...,,,
3,SZ28.csv:osgb4000000074551734,http://data.ordnancesurvey.co.uk/id/4000000074...,Bouldnor,,,,populatedPlace,Village,436404,89600,...,Isle of Wight,http://data.ordnancesurvey.co.uk/id/7000000000...,http://data.ordnancesurvey.co.uk/ontology/admi...,South East,http://data.ordnancesurvey.co.uk/id/7000000000...,England,http://data.ordnancesurvey.co.uk/id/country/en...,,http://dbpedia.org/resource/Bouldnor,
4,SZ28.csv:osgb4000000074551705,http://data.ordnancesurvey.co.uk/id/4000000074...,Freshwater,,,,populatedPlace,Village,433576,87130,...,Isle of Wight,http://data.ordnancesurvey.co.uk/id/7000000000...,http://data.ordnancesurvey.co.uk/ontology/admi...,South East,http://data.ordnancesurvey.co.uk/id/7000000000...,England,http://data.ordnancesurvey.co.uk/id/country/en...,,"http://dbpedia.org/resource/Freshwater,_Isle_o...",http://sws.geonames.org/2649069


In [None]:
# Cleaning step - keep only the IW locations.
#
# Rows are selected on the COUNTY_UNITARY_URI column, which must equal:
# http://data.ordnancesurvey.co.uk/id/7000000000025469
df = df.loc[
    df["COUNTY_UNITARY_URI"].eq("http://data.ordnancesurvey.co.uk/id/7000000000025469")
]

In [None]:
# Which kinds of record are present?
# Displays a pair: the distinct TYPE values, then the distinct LOCAL_TYPE values.
tuple(df[column].unique() for column in ("TYPE", "LOCAL_TYPE"))

(array(['populatedPlace', 'other', 'transportNetwork', 'landcover',
        'landform', 'hydrography'], dtype=object),
 array(['Village', 'Suburban Area', 'Hamlet', 'Town', 'Postcode',
        'Named Road', 'Section Of Named Road', 'Numbered Road',
        'Woodland Or Forest', 'Hill Or Mountain', 'Other Landcover',
        'Valley', 'Coastal Headland', 'Other Coastal Landform', 'Wetland',
        'Island', 'Cliff Or Slope', 'Bay', 'Harbour',
        'Medical Care Accommodation', 'Electricity Production',
        'Primary Education', 'Vehicular Ferry Terminal',
        'Section Of Numbered Road', 'Spot Height', 'Tidal Water',
        'Secondary Education', 'Special Needs Education',
        'Other Settlement', 'Inland Water', 'Other Landform', 'Channel',
        'Beach', 'Estuary', 'Sea', 'Railway', 'Passenger Ferry Terminal',
        'Non State Primary Education,Non State Secondary Education',
        'Railway Station', 'Bus Station', 'Airport', 'Further Education',
        'Hospice',

In [5]:
# Count how often each LOCAL_TYPE value occurs within each TYPE group
df.groupby(by="TYPE")["LOCAL_TYPE"].value_counts()

TYPE              LOCAL_TYPE                                               
hydrography       Bay                                                            31
                  Inland Water                                                    8
                  Tidal Water                                                     8
                  Estuary                                                         4
                  Channel                                                         3
                  Sea                                                             2
landcover         Woodland Or Forest                                            340
                  Other Landcover                                                69
                  Wetland                                                        20
                  Beach                                                          14
landform          Other Coastal Landform                                         45


In [14]:
# Distinct NAME1 values for rows whose LOCAL_TYPE is "Section Of Named Road"
road_names = df.loc[df["LOCAL_TYPE"].eq("Section Of Named Road"), "NAME1"].unique()
road_names

array(['Norton Green', 'Hill Place Lane', 'Heathfield Road',
       'Windsor Drive', 'Badger Lane', 'Dodpits Lane', 'Colwell Lane',
       'Ivylands Close', 'Spinfish Lane', 'Upper Princes Road',
       'Halletts Shute', 'Golden Hill Fort Road', 'Court Road',
       'Tennyson Road', 'Colwell Road', 'Afton Road', 'Elm Close',
       'Coopers Close', 'Headon Rise', 'Broad Lane', 'Hamstead Road',
       'Alum Bay New Road', 'The Avenue', 'Pixley Hill', 'Arnhem Road',
       'Hill Lane', 'Newport Road', 'Alum Bay Old Road', 'The Causeway',
       'The Sheilings', 'Highdown Lane', 'Gas Works Lane',
       'Hulverstone Lane', 'Silcombe Lane', 'Moons Hill', 'Warden Road',
       'Sea View Road', 'Copse Lane', 'Colwell Chine Road', 'East Close',
       'Military Road', 'Freshwater Road', 'Locksley Close', 'Monks Lane',
       'Coastguard Lane', 'Wellow Top Road', 'Thorley Road',
       'Hamstead Drive', 'Rew Lane', 'Kingates Lane', 'Slay Lane',
       'Pan Lane', 'Chale Street', 'Kemmin Lane',

In [15]:
# Distinct NAME1 values for rows whose TYPE is "populatedPlace"
settlement_names = df.loc[df["TYPE"].eq("populatedPlace"), "NAME1"].unique()
settlement_names

array(['Bouldnor', 'Freshwater', 'Cranmore', 'School Green',
       'Thorley Street', 'Afton', 'Thorley', 'Wellow', 'Dunsbury',
       'Hulverstone', 'Norton', 'New Village', 'Shalcombe', 'Middleton',
       'Pound Green', 'Hamstead', 'Totland', 'Brook', 'Locksley',
       'Brookgreen', 'Colwell', 'Freshwater Bay', 'Norton Green',
       'Easton', 'Yarmouth', 'The Orchard', 'Pyle', 'Bonchurch',
       'Ventnor', 'Chale', 'Niton', 'Blackgang', 'Nettlecombe',
       'Atherfield Green', 'Lowtherville', 'Whitwell', 'St Lawrence',
       'Upper Bonchurch', 'Luccombe Village', 'Chale Green', 'Wroxall',
       'Bierley', 'Steephill', 'Cowlease', 'Southford', 'Weeks',
       'Bowcombe', 'Little Whitehouse', 'Plaish', 'Fairlee', 'Littletown',
       'Rew Street', 'Roslin', "Mark's Corner", 'Alverstone',
       'Porchfield', 'Marsh Green', 'Hunny Hill', 'Wootton Common',
       'Gurnard', 'Whiteley Bank', 'Winford', 'Ningwood', 'East Cowes',
       'Branstone', 'Northwood', 'Carisbrooke', 'Shorw

In [23]:
# Road names related to populated place - quick hack
#
# Pair every road with every settlement that shares its first word, e.g.
# ('Colwell Lane', 'Colwell'). The result is a set of (road, settlement) tuples.
# Matching is on the first word only, so this is a heuristic, not a real
# spatial or semantic link.

# Leading words that are too generic to suggest a link between road and place.
generic_first_words = {"St", "The", "Upper", "Little", "Lower", "North", "South", "East", "West", "Old"}

# Index the settlements by first word once, so each road is a single lookup
# instead of a scan over every settlement (and every name is split only once).
settlements_by_first_word = {}
for s in settlement_names:
    # Skip missing (non-string) or blank names rather than crashing on them.
    words = s.split() if isinstance(s, str) else []
    if words and words[0] not in generic_first_words:
        settlements_by_first_word.setdefault(words[0], []).append(s)

road_names_place_first = set()
for r in road_names:
    words = r.split() if isinstance(r, str) else []
    if words:
        # Generic first words were never indexed, so they yield no pairs here.
        road_names_place_first.update(
            (r, s) for s in settlements_by_first_word.get(words[0], ())
        )

road_names_place_first

{('Afton Road', 'Afton'),
 ('Alverstone Road', 'Alverstone'),
 ('Alverstone Road', 'Alverstone Garden Village'),
 ('Appley Road', 'Appley'),
 ('Appley Walk', 'Appley'),
 ('Apse Manor Road', 'Apse Heath'),
 ('Ashey Road', 'Ashey'),
 ('Atherfield Road', 'Atherfield Green'),
 ('Barton Road', 'Barton'),
 ('Bathingbourne Lane', 'Bathingbourne'),
 ('Blackgang Road', 'Blackgang'),
 ('Blackwater Hollow', 'Blackwater'),
 ('Blackwater Road', 'Blackwater'),
 ('Bowcombe Road', 'Bowcombe'),
 ('Calbourne Road', 'Calbourne'),
 ('Carpenters Road', 'Carpenters'),
 ('Chale Lane', 'Chale'),
 ('Chale Lane', 'Chale Green'),
 ('Chale Street', 'Chale'),
 ('Chale Street', 'Chale Green'),
 ('Chilton Lane', 'Chilton Green'),
 ('Colwell Chine Road', 'Colwell'),
 ('Colwell Lane', 'Colwell'),
 ('Colwell Road', 'Colwell'),
 ('Downend Road', 'Downend'),
 ('Fairlee Road', 'Fairlee'),
 ('Fernhill', 'Fernhill'),
 ('Five Houses Lane', 'Five Houses'),
 ('Forest Road', 'Forest Side'),
 ('Freshwater Road', 'Freshwater'),
 