In [8]:
import requests
import pymysql
import csv
import pandas as pd
import osmnx as ox
import yaml
import zipfile
import io
import math
import numpy as np
import os

# import httplib2
# import oauth2
# import tables
# import mongodb
# import sqlite


################ These functions are used for downloading data and transforming it by the scheme csv <-> db <-> df ################

def download_data_csv(file_url_list, file_name_list):
    """
    Download data from the web in the format of csv.
    """

    if len(file_url_list) != len(file_name_list):
        raise Exception("file_url_list and file_name_list must be of equal length")

    for i in range(len(file_url_list)):
        file_url = file_url_list[i]
        file_name = file_name_list[i]
        print (f"Downloading data from {file_url}")
        response = requests.get(file_url)
        if response.status_code == 200:
            with open("./" + file_name, "wb") as file:
                file.write(response.content)

def download_data_zip(file_url, file_name):
    """
    Download data from the web in the format of zip.
    """
    
    # Getting zip
    response = requests.get(file_url)

    if response.status_code == 200:
        print("ZIP file has been downloaded successfully!")

        # Getting csv
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
        csv_file = zip_file.open(file_name)

        # Saving csv
        file = open("./" + file_name, "wb")
        file.write(csv_file.read())
        print("File has been saved successfully.")

def download_census_data(code, base_dir=''):

    url = f'https://www.nomisweb.co.uk/output/census/2021/census2021-{code.lower()}.zip'
    extract_dir = os.path.join(base_dir, os.path.splitext(os.path.basename(url))[0])

    if os.path.exists(extract_dir) and os.listdir(extract_dir):
        print(f"Files already exist at: {extract_dir}.")
        return

    os.makedirs(extract_dir, exist_ok=True)
    response = requests.get(url)
    response.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        zip_ref.extractall(extract_dir)

    print(f"Files extracted to: {extract_dir}")

def load_census_data(code, level='msoa'):
    
    return pd.read_csv(f'census2021-{code.lower()}/census2021-{code.lower()}-{level}.csv')

def create_connection(database='ads_2024'):
    """ 
    Create a database connection to the MariaDB database 
    specified by the host url and database name.
    Returns Connection object or None.
    """
    
    with open("credentials.yaml") as file:
        credentials = yaml.safe_load(file)

    conn = None
    try:
        conn = pymysql.connect(user=credentials["username"],
                               passwd=credentials["password"],
                               host=credentials["url"],
                               port=(int)(credentials["port"]),
                               local_infile=1,
                               db=database
                               )
        print(f"Connection established!")
    except Exception as e:
        print(f"Error connecting to the MariaDB Server: {e}")
    
    return conn

def csv_to_df(file_name):

    try:
        df = pd.read_csv("./" + file_name)
        return df
    except Exception as e:
        print(f"Error loading CSV file: {e}")
        return None

def csv_to_db(conn, file_name, table_name, ignore_first_row):

    cur = conn.cursor()

    csv_file_path = './' + file_name
    print(f'Loading data into the table')
    if ignore_first_row:
        cur.execute(f"LOAD DATA LOCAL INFILE '{csv_file_path}' INTO TABLE `{table_name}` FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED by '\"' LINES STARTING BY '' TERMINATED BY '\n' IGNORE 1 LINES;")
    else:
        cur.execute(f"LOAD DATA LOCAL INFILE '{csv_file_path}' INTO TABLE `{table_name}` FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED by '\"' LINES STARTING BY '' TERMINATED BY '\n';")
    print(f'Data loaded into the table {table_name}')

    conn.commit()

def db_to_csv(conn, table_name, file_name):

    try:
        cur = conn.cursor()

        cur.execute(f"SELECT * FROM `{table_name}`;")
        column_titles = [desc[0] for desc in cur.description]
        rows = cur.fetchall()
        
        with open(file_name, mode='w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(column_titles)
            writer.writerows(rows)
        print(f"Data successfully exported to {file_name}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        conn.commit()

def df_to_csv(df, file_name):
    
    try:
        df.to_csv("./" + file_name, index=False)
        print(f"DataFrame successfully saved to {file_name}")
    except Exception as e:
        print(f"Error: {e}")

################ These functions are used for operating with OSM-style data ################

def get_bounds(lat, lon, km_box_dimension):
    """
    lat: Latitude of the center point in decimal degrees.
    lon: Longitude of the center point in decimal degrees.
    km_distance: Distance between sides in kilometers.
    Returns (up, down, left, right): border coordinates.
    """

    earth_radius = 6371.0 # km
    km_distance = km_box_dimension / 2

    angular_distance = km_distance / earth_radius
    
    up_lat = lat + math.degrees(angular_distance)
    down_lat = lat - math.degrees(angular_distance)
    
    left_lon = lon - math.degrees(angular_distance / math.cos(math.radians(lat)))
    right_lon = lon + math.degrees(angular_distance / math.cos(math.radians(lat)))
    
    return (up_lat, down_lat, left_lon, right_lon)

def get_bounds_by_name(place_name):
    
    # Fetch the boundary polygon for the given place name
    gdf = ox.geocode_to_gdf(place_name)
    
    bounds = gdf.total_bounds

    return bounds

def km_distance(lat1, lon1, lat2, lon2):

    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Radius of the Earth in kilometers
    R = 6371.0

    # Differences in coordinates
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    
    km_distance = R * c
    return km_distance

def get_all_buildings_from_osm_by_bounds(bounds, tags = {'building': True}):

    (north, south, east, west) = bounds

    # Get information about all buildings in the area
    all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
    
    return all_buildings_from_osm


In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import osmnx as ox
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score

# import bokeh
# import seaborn
# import sklearn.decomposition as decomposition
# import sklearn.feature_extraction


def csv_preview(file_name):

    try:
        with open("./" + file_name, 'r') as file:
            column_titles = file.readline().strip()
            print("Column Titles:")
            print(column_titles)

            print("\nFirst 10 Rows:")
            for i in range(10):
                row = file.readline().strip()
                if row:
                    print(row)
                else:
                    break
    except Exception as e:
        print(f"Error: {e}")

def execute_query(conn, query):
    """
    Examples are:
    SHOW TABLES;
    SHOW TABLE STATUS LIKE '{table_name}';
    SHOW INDEX FROM `{table_name}`;
    SELECT * FROM `{table_name}` LIMIT {sample_size};
    SELECT count(*) FROM `{table_name}`;
    SELECT MIN({column}) AS min_value, MAX({column}) AS max_value FROM `{table_name}`
    """

    cur = conn.cursor()
    cur.execute(query)
    rows = cur.fetchall()

    conn.commit()

    return rows

def create_table_by_query(conn, table_name, columns, column_names, data_types, constraints):

    if columns != len(column_names):
        raise Exception("columns != len(column_names)")
    elif columns != len(data_types):
        raise Exception("columns != len(data_types)")
    elif columns != len(constraints):
        raise Exception("columns != len(constraints)")
    else:
        query = f'CREATE TABLE {table_name} ({", ".join([f"`{column_names[i]}` {data_types[i]} {constraints[i]}" for i in range(columns)])})'
        print(query)
        execute_query(conn, query)

def create_single_index(conn, index_name, table_name, field_name):

    rows = execute_query(conn, f"CREATE INDEX {index_name} ON {table_name}({field_name});")

    return rows

def create_multiple_index(conn, index_name, table_name, field_names):

    rows = execute_query(conn, f"CREATE INDEX {index_name} ON `{table_name}` ({', '.join(field_names)})")

    return rows

def get_summary_on_db(conn):

    tables = execute_query(conn, "SHOW TABLES;")

    for row in tables:
        table_name = row[0]
        print(f"\nTable: {table_name}")

        table_status = execute_query(conn, f"SHOW TABLE STATUS LIKE '{table_name}';")
        approx_row_count = table_status[0][4] if table_status else 'Unable to fetch row count'
        print("\nApprox Row Count:", approx_row_count//100000/10, "M")

        limit = 5
        column_names = execute_query(conn, f"SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}';")
        first_rows = execute_query(conn, f"SELECT * FROM `{table_name}` LIMIT {limit};")
        print(tuple(item[0] for item in column_names))
        for row in first_rows:
            print(row)

        indices = execute_query(conn, f"SHOW INDEX FROM `{table_name}`;")
        if indices:
            print("\nIndices:")
            for index in indices:
                print(f" - {index[2]} ({index[10]}): Column {index[4]}")
        else:
            print("\nNo indices set on this table.")

In [36]:
import sys

def currently_unused_download_data(file_url_list, file_name_list):
    """
    Download data from the web in the format of CSV, displaying download progress.
    """

    if len(file_url_list) != len(file_name_list):
        raise Exception("file_url_list and file_name_list must be of equal length")

    for i in range(len(file_url_list)):
        file_url = file_url_list[i]
        file_name = file_name_list[i]
        print(f"Downloading data from {file_url}")
        
        # Send GET request with streaming
        response = requests.get(file_url, stream=True)
        
        if response.status_code == 200:
            # Get total file size from headers
            total_size = int(response.headers.get('content-length', 0))
            if total_size == 0:
                print(f"Warning: Content-Length is missing. Unable to calculate progress.")
            
            # Open the file for writing in binary mode
            with open(file_name, "wb") as file:
                downloaded_size = 0
                for chunk in response.iter_content(chunk_size=1024):  # download in chunks of 1 KB
                    if chunk:  # filter out keep-alive new chunks
                        file.write(chunk)
                        downloaded_size += len(chunk)
                        
                        # Calculate and display the progress
                        progress = (downloaded_size / total_size) * 100
                        print(f"\rDownloading: {progress:.2f}% ({downloaded_size}/{total_size} bytes)", end="")
                        
                        # Flush output to show progress immediately
                        sys.stdout.flush()
            
            print("\nDownload completed!")
        else:
            print(f"Failed to download {file_url} with status code {response.status_code}")

In [None]:
import requests
import geopandas as gpd
from shapely.geometry import Point, Polygon

overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = """
[out:json];
area["name"="Luxembourg"]->.searchArea;
(
  node["building"](area.searchArea);
  way["building"](area.searchArea);
  relation["building"](area.searchArea);
);
out body;
>;
out skel qt;
"""
response = requests.get(overpass_url, params={'data': overpass_query})

if response.status_code == 200:
    data = response.json()
    elements = data['elements']

    # Parse nodes
    nodes = {element['id']: (element['lon'], element['lat'])
             for element in elements if element['type'] == 'node'}

    # Parse ways and build geometries
    buildings = []
    for element in elements:
        if element['type'] == 'way' and 'tags' in element:
            node_refs = element['nodes']
            coordinates = [nodes[node_id] for node_id in node_refs if node_id in nodes]
            if len(coordinates) > 2:  # Ensure valid polygons
                polygon = Polygon(coordinates)
                buildings.append({
                    "id": element['id'],
                    "tags": element.get('tags', {}),
                    "geometry": polygon
                })

    # Convert to GeoDataFrame
    if buildings:
        gdf = gpd.GeoDataFrame(buildings)
        gdf['geometry'] = gdf['geometry'].apply(lambda x: x)  # Set geometry column
        gdf.to_csv('osm_buildings_l.csv', index=False)
        print("Data saved to osm_buildings_l.csv")
    else:
        print("No buildings found.")
else:
    print(f"Failed to fetch data: {response.status_code}")


# Assessment for Advanced Data Science
# *Data Pipelines*

## Radzim Sendyka, Christian Cabrera, Carl Henrik Ek and Neil D. Lawrence

### 19 November 2024

Welcome to the course assessment for the Advanced Data Science unit. In this assessment, you will build two data pipelines and prediction systems.

The maximum total mark for this assessment is 20. That mark is split into two miniprojects below.
- The first miniproject is worth 7 marks.
- The second miniproject is worth 13 marks.

*The check Session for this Assessment is 03 December 2024.*

# Task 1 (7 marks)

This example project builds on your experience from the practicals, using Open Street Maps, and connecting them to real-world datasets.

Your prediction system will be based on data from the 2021 UK Census, and Open Street Maps.

### Build a data pipeline and prediction system.

Structure your system around the [Access-Assess-Address](https://inverseprobability.com/talks/notes/access-assess-address-a-pipeline-for-automated-data-science.html) framework for automated data science.
Use the framework of [Data Readiness Levels](https://inverseprobability.com/publications/data-readiness-levels.html) to track the state of your datasets throughout the process.
Visualise your data and predictions using appropriate charts and graphics.
Remember, your notebook should tell a story.

1.1. Download the data from online sources. In this task you will need:
  - UK Census Data
    - The UK Census data file for and Socio-economic Classification (NS SEC) [here](https://www.ons.gov.uk/datasets/TS062/editions/2021/versions/5) or [here](https://www.nomisweb.co.uk/sources/census_2021_bulk). Use the most detailed area type (Output Areas).
    - One other UK Census data file of your choosing [here](https://www.ons.gov.uk/search?topics=9731,6646,3845,9497,4262,4128,7755,4994,6885,9724,7367&filter=datasets) or [here](https://www.nomisweb.co.uk/sources/census_2021_bulk). Use the most detailed area type available.
    - Geographic coordinates of Census Output Areas [here](https://www.data.gov.uk/dataset/4d4e021d-fe98-4a0e-88e2-3ead84538537/output-areas-december-2021-boundaries-ew-bgc-v2).
  - Spatial data
    - Set up an API connection to Open Street Maps [here](https://osmnx.readthedocs.io/en/stable/). Optionally, to be able to use more data in training, you might want to also download the entire map for England (or more) [here](https://download.openstreetmap.fr/extracts/) or [here](https://wiki.openstreetmap.org/wiki/Planet.osm).

1.2. Structure the data, and upload it to your AWS database.

- Census datasets can be huge and unwieldy to manipulate directly in python frameworks such as pandas. As a result we will host the data in a relational database.

- Using a cloud hosted database (such as MariaDB hosted on the AWS RDS service),  you will construct a database containing tables that contain per-output-area data, latitudes and longitudes of the output areas, and potentially, extracted selected open street maps features from the country-wide data file.

- Make sure you set the correct indices on your data, which will allow your code to run much faster. Hint: create an index on any columns you will be looking up, and joint indices if your lookup involves multiple columns (like coordinates).

You will likely find the following resources helpful.

- Lecture 1, 2 and 3.
- Lab class 1 and 2.


- *Note, this particular dataset could probably be handled directly in pandas, but you are explicitly tasked with using a database. This is a compromise from previous iterations of this course, where students spent quite a lot of time struggling from the size of the data. This is an opportunity for you to demonstrate your skills, as well as prepare for the second part of the assessment where databases might be necessary. Approach that do not use databases where needed will be penalised during marking.*


2.0. Explore the data.

  - To make predictions you will augment your data with information obtained from Open Street Map: an open license source of mapping information. You will need to use the techniques you have learnt in the course to indentify and incorporate useful features for your prediction tasks.
  - Look at the percentage of students (NS SEC code L15) in the individual output areas, and explore how it's related to various types of open street map features in those locations. Use the relationships you found to suggest the features to be used in a system that predicts percentage of students in a given area.
  - Do the same for an additional census variable (e.g., the average age in a given area).

3.0. Make a prediction system.
- Use Open Street Maps to predict Socio-Economic Status - in particular, the percent of full time students, from OSM features. Your function should take as input a coordinate pair (you can assume it will be in England), and return an estimate for the share of students in that area.

- Do the same for the additional census variable you selected in the previous step.

Hints:

  - *Some of the census data sheets are very detailed. You might want to try "boiling them down" to some aggregate value or selecting just the most important columns.*

  - *This assignment is focused on data pipelines, rather than machine learning, so we do not expect any advanced architectures here - a linear model or similar is perfectly fine.*
  
  - *For data exploration, or picking your second dataset, you may find useful the [interactive map](https://www.ons.gov.uk/census/maps/) of the census data.*

### Story.

- Remember the notebook you create should tell a story, any code that is not critical to that story can safely be placed into the associated analysis library and imported for use (structured as given in the Fynesse template). Make sure your decisions are well substanciated and flow nicely throughout the notebook, with clear explanations.

- Explain everything you do. Why is your question important, why you selected the datasets you did, why did you join them how you did, why did you look at the things you looked at.

Make sure in your notebook you discuss the design decisions you made in your pipeline and prediction system. How did you structure your notebook? How did you track data readiness? Which aspects of your system could be improved further?

Make sure you compare how you predict student population and your second chosen property. What differences are there, in the underlying data, process, and results?

Make sure you discuss the reusability of your data pipeline and prediction system. What changes to the underlying data/system is it resilient to, and what isn't it? What work would need to be done to adjust it to answer questions about other statistics, or a different country, instead?

### Fynesse library.

- Alongside your implementation you will provide a short repository overview describing how you have implemented the different parts of the project and where you have placed those parts in your code repository. You will submit your code alongside a version of this notebook that will allow your examiner to understand and reconstruct the thinking behind your analysis. This notebook is structured to help you in creating that description and allow you to understand how we will allocate the marks. You should make use of the Fynesse framework (https://github.com/lawrennd/fynesse_template) for structuring your code.

- One artefact to be included in your submission is a python library structured according to the "Access, Assess, Address" standard for data science solutions. You will submit this library alongside your code. Use the cell below to perform the necessary installation instructions for your library.
You should base your module on the template repository given by the Fynesse template repository. That should make it pip-installable as below.
`%pip install git+https://github.com/lawrennd/fynesse_template.git`. You can either replace fynesse with the name you've given your analysis module or you can leave the name as fynesse if you prefer.

- Remember the notebook you create should tell a story, any code that is not critical to that story can safely be placed into the associated analysis library and imported for use (structured as given in the Fynesse template). Remember to include you fynesse library in your submission.

Let's start with installing, importing, and checking contents of fynesse library:

In [None]:
%pip uninstall --yes fynesse
%pip install git+https://github.com/sdenyskov/sd995_ads_2024.git

In [2]:
import fynesse

In [5]:
import os

print('FUNCTION DECLARATIONS:')
print()
package_path = os.path.dirname(fynesse.__file__)
for root, dirs, files in os.walk(package_path):
    for file in files:
        file_path = os.path.join(root, file)
        if file_path.endswith("s.py"):
            print(f"--- Function declarations in {file_path} ---\n")
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
                    for line in lines:
                        if line.strip().startswith('def '):
                            print(line.strip())
            except Exception as e:
                print(f"Could not read {file_path}: {e}")
            print()

FUNCTION DECLARATIONS:

--- Function declarations in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/fynesse/address.py ---

def fit_and_predict(x, y, x_pred, design_matrix = lambda x: x.reshape(-1, 1), regularised = False, alpha = 0.05, L1_wt = 1.0):
def cross_validation(x, y, k):
def predict_age_profile(nssec_df, age_df, nssec_row_to_predict):

--- Function declarations in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/fynesse/access.py ---

def download_data_csv(file_url_list, file_name_list):
def download_data_zip(file_url, file_name):
def download_census_data(code, base_dir=''):
def load_census_data(code, level='msoa'):
def create_connection(database='ads_2024'):
def csv_to_df(file_name):
def csv_to_db(conn, file_name, table_name):
def db_to_csv(conn, table_name, file_name):
def df_to_csv(df, file_name):
def get_bounds(lat, lon, km_box_dimension):
def get_bounds_by_name(place_name):
def km_distance(lat1, lon1, lat2, 

In [4]:
# fynesse.access.config['data_url']

## Access

In [None]:
%pip install osmnx

In [10]:
import pandas as pd
import numpy as np
import osmnx as ox
import requests
import os

We will start with downloading needed data from online sources. In this task we will need the following:

- UK Census data file for and Socio-economic Classification (NS SEC) - we will use the most detailed area type (Output Areas).

In [37]:
download_census_data('TS062')
nssec_2021_df = fynesse.access.load_census_data('TS062', level='oa')
nssec_2021_df

Files already exist at: census2021-ts062.


Unnamed: 0,date,geography,geography code,National Statistics Socio-economic Classification (NS-SEC): Total: All usual residents aged 16 years and over,"National Statistics Socio-economic Classification (NS-SEC): L1, L2 and L3 Higher managerial, administrative and professional occupations","National Statistics Socio-economic Classification (NS-SEC): L4, L5 and L6 Lower managerial, administrative and professional occupations",National Statistics Socio-economic Classification (NS-SEC): L7 Intermediate occupations,National Statistics Socio-economic Classification (NS-SEC): L8 and L9 Small employers and own account workers,National Statistics Socio-economic Classification (NS-SEC): L10 and L11 Lower supervisory and technical occupations,National Statistics Socio-economic Classification (NS-SEC): L12 Semi-routine occupations,National Statistics Socio-economic Classification (NS-SEC): L13 Routine occupations,National Statistics Socio-economic Classification (NS-SEC): L14.1 and L14.2 Never worked and long-term unemployed,National Statistics Socio-economic Classification (NS-SEC): L15 Full-time students
0,2021,E00000001,E00000001,159,80,38,13,9,3,5,5,4,2
1,2021,E00000003,E00000003,222,96,58,8,26,2,5,2,5,20
2,2021,E00000005,E00000005,103,37,28,8,21,0,3,1,1,4
3,2021,E00000007,E00000007,140,62,39,5,11,3,3,1,6,10
4,2021,E00000010,E00000010,170,34,45,17,27,7,10,14,11,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
188875,2021,W00010693,W00010693,737,2,8,5,8,7,9,14,2,682
188876,2021,W00010694,W00010694,304,35,87,40,31,23,26,33,7,22
188877,2021,W00010695,W00010695,154,12,44,22,15,12,15,17,11,6
188878,2021,W00010696,W00010696,176,32,55,26,14,9,13,12,7,8


- One other UK Census data file of our choosing - we will use Migrant Indicator - we will use the most detailed area type (Output Areas).

In [25]:
download_census_data('TS019')
mi_2021_df = fynesse.access.load_census_data('TS019', level='oa')
mi_2021_df

Files already exist at: census2021-ts019.


Unnamed: 0,date,geography,geography code,Migrant indicator: Total: All usual residents; measures: Value,Migrant indicator: Address one year ago is the same as the address of enumeration; measures: Value,Migrant indicator: Address one year ago is student term-time or boarding school address in the UK; measures: Value,Migrant indicator: Migrant from within the UK: Address one year ago was in the UK; measures: Value,Migrant indicator: Migrant from outside the UK: Address one year ago was outside the UK; measures: Value
0,2021,E00060274,E00060274,271,253,0,18,0
1,2021,E00060275,E00060275,407,386,0,19,2
2,2021,E00060276,E00060276,257,245,0,12,0
3,2021,E00060277,E00060277,275,244,0,31,0
4,2021,E00060279,E00060279,272,257,0,15,0
...,...,...,...,...,...,...,...,...
188875,2021,W00006938,W00006938,217,204,0,13,0
188876,2021,W00006940,W00006940,418,406,0,12,0
188877,2021,W00006941,W00006941,273,259,0,13,1
188878,2021,W00006942,W00006942,327,315,1,11,0


- Geographic coordinates of Census Output Areas.

In [23]:
download_data_csv(['https://open-geography-portalx-ons.hub.arcgis.com/api/download/v1/items/6beafcfd9b9c4c9993a06b6b199d7e6d/csv?layers=0'], ['uk_oa_list.csv'])
oa_df = csv_to_df("uk_oa_list.csv")
oa_df

Downloading data from https://open-geography-portalx-ons.hub.arcgis.com/api/download/v1/items/6beafcfd9b9c4c9993a06b6b199d7e6d/csv?layers=0


Unnamed: 0,FID,OA21CD,LSOA21CD,LSOA21NM,LSOA21NMW,BNG_E,BNG_N,LAT,LONG,Shape__Area,Shape__Length,GlobalID
0,1,E00000001,E01000001,City of London 001A,,532250,181864,51.52022,-0.09523,6949.151482,421.166161,3a44dd3d-5082-4a09-9b9c-3a5fadc811ed
1,2,E00000003,E01000001,City of London 001A,,532171,181819,51.51983,-0.09638,4492.411072,307.714653,f1216dc8-14d1-4857-9230-cab0641758fb
2,3,E00000005,E01000001,City of London 001A,,532166,181722,51.51896,-0.09649,8565.514214,385.204781,44d6f70f-549c-4288-9b6d-de2adbf02582
3,4,E00000007,E01000001,City of London 001A,,532088,181473,51.51674,-0.09771,75994.829704,1408.607657,4dd683e1-9a5c-46cf-9e19-8465c8fbb6cb
4,5,E00000010,E01000003,City of London 001C,,532092,182114,51.52250,-0.09741,2102.876602,215.271975,7476781f-8fe4-4c9b-bde1-0eecbd146dff
...,...,...,...,...,...,...,...,...,...,...,...,...
188875,188876,W00010693,W01000062,Gwynedd 001A,Gwynedd 001A,257885,371596,53.22236,-4.13012,117624.672455,2070.603553,5f872a8d-7128-47b4-a6ab-487a8aa93716
188876,188877,W00010694,W01000645,Carmarthenshire 006C,Sir Gaerfyrddin 006C,240146,220834,51.86313,-4.32295,261298.148006,3228.184286,69c22977-5117-45ee-89d5-0eb35fc713b1
188877,188878,W00010695,W01000664,Carmarthenshire 016C,Sir Gaerfyrddin 016C,256581,213406,51.80086,-4.08136,196049.177578,2232.955182,b511da17-e4e3-4425-bd9b-bea99d2c5071
188878,188879,W00010696,W01001923,Carmarthenshire 026G,Sir Gaerfyrddin 026G,252233,198463,51.66546,-4.13803,358656.479244,2928.234067,b36d2ac0-a236-4e97-8dc0-044327808718


Meanings of the columns: 
- FID - Unique feature identifier (internal reference).
- OA21CD - Output Area 2021 Code (smallest geographic unit, also used in downloaded census data).
- LSOA21CD - Lower Layer Super Output Area 2021 Code (larger area made of Output Areas).
- LSOA21NM - Name of the Lower Layer Super Output Area 2021.
- LSOA21NMW - Welsh name of the Lower Layer Super Output Area 2021 (if applicable).
- BNG_E - Easting coordinate (British National Grid system).
- BNG_N - Northing coordinate (British National Grid system).
- LAT - Latitude in decimal degrees (geographic position).
- LONG - Longitude in decimal degrees (geographic position).
- Shape__Area - Area of the geographical feature (polygon), typically in square meters or km².
- Shape__Length - Perimeter length of the geographical feature, in meters or kilometers.
- GlobalID - Unique global identifier for the feature (used across systems).

- We will also need to get spatial data - we will get it using Open Street Maps.

In [14]:
features_df = [
 'geometry',
 'ways',
 'nodes',
 'type',
 'addr:country',
 'addr:postcode',
 'addr:street',
 'addr:housenumber',
 'addr:city',
 'addr:housename',
 'addr:place',

 'name',
 'alt_name',
 'old_name',
 'nohousenumber',
 'note',

 'disused',
 'height',

 'amenity',
 'tourism',
 'brand',
 'leisure',
 'cuisine',
 'emergency',
 'historic',
 'sport',
 'shop',
 'religion',
 'power',
 'military',

 'house',
 'building',
 'building:material',
 'building:levels',
 'internet_access',

 'capacity',
 'roof:levels',
 'roof:shape'
 'fee',
 'man_made',
 'denomination',
 'layer']

features_db = ['element_type', 'osmid'] + features_df

In [20]:
ew_bounds = {"north": 55.8, "south": 49.8, "east": 2.0, "west": -7.0}

west = -7.0
south = 49.8
step = 0.5
tags = {"building": True}

for i in range(5, 12):
    for j in range(0, 18):
        if i == 3 and (j == 13 or j == 14):
            continue # Greater London has to be processed separately
        file_name = f'osm_uk_{i}_{j}.csv'
        print(f'Getting dataframe to form {file_name}')
        bounds = (south + step * (i + 1), south + step * i, west + step * (j + 1), west + step * j)
        try:
            df = get_all_buildings_from_osm_by_bounds(bounds, tags)
            for feature in features_df:
                if feature not in df.columns:
                    df[feature] = None
            df = df[features_df]
            # for column in df.columns:
            #     print(f"Column '{column}' has {df[column].count()} non-empty entries. 5 examples of non-empty entries: {df[column].dropna().head(5).tolist()}")
            df.to_csv("./" + file_name, index=True, header=True)
            print(f'{file_name} has been formed successfully')
        except Exception as e:
            print(f'{file_name} has not been created due to error "{e}"')

Getting dataframe to form osm_uk_5_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_0.csv has been formed successfully
Getting dataframe to form osm_uk_5_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_1.csv has been formed successfully
Getting dataframe to form osm_uk_5_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_2.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_5_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_3.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_5_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_4.csv has been formed successfully
Getting dataframe to form osm_uk_5_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_5.csv has been formed successfully
Getting dataframe to form osm_uk_5_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_6.csv has been formed successfully
Getting dataframe to form osm_uk_5_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_7.csv has been formed successfully
Getting dataframe to form osm_uk_5_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_8.csv has been formed successfully
Getting dataframe to form osm_uk_5_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_9.csv has been formed successfully
Getting dataframe to form osm_uk_5_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_10.csv has been formed successfully
Getting dataframe to form osm_uk_5_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_11.csv has been formed successfully
Getting dataframe to form osm_uk_5_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_12.csv has been formed successfully
Getting dataframe to form osm_uk_5_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_13.csv has been formed successfully
Getting dataframe to form osm_uk_5_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_14.csv has been formed successfully
Getting dataframe to form osm_uk_5_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_15.csv has been formed successfully
Getting dataframe to form osm_uk_5_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_16.csv has been formed successfully
Getting dataframe to form osm_uk_5_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_5_17.csv has been formed successfully
Getting dataframe to form osm_uk_6_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_0.csv has been formed successfully
Getting dataframe to form osm_uk_6_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_1.csv has been formed successfully
Getting dataframe to form osm_uk_6_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_2.csv has been formed successfully
Getting dataframe to form osm_uk_6_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_3.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_6_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_4.csv has been formed successfully
Getting dataframe to form osm_uk_6_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_5.csv has been formed successfully
Getting dataframe to form osm_uk_6_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_6.csv has been formed successfully
Getting dataframe to form osm_uk_6_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_7.csv has been formed successfully
Getting dataframe to form osm_uk_6_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_8.csv has been formed successfully
Getting dataframe to form osm_uk_6_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_9.csv has been formed successfully
Getting dataframe to form osm_uk_6_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_10.csv has been formed successfully
Getting dataframe to form osm_uk_6_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_11.csv has been formed successfully
Getting dataframe to form osm_uk_6_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_12.csv has been formed successfully
Getting dataframe to form osm_uk_6_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_13.csv has been formed successfully
Getting dataframe to form osm_uk_6_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_14.csv has been formed successfully
Getting dataframe to form osm_uk_6_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_15.csv has been formed successfully
Getting dataframe to form osm_uk_6_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_16.csv has been formed successfully
Getting dataframe to form osm_uk_6_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_6_17.csv has been formed successfully
Getting dataframe to form osm_uk_7_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_0.csv has been formed successfully
Getting dataframe to form osm_uk_7_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_1.csv has been formed successfully
Getting dataframe to form osm_uk_7_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_2.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_7_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_3.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_7_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_4.csv has been formed successfully
Getting dataframe to form osm_uk_7_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_5.csv has been formed successfully
Getting dataframe to form osm_uk_7_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_6.csv has been formed successfully
Getting dataframe to form osm_uk_7_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_7.csv has been formed successfully
Getting dataframe to form osm_uk_7_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_8.csv has been formed successfully
Getting dataframe to form osm_uk_7_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_9.csv has been formed successfully
Getting dataframe to form osm_uk_7_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_10.csv has been formed successfully
Getting dataframe to form osm_uk_7_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_11.csv has been formed successfully
Getting dataframe to form osm_uk_7_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_12.csv has been formed successfully
Getting dataframe to form osm_uk_7_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_13.csv has been formed successfully
Getting dataframe to form osm_uk_7_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_14.csv has been formed successfully
Getting dataframe to form osm_uk_7_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_15.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_7_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_16.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_7_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_7_17.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_8_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_0.csv has been formed successfully
Getting dataframe to form osm_uk_8_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_1.csv has been formed successfully
Getting dataframe to form osm_uk_8_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_2.csv has been formed successfully
Getting dataframe to form osm_uk_8_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_3.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_8_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_4.csv has been formed successfully
Getting dataframe to form osm_uk_8_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_5.csv has been formed successfully
Getting dataframe to form osm_uk_8_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_6.csv has been formed successfully
Getting dataframe to form osm_uk_8_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_7.csv has been formed successfully
Getting dataframe to form osm_uk_8_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_8.csv has been formed successfully
Getting dataframe to form osm_uk_8_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_9.csv has been formed successfully
Getting dataframe to form osm_uk_8_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_10.csv has been formed successfully
Getting dataframe to form osm_uk_8_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_11.csv has been formed successfully
Getting dataframe to form osm_uk_8_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_12.csv has been formed successfully
Getting dataframe to form osm_uk_8_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_13.csv has been formed successfully
Getting dataframe to form osm_uk_8_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_14.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_8_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_15.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_8_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_16.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_8_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_8_17.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_9_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_0.csv has been formed successfully
Getting dataframe to form osm_uk_9_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_1.csv has been formed successfully
Getting dataframe to form osm_uk_9_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_2.csv has been formed successfully
Getting dataframe to form osm_uk_9_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_3.csv has been formed successfully
Getting dataframe to form osm_uk_9_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_4.csv has been formed successfully
Getting dataframe to form osm_uk_9_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_5.csv has been formed successfully
Getting dataframe to form osm_uk_9_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_6.csv has been formed successfully
Getting dataframe to form osm_uk_9_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_7.csv has been formed successfully
Getting dataframe to form osm_uk_9_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_8.csv has been formed successfully
Getting dataframe to form osm_uk_9_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_9.csv has been formed successfully
Getting dataframe to form osm_uk_9_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_10.csv has been formed successfully
Getting dataframe to form osm_uk_9_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_11.csv has been formed successfully
Getting dataframe to form osm_uk_9_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_12.csv has been formed successfully
Getting dataframe to form osm_uk_9_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_13.csv has been formed successfully
Getting dataframe to form osm_uk_9_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_14.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_9_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_15.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_9_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_16.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_9_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_9_17.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_0.csv has been formed successfully
Getting dataframe to form osm_uk_10_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_1.csv has been formed successfully
Getting dataframe to form osm_uk_10_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_2.csv has been formed successfully
Getting dataframe to form osm_uk_10_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_3.csv has been formed successfully
Getting dataframe to form osm_uk_10_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_4.csv has been formed successfully
Getting dataframe to form osm_uk_10_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_5.csv has been formed successfully
Getting dataframe to form osm_uk_10_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_6.csv has been formed successfully
Getting dataframe to form osm_uk_10_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_7.csv has been formed successfully
Getting dataframe to form osm_uk_10_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_8.csv has been formed successfully
Getting dataframe to form osm_uk_10_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_9.csv has been formed successfully
Getting dataframe to form osm_uk_10_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_10.csv has been formed successfully
Getting dataframe to form osm_uk_10_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_11.csv has been formed successfully
Getting dataframe to form osm_uk_10_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_12.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_13.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_14.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_15.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_16.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_10_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_10_17.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_0.csv has been formed successfully
Getting dataframe to form osm_uk_11_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_1.csv has been formed successfully
Getting dataframe to form osm_uk_11_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_2.csv has been formed successfully
Getting dataframe to form osm_uk_11_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_3.csv has been formed successfully
Getting dataframe to form osm_uk_11_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_4.csv has been formed successfully
Getting dataframe to form osm_uk_11_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_5.csv has been formed successfully
Getting dataframe to form osm_uk_11_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_6.csv has been formed successfully
Getting dataframe to form osm_uk_11_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_7.csv has been formed successfully
Getting dataframe to form osm_uk_11_8.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_8.csv has been formed successfully
Getting dataframe to form osm_uk_11_9.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_9.csv has been formed successfully
Getting dataframe to form osm_uk_11_10.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_10.csv has been formed successfully
Getting dataframe to form osm_uk_11_11.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_11.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_12.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_12.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_13.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_13.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_14.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_14.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_15.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_15.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_16.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_16.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_11_17.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_11_17.csv has not been created due to error "No data elements in server response. Check log and query location/tags."


In [23]:
gl_bounds = {"north": 51.8, "south": 51.3, "east": 0.5, "west": -0.5}

west = -0.5
south = 51.3
step = 0.25
tags = {"building": True}

for i in range(0, 2):
    for j in range(0, 4):
        file_name = f'osm_uk_gl_{i}_{j}.csv'
        print(f'Getting dataframe to form {file_name}')
        bounds = (south + step * (i + 1), south + step * i, west + step * (j + 1), west + step * j)
        try:
            df = get_all_buildings_from_osm_by_bounds(bounds, tags)
            for feature in features_df:
                if feature not in df.columns:
                    df[feature] = None
            df = df[features_df]
            # for column in df.columns:
            #     print(f"Column '{column}' has {df[column].count()} non-empty entries. 5 examples of non-empty entries: {df[column].dropna().head(5).tolist()}")
            df.to_csv("./" + file_name, index=True, header=True)
            print(f'{file_name} has been formed successfully')
        except Exception as e:
            print(f'{file_name} has not been created due to error "{e}"')

Getting dataframe to form osm_uk_gl_0_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_0.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_1.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_2.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_3.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_4.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_5.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_6.csv has been formed successfully
Getting dataframe to form osm_uk_gl_0_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_0_7.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_0.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_1.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_2.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_3.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_4.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_5.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_6.csv has been formed successfully
Getting dataframe to form osm_uk_gl_1_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_1_7.csv has not been created due to error "No data elements in server response. Check log and query location/tags."
Getting dataframe to form osm_uk_gl_2_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_0.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_1.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_2.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_3.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_4.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_5.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_6.csv has been formed successfully
Getting dataframe to form osm_uk_gl_2_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_2_7.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_0.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_0.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_1.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_1.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_2.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_2.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_3.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_3.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_4.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_4.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_5.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_5.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_6.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_6.csv has been formed successfully
Getting dataframe to form osm_uk_gl_3_7.csv


  all_buildings_from_osm = ox.geometries_from_bbox(north, south, east, west, tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)
  return features.features_from_bbox(north, south, east, west, tags=tags)


osm_uk_gl_3_7.csv has been formed successfully


In [31]:
import subprocess
import glob

In [24]:
file_pattern = "osm_uk_*.csv"
file_list = glob.glob(file_pattern)
file_list.sort()

rows = 0
for file_name in file_list:
    try:
        result = subprocess.run(['wc', '-l', file_name], capture_output=True, text=True)
        row_count = int(result.stdout.split()[0]) - 1
        rows += row_count
        print(f'File {file_name} has {row_count} rows')
    except Exception as e:
        print(f'Error processing {file_name}: {e}')
print(f"Rows in total: {rows}")


File osm_uk_0_0.csv has 2528 rows
File osm_uk_0_1.csv has 2528 rows
File osm_uk_0_2.csv has 5637 rows
File osm_uk_0_3.csv has 57392 rows
File osm_uk_0_4.csv has 5397 rows
File osm_uk_0_5.csv has 205494 rows
File osm_uk_0_6.csv has 9018 rows
File osm_uk_0_7.csv has 41389 rows
File osm_uk_0_8.csv has 511699 rows
File osm_uk_10_0.csv has 55442 rows
File osm_uk_10_1.csv has 40252 rows
File osm_uk_10_10.csv has 277616 rows
File osm_uk_10_11.csv has 66901 rows
File osm_uk_10_2.csv has 11970 rows
File osm_uk_10_3.csv has 6405 rows
File osm_uk_10_4.csv has 5655 rows
File osm_uk_10_5.csv has 5765 rows
File osm_uk_10_6.csv has 16002 rows
File osm_uk_10_7.csv has 10396 rows
File osm_uk_10_8.csv has 20958 rows
File osm_uk_10_9.csv has 17964 rows
File osm_uk_11_0.csv has 288 rows
File osm_uk_11_1.csv has 1977 rows
File osm_uk_11_10.csv has 27813 rows
File osm_uk_11_2.csv has 3408 rows
File osm_uk_11_3.csv has 6492 rows
File osm_uk_11_4.csv has 76857 rows
File osm_uk_11_5.csv has 119976 rows
File os

Datasets that we are going to use can be huge and unwieldy to manipulate directly in python frameworks such as pandas. Therefore, we will host the data in a cloud hosted relational database (such as MariaDB hosted on the AWS RDS service).

Now let's structure the data and upload it to our AWS database.

We will construct tables that contain per-output-area data, latitudes and longitudes of the output areas, (and potentially, extracted selected open street maps features from the country-wide data file).

In [35]:
conn = create_connection()

Connection established!


In [27]:
# execute_query(conn, "DROP TABLE table_name;")

In [29]:
column_names = features_db
columns = len(column_names)
data_types = ['TEXT' for _ in range(columns)]
constraints = ['' for _ in range(columns)]

In [30]:
create_table_by_query(conn, 'osm_data', columns, column_names, data_types, constraints)

CREATE TABLE osm_data (`element_type` TEXT , `osmid` TEXT , `geometry` TEXT , `ways` TEXT , `nodes` TEXT , `type` TEXT , `addr:country` TEXT , `addr:postcode` TEXT , `addr:street` TEXT , `addr:housenumber` TEXT , `addr:city` TEXT , `addr:housename` TEXT , `addr:place` TEXT , `name` TEXT , `alt_name` TEXT , `old_name` TEXT , `nohousenumber` TEXT , `note` TEXT , `disused` TEXT , `height` TEXT , `amenity` TEXT , `tourism` TEXT , `brand` TEXT , `leisure` TEXT , `cuisine` TEXT , `emergency` TEXT , `historic` TEXT , `sport` TEXT , `shop` TEXT , `religion` TEXT , `power` TEXT , `military` TEXT , `house` TEXT , `building` TEXT , `building:material` TEXT , `building:levels` TEXT , `internet_access` TEXT , `capacity` TEXT , `roof:levels` TEXT , `roof:shapefee` TEXT , `man_made` TEXT , `denomination` TEXT , `layer` TEXT )


In [36]:
file_pattern = "osm_uk_*.csv"
file_list = glob.glob(file_pattern)
file_list.sort()

for file_name in file_list:
    try:
        csv_to_db(conn, file_name, 'osm_data', ignore_first_row=True)
        print(f'File {file_name} has been uploaded to the database successfully')
    except Exception as e:
        print(f'Error processing {file_name}: {e}')

Loading data into the table
Data loaded into the table osm_data
File osm_uk_0_5.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_0_6.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_0_7.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_0_8.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_10_0.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_10_1.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_10_10.csv has been uploaded to the database successfully
Loading data into the table
Data loaded into the table osm_data
File osm_uk_10_

In [39]:
column_names = ['date', 'geography', 'geography_code', 'NSSEC_Total', 'NSSEC_L1_L2_L3', 'NSSEC_L4_L5_L6', 'NSSEC_L7', 'NSSEC_L8_L9', 'NSSEC_L10_L11', 'NSSEC_L12', 'NSSEC_L13', 'NSSEC_L14', 'NSSEC_L15']
columns = len(column_names)
data_types = ['INT', 'TEXT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT', 'INT']
constraints = ['' for _ in range(columns)]

db_name = 'nssec_data'
file_name = 'census2021-ts062/census2021-ts062-oa.csv'

create_table_by_query(conn, db_name, columns, column_names, data_types, constraints)
csv_to_db(conn, file_name, db_name, ignore_first_row=True)

In [42]:
column_names = ['date', 'geography', 'geography_code', 'MI_Total', 'MI_AddrYearAgoSameInUK', 'MI_AddrYearAgoStudentInUK', 'MI_AddrYearAgoInUK', 'MI_AddrYearAgoOutUK']
columns = len(column_names)
data_types = ['INT', 'TEXT', 'TEXT', 'INT', 'INT', 'INT', 'INT', 'INT']
constraints = ['' for _ in range(columns)]

db_name = 'mi_data'
file_name = 'census2021-ts019/census2021-ts019-oa.csv'

create_table_by_query(conn, db_name, columns, column_names, data_types, constraints)
csv_to_db(conn, file_name, db_name, ignore_first_row=True)

CREATE TABLE mi_data (`date` INT , `geography` TEXT , `geography_code` TEXT , `MI_Total` INT , `MI_AddrYearAgoSameInUK` INT , `MI_AddrYearAgoStudentInUK` INT , `MI_AddrYearAgoInUK` INT , `MI_AddrYearAgoOutUK` INT )
Loading data into the table
Data loaded into the table mi_data


In [44]:
column_names = ['FID', 'OA21CD', 'LSOA21CD', 'LSOA21NM', 'LSOA21NMW', 'BNG_E', 'BNG_N', 'LAT', 'LONG', 'ShapeArea', 'ShapeLength', 'GlobalID']
columns = len(column_names)
data_types = ['INT', 'TEXT', 'TEXT', 'TEXT', 'TEXT', 'INT', 'INT', 'FLOAT', 'FLOAT', 'FLOAT', 'FLOAT', 'TEXT']
constraints = ['' for _ in range(columns)]

db_name = 'oa_data'
file_name = 'uk_oa_list.csv'

create_table_by_query(conn, db_name, columns, column_names, data_types, constraints)
csv_to_db(conn, file_name, db_name, ignore_first_row=True)

CREATE TABLE oa_data (`FID` INT , `OA21CD` TEXT , `LSOA21CD` TEXT , `LSOA21NM` TEXT , `LSOA21NMW` TEXT , `BNG_E` INT , `BNG_N` INT , `LAT` FLOAT , `LONG` FLOAT , `ShapeArea` FLOAT , `ShapeLength` FLOAT , `GlobalID` TEXT )
Loading data into the table
Data loaded into the table oa_data


In [47]:
get_summary_on_db(conn)


Table: mi_data

Approx Row Count: 0.1 M
('date', 'geography', 'geography_code', 'MI_Total', 'MI_AddrYearAgoSameInUK', 'MI_AddrYearAgoStudentInUK', 'MI_AddrYearAgoInUK', 'MI_AddrYearAgoOutUK')
(2021, 'E00060274', 'E00060274', 271, 253, 0, 18, 0)
(2021, 'E00060275', 'E00060275', 407, 386, 0, 19, 2)
(2021, 'E00060276', 'E00060276', 257, 245, 0, 12, 0)
(2021, 'E00060277', 'E00060277', 275, 244, 0, 31, 0)
(2021, 'E00060279', 'E00060279', 272, 257, 0, 15, 0)

No indices set on this table.

Table: nssec_data

Approx Row Count: 0.1 M
('date', 'geography', 'geography_code', 'NSSEC_Total', 'NSSEC_L1_L2_L3', 'NSSEC_L4_L5_L6', 'NSSEC_L7', 'NSSEC_L8_L9', 'NSSEC_L10_L11', 'NSSEC_L12', 'NSSEC_L13', 'NSSEC_L14', 'NSSEC_L15')
(2021, 'E00000001', 'E00000001', 159, 80, 38, 13, 9, 3, 5, 5, 4, 2)
(2021, 'E00000003', 'E00000003', 222, 96, 58, 8, 26, 2, 5, 2, 5, 20)
(2021, 'E00000005', 'E00000005', 103, 37, 28, 8, 21, 0, 3, 1, 1, 4)
(2021, 'E00000007', 'E00000007', 140, 62, 39, 5, 11, 3, 3, 1, 6, 10)
(2021,

We have to make sure that we set the correct indices on our data, as this will allow our code to run much faster. The best way to do that is to create an index on any column we will be looking up, and joint indices if our lookup involves multiple columns (like coordinates).

Please be aware that as it's not always possible to predict which columns have to be indexed, we might return to this later in the project. This is an example of the situation when we have to alternate between the parts of our fynesse framework in order to run our project smoothly.

TODO: Write a short paragraph summarising what you did in the Access stage.

## Assess

Explore the data.
- To make predictions you will augment your data with information obtained from Open Street Map: an open license source of mapping information. You will need to use the techniques you have learnt in the course to indentify and incorporate useful features for your prediction tasks.
- Look at the percentage of students (NS SEC code L15) in the individual output areas, and explore how it's related to various types of open street map features in those locations. Use the relationships you found to suggest the features to be used in a system that predicts percentage of students in a given area.
- Do the same for an additional census variable (e.g., the average age in a given area).

*Some of the census data sheets are very detailed. You might want to try "boiling them down" to some aggregate value or selecting just the most important columns.*

In [None]:
nssec_df = nssec_df.drop(nssec_df.columns[[0, 2]], axis=1).set_index('geography')
nssec_df = nssec_df.sort_index()
print(nssec_df.columns)
nssec_df.columns = range(10)
nssec_df.iloc[:, 1:10] = nssec_df.iloc[:, 1:10].div(nssec_df.iloc[:, 0], axis=0)
nssec_df = nssec_df.drop(columns=nssec_df.columns[0])

students = np.array(nssec_df[9])
occ_df = nssec_df.copy()
occ_df = np.array(occ_df.drop(columns=occ_df.columns[8]))

Write a short paragraph summarising what you did in the Assess stage.

In [None]:
# TODO: your answer here

## Address

Make a prediction system.
- Use Open Street Maps to predict Socio-Economic Status - in particular, the percent of full time students, from OSM features. Your function should take as input a coordinate pair (you can assume it will be in England), and return an estimate for the share of students in that area.
- Do the same for the additional census variable you selected in the previous step.

In [None]:
def estimate_students(latitude: float, longitude: float) -> float:
    """
    Args:
    latitude (float): The latitude coordinate.
    longitude (float): The longitude coordinate.

    Returns:
    float: Estimated share of students in that area (value between 0 and 1).
    """
    # TODO: implement
    return NotImplementedError


def estimate_something(latitude: float, longitude: float) -> float:
    #TODO: rename function
    """
    Args:
    latitude (float): The latitude coordinate.
    longitude (float): The longitude coordinate.

    Returns:
    float: Estimated value, percentage, probability, etc
    """
    # TODO: implement
    return NotImplementedError

Write a short paragraph summarising what you did in the Address stage.

In [None]:
# TODO: your answer here

## Discussion

#### Story and structure

In [None]:
# Marks awarded for inline explanation and structure, no need for answers here. Go over your notebook to make sure you explain what you did and why.

#### Design decisions

In [None]:
# TODO: your answer here

#### Comparison

In [None]:
# TODO: your answer here

#### Reusability

In [None]:
# TODO: your answer here

# Task 2: miniproject (13 marks)

We expect your miniproject to be more advanced than the example project, and that's why we allocated more marks to it. The first 7 marks will be allocated similarly to Task 1.

6 remaining points are *bonus* - they will be allocated for going far beyond what you did in Task 1, either in data access, assess, address or story.

#### Choosing your topic

You are given a lot of flexibility in choosing a miniproject topic.

Some hints:
- Your project should be based on the Census, Price Paid, OSM and Election datasets.
- It's a good idea to pick something that you are interested in.
- Bear in mind there are no marks dedicated to your overall prediction accuracy, so there is no advantage to picking 'easy' topics. It is perfectly ok to not reach significant predictive power, as long as you followed the proper process.

Some guidelines:
- Do not make a miniproject that furthers discriminatory rhetoric. In general, avoid topics that could be insensitive. If in doubt, message Radzim Sendyka (and *cc* Christian Cabrera) to discuss if this applies to your idea.
- Please avoid any projects that would require approval from the ethics comittee.
- Pick the scope of your projects that will allow you to show your skills. Some projects might not be challenging enough while others might be hard to make any progress in. Don't be scared to report negative results.

Please email Radzim Sendyka (and *cc* Christian Cabrera) with your project idea when you begin working on it, so we can let you know about any potential concerns (eg. potentially insensive, too off-topic, too simple, too complex) as soon as we can. You can change the topic up to the submission deadline, but let us know again in cases of major changes.

Most of the advice from Task 1 still applies, particularly around using an online relational database, structuring your notebook, fynesse, access-assess-address, data readiness levels, hints.

Sources you may want to include in your data include:

  - UK Census Data
    - UK Census 2021 data [here](https://www.ons.gov.uk/search?topics=9731,6646,3845,9497,4262,4128,7755,4994,6885,9724,7367&filter=datasets) or [here](https://www.nomisweb.co.uk/sources/census_2021_bulk).
    - Historical Census data [here](https://www.ons.gov.uk/census/historiccensusdata).
    - Geographic coordinates of Census Output Areas [here](https://www.data.gov.uk/dataset/4d4e021d-fe98-4a0e-88e2-3ead84538537/output-areas-december-2021-boundaries-ew-bgc-v2).
  - Spatial data
    - You should already have a connection to OpenStreetMaps.
    - You might want to also download the entire map for England (or more) [here](https://download.openstreetmap.fr/extracts/) or [here](https://wiki.openstreetmap.org/wiki/Planet.osm).
  - Election data
    - Recent Election Results data [here](https://commonslibrary.parliament.uk/research-briefings/cbp-10009/)
    - Historical Election Results data [here](https://commonslibrary.parliament.uk/research-briefings/cbp-8647/#fullreport).
    - Lookup reference between Output Areas and Parliamentary Constituencies [here](https://geoportal.statistics.gov.uk/datasets/5968b5b2c0f14dd29ba277beaae6dec3_0/explore).
  - Price Paid Data
    - You should have this data already in your database.
  - OSM
    - You should know how to access this data from previous practicals.

Example ideas:

- Are areas correlated in terms of census results? If so, find the areas that are leading change, and the ones that are following.
- Which historical census variables used to predict election results in the past, but don't seem related anymore?
- Census results are only updated once a decade, but new roads and houses are built every year. Can census results, for example those relating to travel, be predicted from basic map information, such as street networks?
- Building new houses is our best way of tackling the housing crisis. Use the information on new builds in the house prices dataset to explore what factors drive new developments, and make predictions for where the next houses will be built.
- House styles differ a lot between regions and inhabitants. Can you make any predictions about the house or area simply from looking at it's geometric shape and orientation?

Note that the ideas are just a starting point question. You should include more in-depth investigations into the data, validate your findings where applicable, and prepare visualisations.

## Title and description

In [None]:
# TODO: title your miniproject

In [None]:
# TODO: provide a very brief description of the goals of your miniproject

## TODO: Your Project Goes Here

## Conclusions

The below code is for marking purpuses only, and not part of your project. It helps us gather the data we need without asking you for database credentials, and scouring github pages. This is mandatory. If the code does not work for you, let us know immediately.

1) Please re-run the code from Practical 1 to help us see the contents of your database. We recommend you include all data sources there, but some smaller inputs are fine to use as files.

In [3]:
tables = %sql SHOW TABLES;

for row in tables:
    table_name = row[0]
    print(f"\nTable: {table_name}")

    table_status = %sql SHOW TABLE STATUS LIKE '{table_name}';
    approx_row_count = table_status[0][4] if table_status else 'Unable to fetch row count'
    print("\nApprox Row Count:", approx_row_count//100000/10, "M")

    first_5_rows = %sql SELECT * FROM `{table_name}` LIMIT 5;
    print(first_5_rows)

    indices = %sql SHOW INDEX FROM `{table_name}`;
    if indices:
        print("\nIndices:")
        for index in indices:
            print(f" - {index[2]} ({index[10]}): Column {index[4]}")
    else:
        print("\nNo indices set on this table.")

UsageError: Line magic function `%sql` not found.


2) Please paste a link to your fynesse library below, and make sure it's publically accessible. Also run the code below, and make sure it worked, printing out the contents of your library.

In [5]:
fynesse_url = 'https://github.com/sdenyskov/sd995_ads_2024.git' # '<your-url-library-here>'

In [None]:
import fynesse
import os

print('FYNESSE STRUCTURE:')
package_path = os.path.dirname(fynesse.__file__)
for root, dirs, files in os.walk(package_path):
    level = root.replace(package_path, '').count(os.sep)
    indent = ' ' * 4 * level
    print(f"{indent}{os.path.basename(root)}/")
    sub_indent = ' ' * 4 * (level + 1)
    for f in files:
        print(f"{sub_indent}{f}")

print('FILE CONTENTS')
for root, dirs, files in os.walk(package_path):
    for file in files:
        file_path = os.path.join(root, file)
        print(f"--- Contents of {file_path} ---\n")
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                print(f.read())
        except Exception as e:
            print(f"Could not read {file_path}: {e}")
        print("\n" + "-" * 50 + "\n")