In [1]:
# IMPORT LIBRARIES.
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine
from shapely import wkt
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import geoviews as gv
from geoviews import opts
import geoviews.tile_sources as gvts
import panel as pn
from bokeh.models import BasicTickFormatter
gv.extension("bokeh") 

In [2]:
# PANDAS DISPLAY SETTINGS (A VALUE OF None REMOVES THE LIMIT).
# SHOW UP TO 200 ROWS BEFORE THE OUTPUT IS TRUNCATED:
pd.set_option("display.max_rows", 200)
# SHOW EVERY COLUMN, HOWEVER MANY THE FRAME HAS:
pd.set_option("display.max_columns", None)

## 1. CONNECT TO POSTGRES DATABASE.

In [3]:
def connect_to_postgres():
    """
    Create a SQLAlchemy engine (connection pool) for the Postgres database and check that it is reachable.

    The connection settings are read from the environment variables POSTGRES_USER, POSTGRES_PASSWORD,
    POSTGRES_HOST and POSTGRES_DB. Any variable that is not set falls back to the value this notebook
    has always used ('postgres', 'password', 'postgres' and 'github_projects' respectively), so the
    function still works unchanged in the original container setup.

    Returns the SQLAlchemy engine. Any exception raised while opening the test connection (for example
    because the host is unreachable or the credentials are wrong) is propagated to the caller.
    """
    import os
    from urllib.parse import quote

    conn_params_dict = {"user":os.environ.get("POSTGRES_USER","postgres"),
                        # THE DEFAULT PASSWORD IS ONLY SUITABLE FOR A LOCAL DEVELOPMENT CONTAINER.
                        "password":os.environ.get("POSTGRES_PASSWORD","password"),
                        # FOR host, USE THE POSTGRES INSTANCE CONTAINER NAME, AS THE CONTAINER IP CAN CHANGE.
                        "host":os.environ.get("POSTGRES_HOST","postgres"),
                        "database":os.environ.get("POSTGRES_DB","github_projects")}

    # PERCENT-ENCODE THE USER AND PASSWORD SO THAT CHARACTERS SUCH AS '@', '/' OR ':' CANNOT BREAK THE URL.
    connect_alchemy = "postgresql+psycopg2://%s:%s@%s/%s" % (
        quote(conn_params_dict['user'], safe=""),
        quote(conn_params_dict['password'], safe=""),
        conn_params_dict['host'],
        conn_params_dict['database']
    )

    # CREATE POSTGRES ENGINE (CONNECTION POOL).
    engine = create_engine(connect_alchemy)

    # create_engine() DOES NOT CONNECT BY ITSELF, SO OPEN (AND IMMEDIATELY RELEASE) ONE CONNECTION.
    # THIS MAKES THE SUCCESS MESSAGE BELOW TRUE AND SURFACES CONNECTION ERRORS HERE RATHER THAN LATER.
    with engine.connect():
        pass

    print("Connection to Postgres successful.")
    return engine

In [4]:
# EXECUTE FUNCTION TO CONNECT TO POSTGRES.
# THE RETURNED ENGINE IS STORED IN THE GLOBAL NAME engine, WHICH dl_2_table_inner_join() AND
# disconnect_from_postgres() BOTH READ.
engine = connect_to_postgres()

Connection to Postgres successful.


---
---
## 2. JOIN TWO POSTGRES TABLES, AUTOMATICALLY DOWNLOAD JOIN RESULT AND LOAD INTO PANDAS/GEOPANDAS.

In [5]:
def dl_2_table_inner_join(table_a,table_b,atts_list,join_att_a,join_att_b,dtype,index_col=None,con=None):
    """
    This function performs an INNER JOIN between 2 Postgres tables, table_a and table_b.
    The result of the table join is then automatically downloaded into Pandas or GeoPandas: if the
    result contains a column named 'geometry', its WKT strings are parsed and a GeoDataFrame (CRS
    EPSG:4326) is returned, otherwise a DataFrame is returned.

    The following arguments are required:
    'table_a': The first Postgres table involved in the INNER JOIN (aliased as 'a' in the query). A string.
    'table_b': The second Postgres table involved in the INNER JOIN (aliased as 'b' in the query). A string.
    'atts_list': A list of Postgres table attributes that the user wishes to SELECT from the result of the
    INNER JOIN. A single string is treated as a list containing one attribute.
    'join_att_a': The attribute from table_a used for the JOIN. A string.
    'join_att_b': The attribute from table_b used for the JOIN. A string.
    'dtype': A dictionary of "attribute names:Pandas/GeoPandas data types" for the downloaded
    DataFrame/GeoDataFrame. If an attribute is not present in this dictionary then a default dtype is set
    by Pandas/GeoPandas automatically for the attribute.

    The following arguments are optional:
    'index_col': Which attribute to set as the index of the DataFrame/GeoDataFrame. Defaults to None,
    which keeps the default numeric index.
    'con': The connection/engine passed to pandas.read_sql. Defaults to None, which uses the global
    'engine' created by connect_to_postgres().

    Raises a ValueError if atts_list is empty.

    NOTE: Table and attribute names are inserted into the SQL text as they are (identifiers cannot be
    sent as bound parameters), so only pass names that come from a trusted source.
    """
    # A BARE STRING WOULD OTHERWISE BE JOINED CHARACTER BY CHARACTER.
    if isinstance(atts_list, str):
        atts_list = [atts_list]
    if not atts_list:
        raise ValueError("atts_list must contain at least one attribute to SELECT.")

    # EXTRACT STRING OF ATTRIBUTE NAMES FROM atts_list.
    atts_list_extracted = ",".join(atts_list)

    # THE SQL QUERY:
    sql = (
        f"SELECT {atts_list_extracted} "
        f"FROM {table_a} a "
        f"INNER JOIN {table_b} b "
        f"ON a.{join_att_a} = b.{join_att_b};"
    )

    # USE THE GLOBAL ENGINE UNLESS THE CALLER SUPPLIED A CONNECTION.
    connection = engine if con is None else con

    # DOWNLOAD JOINED TABLE FROM POSTGRES AND LOAD INTO PANDAS AS A DATAFRAME.
    result = pd.read_sql(sql, con=connection)

    # DECIDE ON THE RESULT TYPE FROM THE DOWNLOADED COLUMNS. AN EXACT COLUMN NAME MATCH MEANS THAT
    # ATTRIBUTES SUCH AS 'geometry_type' ARE NOT MISTAKEN FOR A GEOMETRY COLUMN.
    has_geometry = "geometry" in result.columns
    frame_kind = "GeoDataFrame" if has_geometry else "DataFrame"
    library = "GeoPandas" if has_geometry else "Pandas"
    print(f"This table is a {frame_kind}:\n")

    if has_geometry:
        # DESERIALIZE THE WKT STRINGS REPRESENTATION OF THE GEOMETRY COLUMN (NULL GEOMETRIES STAY None).
        result["geometry"] = result["geometry"].apply(
            lambda value: None if value is None else wkt.loads(value)
        )

        # CONVERT DATAFRAME INTO GEODATAFRAME AND SET GEOMETRY COLUMN.
        result = gpd.GeoDataFrame(result,geometry="geometry",crs=4326)

    # SET DTYPES.
    result = result.astype(dtype)

    print(f"The result of the INNER JOIN between \033[1m{table_a}\033[0m and \033[1m{table_b}\033[0m tables was successfully downloaded from Postgres and loaded into {library}.\n")

    # SET INDEX COLUMN (OPTIONAL).
    if index_col is None:
        print(f"No index attribute was set on the {frame_kind} so the default numeric index has been used.\n")
    else:
        result = result.set_index(index_col)
        print(f"The \033[1m{index_col}\033[0m attribute was set as the {frame_kind} index.\n")

    return result
    

### 2.1. EXAMPLE INNER JOIN BETWEEN "uk_pop_stats_regions" AND "gdf_ons_regions" POSTGRES TABLES.

In [6]:
dl_2_table_inner_join(
    # THE 2 TABLES TAKING PART IN THE INNER JOIN.
    table_a="uk_pop_stats_regions",
    table_b="gdf_ons_regions",
    # THE ATTRIBUTES TO SELECT FROM THE SQL INNER JOIN RESULT (THE AGE BANDS NEED DOUBLE QUOTES IN SQL).
    atts_list=[
        "a.code AS code", "a.name AS name", "geography", "total_population",
        "male_population", "female_population", "male_percent", "female_percent",
        "male_female_ratio",
        *(f'"{age_band}"' for age_band in ("0-4", "5-18", "19-24", "25-34", "35-44",
                                           "45-54", "55-64", "65-74", "75-84", "85plus")),
        "median_age", "area_sq_km", "ppl_per_sq_km", "bng_e", "bng_n", "long", "lat",
        "shape_area", "shape_length", "geometry",
    ],
    # THE ATTRIBUTE FROM EACH TABLE THAT THE INNER JOIN IS PERFORMED ON.
    join_att_a="name",
    join_att_b="name",
    # THE PANDAS/GEOPANDAS DTYPES FOR THE DOWNLOADED DATAFRAME/GEODATAFRAME, GROUPED BY DTYPE.
    dtype={
        **dict.fromkeys(("code", "name", "geography", "male_female_ratio"), "string"),
        **dict.fromkeys(("total_population", "male_population", "female_population",
                         "0-4", "5-18", "19-24", "25-34", "35-44", "45-54", "55-64",
                         "65-74", "75-84", "85plus",
                         "area_sq_km", "ppl_per_sq_km", "bng_e", "bng_n"), "Int32"),
        **dict.fromkeys(("male_percent", "female_percent", "median_age", "long", "lat",
                         "shape_area", "shape_length"), "float64"),
    },
    # THE ATTRIBUTE USED AS THE DATAFRAME/GEODATAFRAME INDEX.
    index_col="code",
)

This table is a GeoDataFrame:

The result of the INNER JOIN between [1muk_pop_stats_regions[0m and [1mgdf_ons_regions[0m tables was successfully downloaded from Postgres and loaded into GeoPandas.

The [1mcode[0m attribute was set as the GeoDataFrame index.



Unnamed: 0_level_0,name,geography,total_population,male_population,female_population,male_percent,female_percent,male_female_ratio,0-4,5-18,19-24,25-34,35-44,45-54,55-64,65-74,75-84,85plus,median_age,area_sq_km,ppl_per_sq_km,bng_e,bng_n,long,lat,shape_area,shape_length,geometry
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
E12000001,north east,region,2669941,1312124,1357817,49.14,50.86,49.14:50.86,141744,419201,210301,349130,305340,355053,357164,294123,171572,66313,41.8,8579,311,417314,600356,-1.72888,55.29701,8605838000.0,646532.1,"MULTIPOLYGON (((-1.77887 55.66773, -1.80244 55..."
E12000002,north west,region,7341196,3625650,3715546,49.39,50.61,49.39:50.61,429637,1214775,551794,985280,879527,991592,913480,750263,450818,174030,40.3,14108,520,350014,506279,-2.77239,54.44944,14177570000.0,1080405.0,"MULTIPOLYGON (((-3.17307 54.04707, -3.20969 54..."
E12000003,yorkshire and the humber,region,5502967,2717432,2785535,49.38,50.62,49.38:50.62,318135,913451,440981,730679,650900,737877,678167,563472,337035,132270,40.1,15405,357,446902,448736,-1.28714,53.93264,15426740000.0,874153.8,"MULTIPOLYGON (((-0.53575 53.68126, -0.55549 53..."
E12000004,east midlands,region,4835928,2391957,2443971,49.46,50.54,49.46:50.54,269828,786914,379297,610112,570110,667871,607704,519456,304889,119747,41.4,15625,309,477659,322635,-0.84969,52.79572,15658940000.0,896271.5,"POLYGON ((-0.24848 53.59366, -0.25140 53.58465..."
E12000005,west midlands,region,5934037,2941291,2992746,49.57,50.43,49.57:50.43,354654,1012764,461746,798881,710231,790054,700345,586635,372198,146529,39.6,12998,457,386294,295477,-2.20358,52.55697,13002150000.0,774627.6,"POLYGON ((-1.86073 53.18843, -1.81254 53.15434..."
E12000006,east,region,6236072,3072499,3163573,49.27,50.73,49.27:50.73,368032,1045892,396843,767491,787249,863440,768619,660187,404545,173774,41.7,19116,326,571078,263235,0.504207,52.24073,19151190000.0,1081851.0,"MULTIPOLYGON (((0.68133 52.97746, 0.68350 52.9..."
E12000007,london,region,8961989,4475817,4486172,49.94,50.06,49.94:50.06,606004,1521349,652277,1608119,1442552,1163631,886542,586472,344126,150917,35.6,1572,5701,517515,178392,-0.30866,51.49227,1584125000.0,271231.4,"POLYGON ((-0.01192 51.68088, -0.01226 51.64623..."
E12000008,south east,region,9180135,4523939,4656196,49.28,50.72,49.28:50.72,520679,1551851,634023,1095467,1161970,1290015,1134914,946459,586265,258492,41.7,19072,481,470062,172924,-0.99311,51.45097,19105630000.0,1468849.0,"MULTIPOLYGON (((0.68783 51.41435, 0.66011 51.4..."
E12000009,south west,region,5624696,2767122,2857574,49.2,50.8,49.2:50.8,290924,876607,400481,664204,640060,763740,735551,668999,409151,174979,44.1,23836,236,285013,102567,-3.63346,50.81119,23969360000.0,1662106.0,"MULTIPOLYGON (((-4.67092 51.15846, -4.68057 51..."


---
---

## 3. PRODUCE 3 INTERACTIVE MAPS. PRESENT EACH PLOT IN A SEPARATE TAB.

In [7]:
# SHARED ARGUMENTS FOR dl_2_table_inner_join().
# ALL 3 JOIN RESULTS PLOTTED BELOW USE THE SAME atts_list LIST, join_att_a AND join_att_b STRINGS,
# dtype DICTIONARY AND index_col STRING, SO THEY ARE DEFINED ONCE HERE.

# THE ATTRIBUTES TO SELECT FROM THE SQL INNER JOIN RESULT (THE AGE BANDS NEED DOUBLE QUOTES IN SQL).
atts_list = [
    "a.code AS code", "a.name AS name", "geography", "total_population",
    "male_population", "female_population", "male_percent", "female_percent",
    "male_female_ratio",
    *(f'"{age_band}"' for age_band in ("0-4", "5-18", "19-24", "25-34", "35-44",
                                       "45-54", "55-64", "65-74", "75-84", "85plus")),
    "median_age", "area_sq_km", "ppl_per_sq_km", "bng_e", "bng_n", "long", "lat",
    "shape_area", "shape_length", "geometry",
]

# THE ATTRIBUTE FROM EACH TABLE THAT THE INNER JOIN IS PERFORMED ON.
join_att_a = join_att_b = "name"

# THE PANDAS/GEOPANDAS DTYPES FOR THE DOWNLOADED DATAFRAME/GEODATAFRAME, GROUPED BY DTYPE.
dtype = {
    **dict.fromkeys(("code", "name", "geography", "male_female_ratio"), "string"),
    **dict.fromkeys(("total_population", "male_population", "female_population",
                     "0-4", "5-18", "19-24", "25-34", "35-44", "45-54", "55-64",
                     "65-74", "75-84", "85plus",
                     "area_sq_km", "ppl_per_sq_km", "bng_e", "bng_n"), "Int32"),
    **dict.fromkeys(("male_percent", "female_percent", "median_age", "long", "lat",
                     "shape_area", "shape_length"), "float64"),
}

# THE ATTRIBUTE USED AS THE DATAFRAME/GEODATAFRAME INDEX.
index_col = "code"

In [8]:
# AUTOMATE THE QUERY OF POSTGRES, THE DOWNLOAD OF THE RESULT INTO PANDAS/GEOPANDAS AND THE PRODUCTION OF ...
# A MAP FOR EACH ADMINISTRATIVE GEOGRAPHY.
def produce_interactive_maps():
    """
    Build a tabbed visualisation of Office for National Statistics (ONS) population estimates, with one
    interactive map per administrative geography: Country, Region and Local Authority District.

    For each geography, the matching "uk_pop_stats_*" and "gdf_ons_*" Postgres tables are joined with
    dl_2_table_inner_join(), using the global atts_list, join_att_a, join_att_b, dtype and index_col
    values. Each join result is drawn as polygons coloured by total_population and overlaid with the
    gvts.OSM tile source.

    Returns a pn.Tabs object holding the 3 maps. Select the appropriate tab to display the desired map.
    """
    geo_labels = ["Country","Region","Local Authority District"]
    table_pairs = [("uk_pop_stats_countries","gdf_ons_countries"),
                   ("uk_pop_stats_regions","gdf_ons_regions"),
                   ("uk_pop_stats_lad20","gdf_ons_lad20")]

    # DOWNLOAD ALL 3 JOIN RESULTS FIRST, IN THE SAME ORDER AS geo_labels.
    joined_gdfs = [
        dl_2_table_inner_join(table_a=stats_table,
                              table_b=boundary_table,
                              atts_list=atts_list,
                              join_att_a=join_att_a,
                              join_att_b=join_att_b,
                              dtype=dtype,
                              index_col=index_col)
        for stats_table,boundary_table in table_pairs
    ]

    # BUILD ONE (TAB TITLE, MAP) PAIR PER ADMINISTRATIVE GEOGRAPHY.
    tabs = []
    for geo_label,joined_gdf in zip(geo_labels,joined_gdfs):
        polygons = gv.Polygons(joined_gdf,label=f"UK Population Estimate Map by {geo_label}")
        styled_polygons = polygons.opts(tools=["hover","tap"],
                                        color="total_population",
                                        cmap="coolwarm",
                                        width=600,
                                        height=900,
                                        colorbar=True,
                                        clabel="Population",
                                        # SHOW THE COLOURBAR TICKS AS PLAIN NUMBERS, NOT SCIENTIFIC NOTATION.
                                        cformatter=BasicTickFormatter(use_scientific=False))
        tabs.append((geo_label,styled_polygons*gvts.OSM))

    print("\nThe UK Population Estimate Map by Administrative Geographies has been successfully produced using this data. Select a tab to display the relevant interactive map.")

    # RETURN A TABBED VISUALISATION THAT CONTAINS ALL 3 INTERACTIVE MAPS.
    return pn.Tabs(*tabs)
        
    

### 3.1. UK Population Maps by Country, Region and Local Authority District:

In [9]:
# CALL FUNCTION FROM SECTION 3 TO PRODUCE THE TABBED VISUALISATION THAT CONTAINS ALL 3 INTERACTIVE MAPS.
# THE CALL IS THE LAST EXPRESSION IN THE CELL, SO THE RETURNED pn.Tabs OBJECT IS WHAT THE NOTEBOOK DISPLAYS.
produce_interactive_maps()

This table is a GeoDataFrame:

The result of the INNER JOIN between [1muk_pop_stats_countries[0m and [1mgdf_ons_countries[0m tables was successfully downloaded from Postgres and loaded into GeoPandas.

The [1mcode[0m attribute was set as the GeoDataFrame index.

This table is a GeoDataFrame:

The result of the INNER JOIN between [1muk_pop_stats_regions[0m and [1mgdf_ons_regions[0m tables was successfully downloaded from Postgres and loaded into GeoPandas.

The [1mcode[0m attribute was set as the GeoDataFrame index.

This table is a GeoDataFrame:

The result of the INNER JOIN between [1muk_pop_stats_lad20[0m and [1mgdf_ons_lad20[0m tables was successfully downloaded from Postgres and loaded into GeoPandas.

The [1mcode[0m attribute was set as the GeoDataFrame index.


The UK Population Estimate Map by Administrative Geographies has been successfully produced using this data. Select a tab to display the relevant interactive map.


---
---
## 4. CLOSE ALL CONNECTIONS TO POSTGRES DATABASE.

In [10]:
def disconnect_from_postgres():
    """
    Dispose of the global Postgres 'engine' (via engine.dispose()) and print a confirmation message.
    """
    confirmation = "All connections to Postgres have been terminated."
    engine.dispose()
    print(confirmation)

In [11]:
# CALL FUNCTION FROM SECTION 4 TO DISPOSE OF THE POSTGRES ENGINE.
disconnect_from_postgres()

All connections to Postgres have been terminated.
