In [1]:
import polars as pl
import duckdb


In [40]:
# Scratch helper (disabled): read a CSV's column names and dtypes so they can be zipped into a dict and used to define the table fields
# cols = pl.read_csv('data/portal_upload/species-tbl.csv', separator=';').columns
# dtypes = pl.read_csv('data/portal_upload/species-tbl.csv', separator=';').dtypes

In [41]:
# dict(zip(cols, dtypes))

In [5]:
# Read the semicolon-delimited grants export into a Polars DataFrame for inspection.
pl_df = pl.read_csv(
    source='data/portal_upload/grants-tbl.csv',
    separator=";",
)

In [6]:
# Preview the grants frame: row/column counts plus each column's dtype and first values.
pl_df.glimpse()

Rows: 283
Columns: 7
$ grant_id       <str> SP10, WD9, WD8, WD7, BE1, BE7, HS1, HS2, HS5, HS6
$ id             <i64> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
$ url            <str> https://www.gov.uk/countryside-stewardship-grants/administration-of-group-managed-agreements-supplement-sp10, https://www.gov.uk/countryside-stewardship-grants/livestock-exclusion-supplement-scrub-and-successional-areas-wd9, https://www.gov.uk/countryside-stewardship-grants/creation-of-successional-areas-and-scrub-wd8, https://www.gov.uk/countryside-stewardship-grants/management-of-successional-areas-and-scrub-wd7, https://www.gov.uk/countryside-stewardship-grants/protection-of-in-field-trees-on-arable-land-be1, https://www.gov.uk/countryside-stewardship-grants/supplement-for-restorative-pruning-of-fruit-trees-be7, https://www.gov.uk/countryside-stewardship-grants/maintenance-of-weatherproof-traditional-farm-buildings-hs1, https://www.gov.uk/countryside-stewardship-grants/take-historic-and-archaeological-features-out-o

In [2]:
# Open the connection to the project's DuckDB database file; every later cell uses `con`.
con = duckdb.connect(database='data/lnrs_db.duckdb')

In [43]:
def get_tables_list(con):
    """Return the names of all tables in the ``main`` schema.

    Args:
        con: An open database connection whose ``execute(sql)`` result
            supports ``fetchall()`` (a DuckDB connection in this notebook).

    Returns:
        list: The table names, in the order ``information_schema.tables``
        reports them.
    """
    query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'"
    names = []
    # Only one column is selected, so the name is the first item of each row.
    for row in con.execute(query).fetchall():
        names.append(row[0])
    return names



In [62]:
# Snapshot the current table names, plus a reversed copy so that the tables
# listed last (the lookup tables) are dropped first, avoiding foreign-key
# constraint errors.
table_list = get_tables_list(con)
reversed_table_list = list(reversed(table_list))
table_list

['grants_tbl',
 'areas_tbl',
 'area_funding_schemes_tbl',
 'priority_measures_tbl',
 'area_measures_tbl',
 'priorities_tbl',
 'species_tbl',
 'priorities_measures_lookup_tbl',
 'priorities_areas_measures_lookup_tbl',
 'priorities_areas_lookup_tbl',
 'species_priority_lookup_tbl',
 'species_area_lookup_tbl',
 'priority_measures_grants_lookup_tbl',
 'areas_measures_grants_lookup_tbl']

In [63]:
# Drop every existing table so the schema can be rebuilt from scratch.
# The order reported by information_schema is not guaranteed to follow the
# foreign-key dependencies (the listing printed after the tables are re-created
# further down is not in creation order), so a single reversed pass can reach a
# parent table while a lookup table still references it. Tables whose DROP
# fails are therefore retried on the next pass, once their dependants are gone.
pending = list(reversed_table_list)
while pending:
    blocked = []
    last_error = None
    for table in pending:
        # Quote the identifier so unusual table names cannot break the statement.
        quoted = '"' + table.replace('"', '""') + '"'
        try:
            con.execute(f"DROP TABLE IF EXISTS {quoted}")
        except duckdb.Error as err:
            blocked.append(table)
            last_error = err
    if len(blocked) == len(pending):
        # Nothing could be dropped in this pass, so retrying cannot help.
        raise last_error
    pending = blocked

Grants


In [64]:
# Grants reference table, keyed by the grant code (e.g. "SP10").
# PRIMARY KEY already implies UNIQUE and NOT NULL, so they are not repeated:
# DuckDB builds an index per UNIQUE / PRIMARY KEY constraint, and the extra
# UNIQUE would only add a second, redundant one.
con.sql(
    """
CREATE TABLE grants_tbl (
    grant_id VARCHAR (255) PRIMARY KEY,
    id INT,
    url VARCHAR (255),
    grant_name VARCHAR (255),
    grant_focus VARCHAR (255),
    grant_scheme VARCHAR (255),
    annual_payment VARCHAR (255)
);
    """
)

Areas

In [65]:
# Areas table: one row per area, keyed by area_id.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE areas_tbl (
    area_id INTEGER PRIMARY KEY,
    area_name VARCHAR,
    area_description VARCHAR,
    area_link VARCHAR
);
    """
)

Funding Schemes

In [66]:
# Funding schemes per area; area_id must reference an existing row in areas_tbl.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE area_funding_schemes_tbl (
    id INTEGER PRIMARY KEY,
    area_id INTEGER,
    funding_schemes VARCHAR,
    FOREIGN KEY (area_id) REFERENCES areas_tbl (area_id)
);
    """
)

Priority Measures

In [67]:
# Priority measures table, keyed by priority_measure_id.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priority_measures_tbl (
    priority_measure_id INTEGER PRIMARY KEY,
    measure VARCHAR,
    level_of_ambition VARCHAR,
    land_type VARCHAR,
    stakeholder VARCHAR,
    link_to_further_guidance VARCHAR
);
    """
)

Area Measures

In [68]:
# Area measures table, keyed by area_measure_id.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE area_measures_tbl (
    area_measure_id INTEGER PRIMARY KEY,
    measure VARCHAR,
    level_of_ambition VARCHAR,
    land_type VARCHAR,
    stakeholder VARCHAR
);
    """
)

Priorities

In [69]:
# Priorities table, keyed by priority_id.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priorities_tbl (
    priority_id INTEGER PRIMARY KEY,
    theme VARCHAR,
    biodiversity_priority VARCHAR,
    simplified_biodiversity_priority VARCHAR
);
    """
)

Species

In [70]:
# Species table, keyed by species_id, holding the taxonomy columns and keys.
# "order" is double-quoted because ORDER is an SQL keyword.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    '''
CREATE TABLE species_tbl (
    species_id INTEGER PRIMARY KEY,
    taxa VARCHAR,
    common_name VARCHAR,
    assemblage VARCHAR,
    usage_key INTEGER,
    scientific_name VARCHAR,
    canonical_name VARCHAR,
    status VARCHAR,
    kingdom VARCHAR,
    phylum VARCHAR,
    "order" VARCHAR,
    family VARCHAR,
    genus VARCHAR,
    species VARCHAR,
    kingdom_key INTEGER,
    phylum_key INTEGER,
    class_key INTEGER,
    order_key INTEGER,
    family_key INTEGER,
    genus_key INTEGER,
    species_key INTEGER,
    synonym BOOL,
    class VARCHAR,
    accepted_usage_key INTEGER,
    verbatim_name VARCHAR,
    gbif_species_url VARCHAR
);
    '''
)

Priorities Measures Lookup

In [71]:
# Lookup table linking priorities to priority measures; both columns are
# foreign keys into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priorities_measures_lookup_tbl (
    id INTEGER PRIMARY KEY,
    priority_id INTEGER,
    priority_measure_id INTEGER,
    FOREIGN KEY (priority_id) REFERENCES priorities_tbl (priority_id),
    FOREIGN KEY (priority_measure_id) REFERENCES priority_measures_tbl (priority_measure_id)
);
    """
)

Priorities Areas Measures Lookup

In [72]:
# Lookup table linking a priority, an area and an area measure; all three
# columns are foreign keys into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priorities_areas_measures_lookup_tbl (
    id INTEGER PRIMARY KEY,
    priority_id INTEGER,
    area_id INTEGER,
    area_measure_id INTEGER,
    FOREIGN KEY (area_measure_id) REFERENCES area_measures_tbl (area_measure_id),
    FOREIGN KEY (priority_id) REFERENCES priorities_tbl (priority_id),
    FOREIGN KEY (area_id) REFERENCES areas_tbl (area_id)
);
    """
)

Priorities Areas Lookup

In [73]:
# Lookup table linking priorities to areas; both columns are foreign keys
# into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priorities_areas_lookup_tbl (
    id INTEGER PRIMARY KEY,
    priority_id INTEGER,
    area_id INTEGER,
    FOREIGN KEY (area_id) REFERENCES areas_tbl (area_id),
    FOREIGN KEY (priority_id) REFERENCES priorities_tbl (priority_id)
);
    """
)

Species Priority Lookup

In [74]:
# Lookup table linking species to priorities; both columns are foreign keys
# into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE species_priority_lookup_tbl (
    id INTEGER PRIMARY KEY,
    priority_id INTEGER,
    species_id INTEGER,
    FOREIGN KEY (species_id) REFERENCES species_tbl (species_id),
    FOREIGN KEY (priority_id) REFERENCES priorities_tbl (priority_id)
);
    """
)

Species Area Lookup

In [75]:
# Lookup table linking species to areas; both columns are foreign keys into
# their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE species_area_lookup_tbl (
    id INTEGER PRIMARY KEY,
    species_id INTEGER,
    area_id INTEGER,
    FOREIGN KEY (species_id) REFERENCES species_tbl (species_id),
    FOREIGN KEY (area_id) REFERENCES areas_tbl (area_id)
);
    """
)

Priority Measures Grants Lookup

In [76]:
# Lookup table linking priority measures to grants; both columns are foreign
# keys into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE priority_measures_grants_lookup_tbl (
    id INTEGER PRIMARY KEY,
    priority_measure_id INTEGER,
    grant_id VARCHAR (255),
    FOREIGN KEY (priority_measure_id) REFERENCES priority_measures_tbl (priority_measure_id),
    FOREIGN KEY (grant_id) REFERENCES grants_tbl (grant_id)
);
    """
)

Areas Measures Grants Lookup

In [77]:
# Lookup table linking area measures to grants; both columns are foreign keys
# into their parent tables.
# PRIMARY KEY already implies UNIQUE and NOT NULL, so the redundant
# constraints (and the extra index UNIQUE would create) are omitted.
con.sql(
    """
CREATE TABLE areas_measures_grants_lookup_tbl (
    id INTEGER PRIMARY KEY,
    area_measure_id INTEGER,
    grant_id VARCHAR (255),
    FOREIGN KEY (area_measure_id) REFERENCES area_measures_tbl (area_measure_id),
    FOREIGN KEY (grant_id) REFERENCES grants_tbl (grant_id)
);
    """
)

In [78]:
# List the tables now present in the main schema (run after the CREATE TABLE cells).
get_tables_list(con)

['species_area_lookup_tbl',
 'priority_measures_grants_lookup_tbl',
 'areas_measures_grants_lookup_tbl',
 'grants_tbl',
 'areas_tbl',
 'area_funding_schemes_tbl',
 'priority_measures_tbl',
 'area_measures_tbl',
 'priorities_tbl',
 'species_tbl',
 'priorities_measures_lookup_tbl',
 'priorities_areas_measures_lookup_tbl',
 'priorities_areas_lookup_tbl',
 'species_priority_lookup_tbl']

In [79]:
# Bulk-load every CSV export into its table inside a single transaction, so a
# failure in any file rolls back all of the loads.
# Referenced tables are listed before the lookup tables that point at them.
LOAD_ORDER = (
    "grants_tbl",
    "areas_tbl",
    "area_funding_schemes_tbl",
    "priority_measures_tbl",
    "area_measures_tbl",
    "priorities_tbl",
    "species_tbl",
    "priorities_measures_lookup_tbl",
    "priorities_areas_measures_lookup_tbl",
    "priorities_areas_lookup_tbl",
    "species_priority_lookup_tbl",
    "species_area_lookup_tbl",
    "priority_measures_grants_lookup_tbl",
    "areas_measures_grants_lookup_tbl",
)

try:
    con.execute("BEGIN TRANSACTION;")
    for table in LOAD_ORDER:
        # Each export is named after its table, with hyphens in place of underscores.
        csv_path = f"data/portal_upload/{table.replace('_', '-')}.csv"
        con.execute(f"COPY {table} FROM '{csv_path}'(DELIMITER ';', HEADER);")
    con.execute("COMMIT;")
except Exception as e:
    # If an error occurs, roll back so no table is left partially loaded.
    con.execute("ROLLBACK;")
    print(f"Transaction rolled back due to an error: {e}")

Transaction rolled back due to an error: Invalid Input Error: Error in file "data/portal_upload/priorities-measures-lookup-tbl.csv" on line 3: expected 4 values per row, but got 3.
Parser options:
  file=data/portal_upload/priorities-measures-lookup-tbl.csv
  delimiter=';'
  quote='"' (default)
  escape='"' (default)
  header=1
  sample_size=20480
  ignore_errors=0
  all_varchar=0


In [53]:
# Load the combined grants export into lnrs_all_grants_tbl.
# NOTE(review): COPY ... FROM targets a table by name; in this notebook
# lnrs_all_grants_tbl is only created by a cell further down — confirm the
# intended run order.
copy_all_grants_sql = '''
COPY lnrs_all_grants_tbl FROM 'data/portal_upload/lnrs-all-grants-tbl.csv'(DELIMITER ';', HEADER)

'''
con.sql(copy_all_grants_sql)

In [54]:
# Display the contents of lnrs_all_grants_tbl to check the load.
con.sql('SELECT * FROM lnrs_all_grants_tbl')

┌──────────────────────┬──────────────────────┬──────────┬───┬──────────────────┬──────────────────────┐
│         url          │      grant_name      │ grant_id │ … │  annual_payment  │     grant_focus      │
│       varchar        │       varchar        │ varchar  │   │     varchar      │       varchar        │
├──────────────────────┼──────────────────────┼──────────┼───┼──────────────────┼──────────────────────┤
│ https://www.gov.uk…  │ SP10: Administrati…  │ SP10     │ … │ NULL             │ Additional supplem…  │
│ https://www.gov.uk…  │ WD9: Livestock exc…  │ WD9      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ WD8: Creation of s…  │ WD8      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ WD7: Management of…  │ WD7      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ BE1: Protection of…  │ BE1      │ … │ NULL             │ Boundaries, trees …  │
│ https://www.gov.uk…  │ BE7: Supplement fo…  │ BE7    

In [45]:
# Create lnrs_all_grants_tbl directly from the semicolon-delimited CSV, with
# every column declared explicitly as VARCHAR via read_csv's `columns` argument.
# NOTE(review): the ParserException in this cell's saved output mentions a
# column list "(url VARCHAR, ...)" that does not appear in this query, so the
# output looks stale — re-run to confirm.
con.sql('''
        CREATE TABLE lnrs_all_grants_tbl AS 
        SELECT * FROM 
        read_csv(
        'data/portal_upload/lnrs-all-grants-tbl.csv',
        header = true,
        delim = ';',
        columns = {'url': 'VARCHAR',
                'grant_name': 'VARCHAR',
                'grant_id': 'VARCHAR',
                'grant_scheme': 'VARCHAR',
                'annual_payment': 'VARCHAR',
                'grant_focus': 'VARCHAR'}
        ) 
        
        ''')

ParserException: Parser Error: syntax error at or near "("
LINE 15:         (url VARCHAR, grant_name VARCHAR, grant_id VARCHAR, grant_scheme VARCHAR, annual_payment VARCHAR, grant_focus VARCHAR)
        ...
                 ^

In [51]:
# Remove the lnrs_all_grants_tbl scratch table.
# IF EXISTS keeps the cell re-runnable: a second run (or a run on a fresh
# database) no longer raises because the table is already gone.
con.sql("DROP TABLE IF EXISTS lnrs_all_grants_tbl")

In [41]:
# Display the contents of lnrs_all_grants_tbl.
con.sql('SELECT * FROM lnrs_all_grants_tbl')

┌──────────────────────┬──────────────────────┬──────────┬───┬──────────────────┬──────────────────────┐
│         url          │      grant_name      │ grant_id │ … │  annual_payment  │     grant_focus      │
│       varchar        │       varchar        │ varchar  │   │     varchar      │       varchar        │
├──────────────────────┼──────────────────────┼──────────┼───┼──────────────────┼──────────────────────┤
│ https://www.gov.uk…  │ SP10: Administrati…  │ SP10     │ … │ NULL             │ Additional supplem…  │
│ https://www.gov.uk…  │ WD9: Livestock exc…  │ WD9      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ WD8: Creation of s…  │ WD8      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ WD7: Management of…  │ WD7      │ … │ NULL             │ Woodland and scrub   │
│ https://www.gov.uk…  │ BE1: Protection of…  │ BE1      │ … │ NULL             │ Boundaries, trees …  │
│ https://www.gov.uk…  │ BE7: Supplement fo…  │ BE7    

In [36]:
# Close the database connection once the notebook's work is finished.
con.close()