# NUTS Mapping Creator

[NUTS](https://en.wikipedia.org/wiki/Nomenclature_of_Territorial_Units_for_Statistics) is a mapping schema to map the areas in the European Union. It has 3 levels and divides countries up, but it seems quite hard to get a simple mapping table in a non-ridiculous format.

In [7]:
from io import BytesIO, StringIO

import numpy as np
import pandas as pd
import requests

%config Completer.use_jedi = False

## 1. Read in the Raw Excel File from European Commission Website

In [2]:
# Location of the NUTS 2021 Excel workbook that the next cells download
NUTS_URL = "https://ec.europa.eu/eurostat/documents/345175/629341/NUTS2021.xlsx"

In [3]:
# Download the workbook; the timeout keeps this cell from hanging forever
# if the server never answers (requests has no timeout by default).
res = requests.get(NUTS_URL, timeout=60)

In [12]:
# Fail loudly on a bad download: previously a non-200 response silently left
# `res_xl` undefined and the next cell died with a confusing NameError.
res.raise_for_status()

# Wrap the downloaded bytes in a file-like object; passing raw bytes to
# pandas' Excel readers is deprecated in recent pandas versions.
res_xl = pd.ExcelFile(BytesIO(res.content), engine='openpyxl')

In [50]:
# Parse the sheet straight from the already-open workbook object
df_raw = res_xl.parse(sheet_name='NUTS & SR 2021')

In [52]:
# Preview the first ten rows of the raw sheet
df_raw.iloc[:10]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
0,BE,Belgique/België,,,,0.0,1.0,1.0,,,,,,,,,,,
1,BE1,,Région de Bruxelles-Capitale/Brussels Hoofdste...,,,1.0,1.0,2.0,,,,,,,,,,,
2,BE10,,,Région de Bruxelles-Capitale/ Brussels Hoofdst...,,2.0,1.0,3.0,,,,,,,,,,,
3,BE100,,,,Arr. de Bruxelles-Capitale/Arr. Brussel-Hoofdstad,3.0,1.0,4.0,,,,,,,,,,,
4,BE2,,Vlaams Gewest,,,1.0,1.0,5.0,,,,,,,,,,,
5,BE21,,,Prov. Antwerpen,,2.0,1.0,6.0,,,,,,,,,,,
6,BE211,,,,Arr. Antwerpen,3.0,1.0,7.0,,,,,,,,,,,
7,BE212,,,,Arr. Mechelen,3.0,1.0,8.0,,,,,,,,,,,
8,BE213,,,,Arr. Turnhout,3.0,1.0,9.0,,,,,,,,,,,
9,BE22,,,Prov. Limburg (BE),,2.0,1.0,10.0,,,,,,,,,,,


## 2. Make Sure All Regions Have Standard Alphabet Names

In [89]:
# Work on an independent copy so `df_raw` stays untouched by the clean-up below
df = df_raw.copy(deep=True)

In [90]:
# Cast the column to str so it can be tested for substrings and compared
# against the literal 'nan' in the cells that follow
df = df.astype({'Unnamed: 9': str})

In [91]:
def _blank_if_latin(cell):
    """Return the string 'nan' for any cell containing 'Latin', else the cell unchanged."""
    return 'nan' if 'Latin' in cell else cell


# Cells mentioning 'Latin' are overwritten with 'nan', the value the later
# filters treat as "nothing here"
df['Unnamed: 9'] = df['Unnamed: 9'].map(_blank_if_latin)

### 2a. Fix the countries first

In [92]:
# Country rows that have an entry in 'Unnamed: 9' (before the fix)
df[df['Unnamed: 9'].ne('nan') & df['Country'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
62,BG,България,,,,0.0,2.0,63.0,,BG,Bulgaria,,,,,,,,
636,EL,Ελλάδα,,,,0.0,8.0,637.0,,EL,Elláda,,,,,,,,
1111,CY,Κύπρος,,,,0.0,13.0,1112.0,,CY,Kýpros,,,,,,,,
1925,ME,Црна Гора,,,,0.0,33.0,1923.0,,ME,Crna Gora,,,,,,,,
1932,MK,Северна Македонија,,,,0.0,34.0,1930.0,,MK,Severna Makedonija,,,,,,,,
1966,RS,Srbija/Сpбија,,,,0.0,36.0,1964.0,,RS,Serbia,,,,,,,,


In [93]:
# Where a country row has an entry in 'Unnamed: 9', take the name from
# 'Unnamed: 10'; every other row keeps its current Country value
country_has_alt_name = df['Unnamed: 9'].ne('nan') & df['Country'].notna()
df['Country'] = np.where(country_has_alt_name, df['Unnamed: 10'], df['Country'])

In [94]:
# Same filter again to confirm the Country column now matches 'Unnamed: 10'
df[df['Unnamed: 9'].ne('nan') & df['Country'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
62,BG,Bulgaria,,,,0.0,2.0,63.0,,BG,Bulgaria,,,,,,,,
636,EL,Elláda,,,,0.0,8.0,637.0,,EL,Elláda,,,,,,,,
1111,CY,Kýpros,,,,0.0,13.0,1112.0,,CY,Kýpros,,,,,,,,
1925,ME,Crna Gora,,,,0.0,33.0,1923.0,,ME,Crna Gora,,,,,,,,
1932,MK,Severna Makedonija,,,,0.0,34.0,1930.0,,MK,Severna Makedonija,,,,,,,,
1966,RS,Serbia,,,,0.0,36.0,1964.0,,RS,Serbia,,,,,,,,


### 2b. Next fix NUTS1

In [97]:
# NUTS1 rows that also have a value in 'Unnamed: 10' (before the fix)
df[df['NUTS level 1'].notna() & df['Unnamed: 10'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
63,BG3,,Северна и Югоизточна България,,,1.0,2.0,64.0,,BG3,Severna i Yugoiztochna Bulgaria,,,,,,,,
86,BG4,,Югозападна и Южна централна България,,,1.0,2.0,87.0,,BG4,Yugozapadna i Yuzhna tsentralna Bulgaria,,,,,,,,
99,BGZ,,Extra-Regio NUTS 1,,,1.0,2.0,100.0,,BGZ,Extra-Regio NUTS 1,,,,,,,,
637,EL3,,Αττική,,,1.0,8.0,638.0,,EL3,Attiki,,,,,,,,
646,EL4,,"Νησιά Αιγαίου, Κρήτη",,,1.0,8.0,647.0,,EL4,"Nisia Aigaiou, Kriti",,,,,,,,
659,EL5,,Βόρεια Ελλάδα,,,1.0,8.0,660.0,,EL5,Voreia Elláda,,,,,,,,
682,EL6,,Κεντρική Ελλάδα,,,1.0,8.0,683.0,,EL6,Kentriki Elláda,,,,,,,,
706,ELZ,,Extra-Regio NUTS 1,,,1.0,8.0,707.0,,ELZ,Extra-Regio NUTS 1,,,,,,,,
1112,CY0,,Κύπρος,,,1.0,13.0,1113.0,,CY0,Kýpros,,,,,,,,
1115,CYZ,,Extra-Regio NUTS 1,,,1.0,13.0,1116.0,,CYZ,EXTRA-REGIO NUTS 1,,,,,,,,


In [98]:
# Overwrite the NUTS1 name with 'Unnamed: 10' wherever both are present
nuts1_has_alt_name = df['NUTS level 1'].notna() & df['Unnamed: 10'].notna()
df['NUTS level 1'] = np.where(nuts1_has_alt_name, df['Unnamed: 10'], df['NUTS level 1'])

In [99]:
# Same filter again to confirm the NUTS1 names now match 'Unnamed: 10'
df[df['NUTS level 1'].notna() & df['Unnamed: 10'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
63,BG3,,Severna i Yugoiztochna Bulgaria,,,1.0,2.0,64.0,,BG3,Severna i Yugoiztochna Bulgaria,,,,,,,,
86,BG4,,Yugozapadna i Yuzhna tsentralna Bulgaria,,,1.0,2.0,87.0,,BG4,Yugozapadna i Yuzhna tsentralna Bulgaria,,,,,,,,
99,BGZ,,Extra-Regio NUTS 1,,,1.0,2.0,100.0,,BGZ,Extra-Regio NUTS 1,,,,,,,,
637,EL3,,Attiki,,,1.0,8.0,638.0,,EL3,Attiki,,,,,,,,
646,EL4,,"Nisia Aigaiou, Kriti",,,1.0,8.0,647.0,,EL4,"Nisia Aigaiou, Kriti",,,,,,,,
659,EL5,,Voreia Elláda,,,1.0,8.0,660.0,,EL5,Voreia Elláda,,,,,,,,
682,EL6,,Kentriki Elláda,,,1.0,8.0,683.0,,EL6,Kentriki Elláda,,,,,,,,
706,ELZ,,Extra-Regio NUTS 1,,,1.0,8.0,707.0,,ELZ,Extra-Regio NUTS 1,,,,,,,,
1112,CY0,,Kýpros,,,1.0,13.0,1113.0,,CY0,Kýpros,,,,,,,,
1115,CYZ,,EXTRA-REGIO NUTS 1,,,1.0,13.0,1116.0,,CYZ,EXTRA-REGIO NUTS 1,,,,,,,,


### 2c. Next fix NUTS2

In [101]:
# NUTS2 rows that also have a value in 'Unnamed: 11' (before the fix)
df[df['NUTS level 2'].notna() & df['Unnamed: 11'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
64,BG31,,,Северозападен,,2.0,2.0,65.0,,BG31,,Severozapaden,,,,,,,
70,BG32,,,Северен централен,,2.0,2.0,71.0,,BG32,,Severen tsentralen,,,,,,,
76,BG33,,,Североизточен,,2.0,2.0,77.0,,BG33,,Severoiztochen,,,,,,,
81,BG34,,,Югоизточен,,2.0,2.0,82.0,,BG34,,Yugoiztochen,,,,,,,
87,BG41,,,Югозападен,,2.0,2.0,88.0,,BG41,,Yugozapaden,,,,,,,
93,BG42,,,Южен централен,,2.0,2.0,94.0,,BG42,,Yuzhen tsentralen,,,,,,,
100,BGZZ,,,Extra-Regio NUTS 2,,2.0,2.0,101.0,,BGZZ,,Extra-Regio NUTS 2,,,,,,,
638,EL30,,,Aττική,,2.0,8.0,639.0,,EL30,,Attiki,,,,,,,
647,EL41,,,Βόρειο Αιγαίο,,2.0,8.0,648.0,,EL41,,Voreio Aigaio,,,,,,,
651,EL42,,,Νότιο Αιγαίο,,2.0,8.0,652.0,,EL42,,Notio Aigaio,,,,,,,


In [102]:
# Overwrite the NUTS2 name with 'Unnamed: 11' wherever both are present
nuts2_has_alt_name = df['NUTS level 2'].notna() & df['Unnamed: 11'].notna()
df['NUTS level 2'] = np.where(nuts2_has_alt_name, df['Unnamed: 11'], df['NUTS level 2'])

In [103]:
# Same filter again to confirm the NUTS2 names now match 'Unnamed: 11'
df[df['NUTS level 2'].notna() & df['Unnamed: 11'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
64,BG31,,,Severozapaden,,2.0,2.0,65.0,,BG31,,Severozapaden,,,,,,,
70,BG32,,,Severen tsentralen,,2.0,2.0,71.0,,BG32,,Severen tsentralen,,,,,,,
76,BG33,,,Severoiztochen,,2.0,2.0,77.0,,BG33,,Severoiztochen,,,,,,,
81,BG34,,,Yugoiztochen,,2.0,2.0,82.0,,BG34,,Yugoiztochen,,,,,,,
87,BG41,,,Yugozapaden,,2.0,2.0,88.0,,BG41,,Yugozapaden,,,,,,,
93,BG42,,,Yuzhen tsentralen,,2.0,2.0,94.0,,BG42,,Yuzhen tsentralen,,,,,,,
100,BGZZ,,,Extra-Regio NUTS 2,,2.0,2.0,101.0,,BGZZ,,Extra-Regio NUTS 2,,,,,,,
638,EL30,,,Attiki,,2.0,8.0,639.0,,EL30,,Attiki,,,,,,,
647,EL41,,,Voreio Aigaio,,2.0,8.0,648.0,,EL41,,Voreio Aigaio,,,,,,,
651,EL42,,,Notio Aigaio,,2.0,8.0,652.0,,EL42,,Notio Aigaio,,,,,,,


### 2d. Next fix NUTS3

In [104]:
# NUTS3 rows that also have a value in 'Unnamed: 12' (before the fix)
df[df['NUTS level 3'].notna() & df['Unnamed: 12'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
65,BG311,,,,Видин,3.0,2.0,66.0,,BG311,,,Vidin,,,,,,
66,BG312,,,,Монтана,3.0,2.0,67.0,,BG312,,,Montana,,,,,,
67,BG313,,,,Враца,3.0,2.0,68.0,,BG313,,,Vratsa,,,,,,
68,BG314,,,,Плевен,3.0,2.0,69.0,,BG314,,,Pleven,,,,,,
69,BG315,,,,Ловеч,3.0,2.0,70.0,,BG315,,,Lovech,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,RS226,,,,Пиротска област,3.0,36.0,1992.0,,RS226,,,Pirotska oblast,,,,,,
1995,RS227,,,,Подунавска област,3.0,36.0,1993.0,,RS227,,,Podunavska oblast,,,,,,
1996,RS228,,,,Пчињска област,3.0,36.0,1994.0,,RS228,,,Pčinjska oblast,,,,,,
1997,RS229,,,,Топличка област,3.0,36.0,1995.0,,RS229,,,Toplička oblast,,,,,,


In [105]:
# Overwrite the NUTS3 name with 'Unnamed: 12' wherever both are present
nuts3_has_alt_name = df['NUTS level 3'].notna() & df['Unnamed: 12'].notna()
df['NUTS level 3'] = np.where(nuts3_has_alt_name, df['Unnamed: 12'], df['NUTS level 3'])

In [106]:
# Same filter again to confirm the NUTS3 names now match 'Unnamed: 12'
df[df['NUTS level 3'].notna() & df['Unnamed: 12'].notna()]

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3,NUTS level,Country order,Region order,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18
65,BG311,,,,Vidin,3.0,2.0,66.0,,BG311,,,Vidin,,,,,,
66,BG312,,,,Montana,3.0,2.0,67.0,,BG312,,,Montana,,,,,,
67,BG313,,,,Vratsa,3.0,2.0,68.0,,BG313,,,Vratsa,,,,,,
68,BG314,,,,Pleven,3.0,2.0,69.0,,BG314,,,Pleven,,,,,,
69,BG315,,,,Lovech,3.0,2.0,70.0,,BG315,,,Lovech,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1994,RS226,,,,Pirotska oblast,3.0,36.0,1992.0,,RS226,,,Pirotska oblast,,,,,,
1995,RS227,,,,Podunavska oblast,3.0,36.0,1993.0,,RS227,,,Podunavska oblast,,,,,,
1996,RS228,,,,Pčinjska oblast,3.0,36.0,1994.0,,RS228,,,Pčinjska oblast,,,,,,
1997,RS229,,,,Toplička oblast,3.0,36.0,1995.0,,RS229,,,Toplička oblast,,,,,,


### 2e. Only Keep Columns We Want & Rename

In [108]:
# Keep only the code and the four name columns; everything else is discarded
wanted_columns = ['Code 2021', 'Country', 'NUTS level 1', 'NUTS level 2', 'NUTS level 3']
df = df[wanted_columns]
df.head(10)

Unnamed: 0,Code 2021,Country,NUTS level 1,NUTS level 2,NUTS level 3
0,BE,Belgique/België,,,
1,BE1,,Région de Bruxelles-Capitale/Brussels Hoofdste...,,
2,BE10,,,Région de Bruxelles-Capitale/ Brussels Hoofdst...,
3,BE100,,,,Arr. de Bruxelles-Capitale/Arr. Brussel-Hoofdstad
4,BE2,,Vlaams Gewest,,
5,BE21,,,Prov. Antwerpen,
6,BE211,,,,Arr. Antwerpen
7,BE212,,,,Arr. Mechelen
8,BE213,,,,Arr. Turnhout
9,BE22,,,Prov. Limburg (BE),


In [109]:
# Shorten the headers: 'Code 2021' -> 'Code', 'NUTS level N' -> 'NUTSN'
short_names = {'Code 2021': 'Code'}
short_names.update({f'NUTS level {level}': f'NUTS{level}' for level in (1, 2, 3)})
df = df.rename(columns=short_names)

In [110]:
# Preview the first ten rows after renaming
df.iloc[:10]

Unnamed: 0,Code,Country,NUTS1,NUTS2,NUTS3
0,BE,Belgique/België,,,
1,BE1,,Région de Bruxelles-Capitale/Brussels Hoofdste...,,
2,BE10,,,Région de Bruxelles-Capitale/ Brussels Hoofdst...,
3,BE100,,,,Arr. de Bruxelles-Capitale/Arr. Brussel-Hoofdstad
4,BE2,,Vlaams Gewest,,
5,BE21,,,Prov. Antwerpen,
6,BE211,,,,Arr. Antwerpen
7,BE212,,,,Arr. Mechelen
8,BE213,,,,Arr. Turnhout
9,BE22,,,Prov. Limburg (BE),


## 3. Create Mapping Tables

### 3a. Create Country Mapping Table

First 2 letters only of the code along with the Country column

In [113]:
# Country table: the rows that carry a Country value, with their codes
is_country_row = df['Country'].notna()
df_c = df.loc[is_country_row, ['Code', 'Country']]
df_c.head(10)

Unnamed: 0,Code,Country
0,BE,Belgique/België
62,BG,Bulgaria
102,CZ,Česko
129,DK,Danmark
150,DE,Deutschland
609,EE,Eesti
620,IE,Éire/Ireland
636,EL,Elláda
709,ES,España
798,FR,France


In [115]:
# Carry each country name down onto the rows below it until the next country
df['Country'] = df['Country'].ffill()

### 3b. Create NUTS1 Only Table

Same thing again but include country and NUTS1

In [133]:
# drop the country specific rows
# drop the country specific rows
# .loc is the indexer meant for boolean masks, and .copy() makes the result an
# independent frame so later column assignments on `df` do not raise
# SettingWithCopyWarning.
df = df.loc[~df.index.isin(df_c.index)].copy()

In [135]:
# NUTS1 table: the rows that carry a NUTS1 name, with code and country
is_nuts1_row = df['NUTS1'].notna()
df_n1 = df.loc[is_nuts1_row, ['Code', 'Country', 'NUTS1']]
df_n1.head(10)

Unnamed: 0,Code,Country,NUTS1
1,BE1,Belgique/België,Région de Bruxelles-Capitale/Brussels Hoofdste...
4,BE2,Belgique/België,Vlaams Gewest
32,BE3,Belgique/België,Région wallonne
59,BEZ,Belgique/België,Extra-Regio NUTS 1
63,BG3,Bulgaria,Severna i Yugoiztochna Bulgaria
86,BG4,Bulgaria,Yugozapadna i Yuzhna tsentralna Bulgaria
99,BGZ,Bulgaria,Extra-Regio NUTS 1
103,CZ0,Česko,Česko
126,CZZ,Česko,Extra-Regio NUTS 1
130,DK0,Danmark,Danmark


In [136]:
# Carry each NUTS1 name down onto the rows below it.
# `assign` builds a new frame instead of writing into `df`, which at this point
# is a slice of the earlier frame; in-place assignment here is what produced
# the SettingWithCopyWarning.
df = df.assign(NUTS1=df['NUTS1'].ffill())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


### 3c. Create NUTS2 Only Table

In [139]:
# drop the NUTS1 specific rows
# drop the NUTS1 specific rows
# .loc is the indexer meant for boolean masks, and .copy() makes the result an
# independent frame so later column assignments on `df` do not raise
# SettingWithCopyWarning.
df = df.loc[~df.index.isin(df_n1.index)].copy()

In [140]:
# NUTS2 table: the rows that carry a NUTS2 name, with their parent names
is_nuts2_row = df['NUTS2'].notna()
df_n2 = df.loc[is_nuts2_row, ['Code', 'Country', 'NUTS1', 'NUTS2']]
df_n2.head(10)

Unnamed: 0,Code,Country,NUTS1,NUTS2
2,BE10,Belgique/België,Région de Bruxelles-Capitale/Brussels Hoofdste...,Région de Bruxelles-Capitale/ Brussels Hoofdst...
5,BE21,Belgique/België,Vlaams Gewest,Prov. Antwerpen
9,BE22,Belgique/België,Vlaams Gewest,Prov. Limburg (BE)
13,BE23,Belgique/België,Vlaams Gewest,Prov. Oost-Vlaanderen
20,BE24,Belgique/België,Vlaams Gewest,Prov. Vlaams-Brabant
23,BE25,Belgique/België,Vlaams Gewest,Prov. West-Vlaanderen
33,BE31,Belgique/België,Région wallonne,Prov. Brabant Wallon
35,BE32,Belgique/België,Région wallonne,Prov. Hainaut
43,BE33,Belgique/België,Région wallonne,Prov. Liège
49,BE34,Belgique/België,Région wallonne,Prov. Luxembourg (BE)


In [141]:
# Carry each NUTS2 name down onto the rows below it.
# `assign` builds a new frame rather than writing into `df`, which is a slice
# of the earlier frame here (in-place assignment on a slice is the pattern
# pandas flags with SettingWithCopyWarning).
df = df.assign(NUTS2=df['NUTS2'].ffill())

### 3d. Create NUTS3 Only Table

In [142]:
# drop the NUTS2 specific rows
# drop the NUTS2 specific rows
# .loc is the indexer meant for boolean masks, and .copy() makes the result an
# independent frame so any later column assignment on `df` does not raise
# SettingWithCopyWarning.
df = df.loc[~df.index.isin(df_n2.index)].copy()

In [143]:
# NUTS3 table: the rows that carry a NUTS3 name, with the full parent chain
is_nuts3_row = df['NUTS3'].notna()
df_n3 = df.loc[is_nuts3_row, ['Code', 'Country', 'NUTS1', 'NUTS2', 'NUTS3']]
df_n3.head(10)

Unnamed: 0,Code,Country,NUTS1,NUTS2,NUTS3
3,BE100,Belgique/België,Région de Bruxelles-Capitale/Brussels Hoofdste...,Région de Bruxelles-Capitale/ Brussels Hoofdst...,Arr. de Bruxelles-Capitale/Arr. Brussel-Hoofdstad
6,BE211,Belgique/België,Vlaams Gewest,Prov. Antwerpen,Arr. Antwerpen
7,BE212,Belgique/België,Vlaams Gewest,Prov. Antwerpen,Arr. Mechelen
8,BE213,Belgique/België,Vlaams Gewest,Prov. Antwerpen,Arr. Turnhout
10,BE223,Belgique/België,Vlaams Gewest,Prov. Limburg (BE),Arr. Tongeren
11,BE224,Belgique/België,Vlaams Gewest,Prov. Limburg (BE),Arr. Hasselt
12,BE225,Belgique/België,Vlaams Gewest,Prov. Limburg (BE),Arr. Maaseik
14,BE231,Belgique/België,Vlaams Gewest,Prov. Oost-Vlaanderen,Arr. Aalst
15,BE232,Belgique/België,Vlaams Gewest,Prov. Oost-Vlaanderen,Arr. Dendermonde
16,BE233,Belgique/België,Vlaams Gewest,Prov. Oost-Vlaanderen,Arr. Eeklo


In [147]:
# Spot check: NUTS3 rows whose code contains 'UKN0'
code_has_ukn0 = df_n3['Code'].str.contains('UKN0')
df_n3[code_has_ukn0]

Unnamed: 0,Code,Country,NUTS1,NUTS2,NUTS3
1830,UKN06,United Kingdom,Northern Ireland,Northern Ireland,Belfast
1831,UKN07,United Kingdom,Northern Ireland,Northern Ireland,"Armagh City, Banbridge and Craigavon"
1832,UKN08,United Kingdom,Northern Ireland,Northern Ireland,"Newry, Mourne and Down"
1833,UKN09,United Kingdom,Northern Ireland,Northern Ireland,Ards and North Down
1834,UKN0A,United Kingdom,Northern Ireland,Northern Ireland,Derry City and Strabane
1835,UKN0B,United Kingdom,Northern Ireland,Northern Ireland,Mid Ulster
1836,UKN0C,United Kingdom,Northern Ireland,Northern Ireland,Causeway Coast and Glens
1837,UKN0D,United Kingdom,Northern Ireland,Northern Ireland,Antrim and Newtownabbey
1838,UKN0E,United Kingdom,Northern Ireland,Northern Ireland,Lisburn and Castlereagh
1839,UKN0F,United Kingdom,Northern Ireland,Northern Ireland,Mid and East Antrim
