# Exploration of a bulk download from ETER

[ETER](https://www.eter-project.com/) is the European Tertiary Education Register. It contains institution-level information about skills supply in the EU. We have bulk-downloaded the data; in this notebook we check some of its characteristics.

In [None]:
%run ../notebook_preamble.ipy

import seaborn as sn
import altair as alt
from altair_saver import save
from selenium import webdriver

from eis.utils.data_processing import *

#Altair saving code
from dotenv import load_dotenv,find_dotenv
# Load environment variables from the .env file located by find_dotenv
# (presumably where 'chrome_driver_path' is defined -- TODO confirm).
load_dotenv(find_dotenv())
# Chrome webdriver handed to save_altair when exporting charts.
# NOTE(review): the driver is never quit in the visible part of this notebook,
# and newer Selenium releases expect the driver path via a Service object --
# confirm against the pinned Selenium version.
w = webdriver.Chrome(os.getenv('chrome_driver_path'))

# Folder that receives the figures saved from this notebook.
save_dir = f"{project_dir}/reports/figures/final_report"

def save_altair_(f, n):
    """Save chart ``f`` under the name ``n`` using the notebook defaults.

    Thin wrapper around ``save_altair`` that always passes the module-level
    webdriver ``w`` and uses ``save_dir`` as ``fig_path``.
    """
    save_altair(f, n, w, fig_path=save_dir)

## Read data

In [None]:
# Raw ETER bulk export, read as a semicolon-separated, UTF-8 encoded CSV.
eter = pd.read_csv(
    f'{project_dir}/data/raw/eter/eter_export_all.csv',
    sep=';',
    encoding='utf-8',
)

In [None]:
# Dimensions (rows, columns) of the raw ETER table.
eter.shape
    

In [None]:
# Country code -> country name lookup: the CSV's 'Unnamed: 0' column supplies
# the keys and its '0' column the values.
country_code_lu = (
    pd.read_csv(f"{data_path}/aux/eu_iso_2_name_lookup.csv")
    .set_index('Unnamed: 0')['0']
    .to_dict()
)

In [None]:
# Number of rows available for each reference year.
eter['Reference year'].value_counts()

In [None]:
# Columns kept for the analysis: institution identifiers plus total and ICT
# enrolment / graduate figures at ISCED 5-7 and ISCED 8.
my_variables = [
    # Institution, year and location
    "English Institution Name",
    "Reference year",
    "Country Code",
    "Region of establishment (NUTS 3)",
    # ISCED 5-7
    "Total students enrolled ISCED 5-7",
    "Students enrolled at ISCED 5-7 - Information and Communication Technologies",
    "Total graduates ISCED 5-7",
    "Graduates at ISCED 5-7 - Information and Communication Technologies",
    # ISCED 8
    "Total students enrolled at ISCED 8",
    "Students enrolled at ISCED 8 - Information and Communication Technologies",
    "Total graduates at ISCED 8",
    "Graduates at ISCED 8 - Information and Communication Technologies",
]

In [None]:
# Keep only the columns of interest. The explicit copy makes eter_filtered an
# independent DataFrame, so the column assignments in the cleaning loop below
# act on it directly rather than on a slice of eter (which would raise
# pandas' SettingWithCopyWarning).
eter_filtered = eter[my_variables].copy()

In [None]:
# Columns holding enrolment / graduate figures (every kept column mentioning ISCED).
student_counts = [col for col in eter_filtered.columns if 'ISCED' in col]

# Non-numeric codes that appear in place of a figure; presumably ETER's
# special-value flags -- TODO confirm against the ETER documentation.
# All of them are treated as missing.
missing_codes = ['m', 'x', 'a', 'c', 's', 'xr', 'xc']


def _to_count(value):
    """Return ``value`` as an int count, or NaN when it is missing.

    Strings keep only the part before the first comma (``'12,5'`` -> 12).
    Values pandas has already parsed as numbers have no ``split`` method,
    so they are truncated with ``int`` instead.
    """
    if pd.isnull(value):
        return np.nan
    if isinstance(value, str):
        return int(value.split(',')[0])
    return int(value)


for col in student_counts:
    print(col)
    # Blank out the flag codes first so only real figures reach _to_count.
    cleaned = eter_filtered[col].replace(missing_codes, np.nan)
    eter_filtered[col] = [_to_count(value) for value in cleaned]
            

In [None]:
# Sum every count column within each (country, reference year) pair.
# NOTE(review): pandas' default sum skips NaN, so a group with no reported
# values totals 0 rather than NaN -- confirm this is the intended reading.
country_counts = (
    eter_filtered
    .groupby(["Country Code", "Reference year"])[student_counts]
    .sum()
    .reset_index()
)

In [None]:
# Wide table of ISCED 5-7 ICT graduates: one row per country code,
# one column per reference year.
country_pivoted = country_counts.pivot_table(
    values='Graduates at ISCED 5-7 - Information and Communication Technologies',
    index='Country Code',
    columns='Reference year',
    aggfunc='sum',
)

In [None]:
# Back to long format: one row per (country code, reference year) with its value.
country_long = country_pivoted.reset_index().melt(id_vars='Country Code')

# Attach the country name by lower-casing the code and looking it up in country_code_lu.
country_long['country_name'] = country_long['Country Code'].str.lower().map(country_code_lu)

In [None]:
# fig,ax = plt.subplots(figsize=(5,8))

# sn.heatmap(country_pivoted.sort_values(2016,ascending=False).drop(2017,axis=1),
#           cmap='Purples')

# save_fig('fig_6_eter.pdf',material_outputs)

In [None]:
# Heatmap: countries on the y axis (sorted by mean value, descending),
# reference years on the x axis, colour encoding the value.
a = (
    alt.Chart(country_long, height=500)
    .mark_rect()
    .encode(
        x='Reference year:O',
        y=alt.Y(
            'country_name',
            title=None,
            sort=alt.EncodingSortField('value', op='mean', order='descending'),
        ),
        color=alt.Color('value', title='ICT graduates'),
    )
)

# Export the figure, then leave the chart as the cell's last expression so it is displayed.
save_altair_(a, 'fig_7_eter')

a