# Preprocesado

In [2]:
import pandas as pd

def load_data(file_path):
    """
    Load the education dataset from a CSV file into a tidy pandas DataFrame.

    Only the five columns needed by the notebook are kept; they are renamed
    to short names, 'Year' is cast to int32 and every row whose country is
    'New Zealand' is discarded.

    Parameters:
    file_path (str): The file path of the CSV file.

    Returns:
    pandas.DataFrame: Columns 'Country', 'Year', 'Education Level', 'Unit'
        and 'Value', in that order.

    Raises:
    KeyError: If any of the expected source columns is missing from the CSV.
    """
    # Source column name -> name used throughout the notebook.
    # The dict order also fixes the column order of the result.
    column_names = {
        'Reference area': 'Country',
        'TIME_PERIOD': 'Year',
        'Education level': 'Education Level',
        'Unit of measure': 'Unit',
        'OBS_VALUE': 'Value',
    }

    # Read the data into a pandas DataFrame
    data = pd.read_csv(file_path, header=0)

    # Select the required columns and rename them
    data = data[list(column_names)].rename(columns=column_names)

    # Convert the 'Year' column to integer type.
    # astype returns a new frame, so the result has to be assigned back
    # (the previous code discarded it and the cast never took effect).
    data = data.astype({'Year': 'int32'})

    # Remove rows with 'Country' as 'New Zealand'
    data = data[data['Country'] != 'New Zealand']

    return data

import os

# Specify the file path.
# The default is the original author's local location; set the EDU_DATA_CSV
# environment variable to point the notebook at the CSV on another machine
# without editing this cell.
file_path = os.environ.get(
    "EDU_DATA_CSV",
    "C:/Users/jorge/Documents/PEC3-DataVisualization/Data/data_edu_europe.csv",
)

# Load the data
data = load_data(file_path)


In [13]:
# Collect the distinct countries and education levels present in the data
distinct_countries = data['Country'].unique()
distinct_edu_levels = data['Education Level'].unique()

# Show the countries first, then the education levels
for distinct_values in (distinct_countries, distinct_edu_levels):
    print(distinct_values)

['Sweden' 'Slovenia' 'Austria' 'Romania' 'France' 'Finland' 'Belgium'
 'Estonia' 'Spain' 'Italy' 'Netherlands' 'Germany' 'Slovak Republic'
 'Croatia' 'Denmark' 'Iceland' 'European Union (25 countries)' 'Latvia'
 'Czechia' 'Switzerland' 'Luxembourg' 'Greece' 'Ireland' 'Portugal'
 'Lithuania' 'Norway' 'Poland' 'Hungary' 'United Kingdom']
['Tertiary education' 'Primary to post-secondary non-tertiary education'
 'Primary to tertiary education']


In [None]:
!pip install dash
!pip install matplotlib

# Gráfico de Líneas

1. Gráfico de year vs. USD dollars total de España
2. Gráfico de year vs. USD dollars subdividido entre terciaria y no-terciaria de España
3. Gráfico de year vs. USD dollars total de España en comparación con la Unión Europea
4. Gráfico de year vs. USD dollars total de España en comparación con todos los países


La siguiente celda prepara los datos para la visualización de la evolución de la educación.

In [10]:
# Set the first year to plot and the minimum number of yearly observations a
# country needs in order to be included in the comparison chart
year = 2008
min_non_nan = 10

# Unit shared by every series of the line charts
USD_PER_STUDENT_UNIT = 'US dollars per student, PPP converted'


def select_usd_per_student(frame, education_level, start_year, country=None):
    """
    Select the US-dollars-per-student rows of one education level.

    Parameters:
    frame (pandas.DataFrame): Data with 'Country', 'Year', 'Education Level',
        'Unit' and 'Value' columns.
    education_level (str): Value of 'Education Level' to keep.
    start_year (int): Rows with 'Year' below this value are discarded.
    country (str, optional): When given, keep only this country.

    Returns:
    pandas.DataFrame: Matching rows without missing values, sorted by
        ascending 'Year'.
    """
    # A boolean mask keeps the original dtypes; DataFrame.where would turn
    # every rejected row into NaN and upcast 'Year' to float.
    mask = (
        (frame['Year'] >= start_year)
        & (frame['Education Level'] == education_level)
        & (frame['Unit'] == USD_PER_STUDENT_UNIT)
    )
    if country is not None:
        mask = mask & (frame['Country'] == country)

    # mergesort is stable, so rows of the same year keep their file order
    # and the resulting country order is reproducible between runs.
    return frame[mask].dropna().sort_values(by='Year', kind='mergesort')


# Total (primary to tertiary) spending per student for every country
data_line = select_usd_per_student(data, 'Primary to tertiary education', year)

# Filter out countries with less than min_non_nan non-NaN values
observations_per_country = data_line.groupby('Country')['Value'].transform('count')
data_line = data_line[observations_per_country >= min_non_nan]

# Country name -> its ('Year', 'Value') series. sort=False keeps the
# countries in order of first appearance and the rows of each group stay
# in the ascending-year order established above.
country_data_dict = {
    country: country_rows[['Year', 'Value']]
    for country, country_rows in data_line.groupby('Country', sort=False)
}

# Spain's non-tertiary (primary to post-secondary) series
spain_primary_edu = select_usd_per_student(
    data,
    'Primary to post-secondary non-tertiary education',
    year,
    country='Spain',
)[['Year', 'Value']]

# Spain's tertiary series
spain_terciar_edu = select_usd_per_student(
    data,
    'Tertiary education',
    year,
    country='Spain',
)[['Year', 'Value']]


La siguiente celda lanza la aplicación Dash.

In [11]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import plotly.graph_objs as go
import pandas as pd


# Dash application for the line charts: a single graph plus a button that
# advances the story one step per click.
app = dash.Dash(__name__)

app.layout = html.Div(
    children=[
        dcc.Graph(id='line-chart'),
        html.Button(
            children='Siguiente',
            id='button-show-next',
            n_clicks=0,
        ),
    ]
)


def _spline_trace(points, name, smoothing, line_width, marker_size):
    """Build a markers+lines spline trace from a frame with 'Year' and 'Value' columns."""
    return go.Scatter(
        x=points['Year'],
        y=points['Value'],
        mode='markers+lines',
        name=name,
        line=dict(shape='spline', smoothing=smoothing, width=line_width),
        marker=dict(size=marker_size),
    )


def _line_layout(title, transition_ms=None):
    """Build the layout shared by all steps; animate the update when transition_ms is given."""
    options = dict(
        title=title,
        xaxis=dict(title='Año'),
        yaxis=dict(title='USD por estudiante'),
        showlegend=True,
    )
    if transition_ms is not None:
        options['transition'] = {'duration': transition_ms, 'easing': 'cubic-in-out'}
    return go.Layout(**options)


@app.callback(Output('line-chart', 'figure'),
              [Input('button-show-next', 'n_clicks')],
              [State('line-chart', 'figure')])
def update_graph(n_clicks, current_figure):
    """
    Callback that advances the line chart one step per button click.

    Each step starts from the traces already shown in the graph:
      0 - add Spain's total spending per student.
      1 - add Spain's non-tertiary and tertiary series.
      2 - keep only the first trace and add the EU-25 series.
      3 - thicken the traces on screen and add every other country.
    Any further click leaves the figure untouched.

    Parameters:
        - n_clicks (int): The number of clicks on the button.
        - current_figure (dict): The current figure of the line chart.

    Returns:
        - dict: The updated figure with new traces and layout, or
          dash.no_update when there is no step for n_clicks.
    """
    # Work on a copy of the trace list; tolerate a missing or empty figure.
    current_traces = list((current_figure or {}).get('data') or [])

    if n_clicks == 0:
        layout = _line_layout('Gasto en educación por estudiante en España')
        current_traces.append(
            _spline_trace(country_data_dict['Spain'], 'España', 0.3, 2, 8)
        )

    elif n_clicks == 1:
        layout = _line_layout(
            'Gasto en educación por estudiante en España desgranado por tipo de educación',
            transition_ms=600,
        )
        current_traces.append(_spline_trace(spain_primary_edu, 'No terciaria', 0.6, 1, 9))
        current_traces.append(_spline_trace(spain_terciar_edu, 'Terciaria', 0.6, 1, 8))

    elif n_clicks == 2:
        # Drop the per-level series and keep only the first trace (Spain's
        # total when the steps run in order). Slicing, unlike [0], cannot
        # fail on an empty list.
        current_traces = current_traces[:1]

        layout = _line_layout(
            'Gasto en educación por estudiante en España comparado con la UE-25',
            transition_ms=500,
        )
        current_traces.append(
            _spline_trace(country_data_dict['European Union (25 countries)'], 'EU-25', 0.3, 2, 8)
        )

    elif n_clicks == 3:
        # Emphasise the traces already on screen so they stand out from the
        # crowd of countries added below. The traces come back from the
        # browser as plain dicts; setdefault guards against a missing key.
        for trace in current_traces:
            trace.setdefault('line', {})['width'] = 5
            trace.setdefault('marker', {})['size'] = 11

        layout = _line_layout(
            'Gasto en educación por estudiante en España comparado con otros países europeos',
            transition_ms=1000,
        )

        already_shown = {'Spain', 'European Union (25 countries)'}
        current_traces.extend(
            _spline_trace(points, country, 0.7, 1, 6)
            for country, points in country_data_dict.items()
            if country not in already_shown
        )

    else:
        return dash.no_update

    return {'data': current_traces, 'layout': layout}

if __name__ == '__main__':
    # app.run is the supported entry point; app.run_server is its deprecated
    # alias and is no longer available in recent Dash releases.
    app.run(debug=True)


# Gráfico de barras

1. Gráfico de barras de country vs. GDP porcentaje total en 2005
2. Gráfico de barras de country vs. GDP porcentaje total entre 2005 y 2019
3. Gráfico de barras de country vs. GDP porcentaje subdividido entre terciaria y no-terciaria en 2019


In [12]:
# Define the years for the bars
year_old = 2005
year_new = 2019

# Unit shared by every series of the bar charts
GDP_SHARE_UNIT = 'Percentage of GDP per capita'


def select_gdp_share(frame, target_year, education_level):
    """
    Select the percentage-of-GDP-per-capita rows of one year and education level.

    Parameters:
    frame (pandas.DataFrame): Data with 'Country', 'Year', 'Education Level',
        'Unit' and 'Value' columns.
    target_year (int): Value of 'Year' to keep.
    education_level (str): Value of 'Education Level' to keep.

    Returns:
    pandas.DataFrame: 'Country' and 'Value' columns of the matching rows
        without missing values, sorted by ascending 'Value', with a fresh
        0..n-1 index.
    """
    # A boolean mask keeps the original dtypes; DataFrame.where would turn
    # every rejected row into NaN before dropping it.
    mask = (
        (frame['Year'] == target_year)
        & (frame['Unit'] == GDP_SHARE_UNIT)
        & (frame['Education Level'] == education_level)
    )
    return (
        frame[mask]
        .dropna()
        .sort_values(by='Value', kind='mergesort')
        .reset_index(drop=True)[['Country', 'Value']]
    )


def keep_countries(frame, countries):
    """Keep only the rows whose 'Country' is in countries and renumber the index from 0."""
    # Renumbering matters: code that turns an index label into a bar position
    # needs the labels to match the row positions after filtering.
    return frame[frame['Country'].isin(countries)].reset_index(drop=True)


# One frame per (year, education level) combination
data_bar_old_total = select_gdp_share(data, year_old, 'Primary to tertiary education')
data_bar_old_tertiary = select_gdp_share(data, year_old, 'Tertiary education')
data_bar_new_total = select_gdp_share(data, year_new, 'Primary to tertiary education')
data_bar_new_tertiary = select_gdp_share(data, year_new, 'Tertiary education')

# Get the set of countries for each combination
countries_old_total = set(data_bar_old_total['Country'])
countries_old_tertiary = set(data_bar_old_tertiary['Country'])
countries_new_total = set(data_bar_new_total['Country'])
countries_new_tertiary = set(data_bar_new_tertiary['Country'])

# Find the common countries across all combinations
common_countries = list(
    countries_old_total.intersection(
        countries_old_tertiary, countries_new_total, countries_new_tertiary
    )
)

# Filter the data for the common countries so every chart shows the same bars
data_bar_old_total = keep_countries(data_bar_old_total, common_countries)
data_bar_old_tertiary = keep_countries(data_bar_old_tertiary, common_countries)
data_bar_new_total = keep_countries(data_bar_new_total, common_countries)
data_bar_new_tertiary = keep_countries(data_bar_new_tertiary, common_countries)

In [13]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import plotly.graph_objs as go
import pandas as pd

# Dash application for the bar charts: one graph and a button that moves the
# visualisation to its next step on every click.
app = dash.Dash(__name__)

app.layout = html.Div(children=[
    dcc.Graph(id='line-chart'),
    html.Button(children='Siguiente', id='button-show-next', n_clicks=0),
])


def _highlight_colors(countries, base_color, eu_color, spain_color):
    """Return one colour per country, singling out the EU aggregate and Spain by name."""
    special = {
        'European Union (25 countries)': eu_color,
        'Spain': spain_color,
    }
    return [special.get(country, base_color) for country in countries]


def _bar_layout(title, show_legend=True, transition_ms=None, category_order=None):
    """
    Build the layout shared by all bar-chart steps.

    transition_ms animates the update when given; category_order, when given,
    fixes the order of the x-axis categories.
    """
    xaxis = dict(title='País')
    if category_order is not None:
        xaxis.update(categoryorder='array', categoryarray=list(category_order))

    options = dict(
        title=title,
        xaxis=xaxis,
        yaxis=dict(title='Porcentaje del PIB (%)'),
        showlegend=show_legend,
    )
    if transition_ms is not None:
        options['transition'] = {'duration': transition_ms, 'easing': 'cubic-in-out'}
    return go.Layout(**options)


@app.callback(Output('line-chart', 'figure'),
              [Input('button-show-next', 'n_clicks')],
              [State('line-chart', 'figure')])
def update_graph(n_clicks, current_figure):
    """
    Callback that advances the bar chart one step per button click.

    Each step starts from the traces already shown in the graph:
      0 - add the 2005 totals, highlighting the EU aggregate and Spain.
      1 - add the 2019 totals next to them.
      2 - reorder the x axis following the 2019 totals.
      3 - add the 2005 and 2019 tertiary bars.
      4 - reorder the x axis following the 2019 tertiary values.
    Any further click leaves the figure untouched.

    Parameters:
        - n_clicks (int): The number of clicks on the button.
        - current_figure (dict): The current figure of the graph.

    Returns:
        - dict: The updated figure with new traces and layout, or
          dash.no_update when there is no step for n_clicks.
    """
    # Work on a copy of the trace list; tolerate a missing or empty figure.
    current_traces = list((current_figure or {}).get('data') or [])

    if n_clicks == 0:
        layout = _bar_layout(
            'Porcentaje del PIB per capita destinado a educación en Europa 2005',
            show_legend=False,
        )

        # Colours are assigned by country name. Index labels are not reliable
        # bar positions once rows have been filtered out of the frame.
        colors = _highlight_colors(
            data_bar_old_total['Country'],
            base_color='#051c2c',
            eu_color='#067D1C',
            spain_color='#9F0202',
        )
        current_traces.append(go.Bar(
            x=data_bar_old_total['Country'],
            y=data_bar_old_total['Value'],
            name='2005',
            opacity=0.75,
            offsetgroup=0,
            marker=dict(color=colors),
        ))

    elif n_clicks == 1:
        layout = _bar_layout(
            'Porcentaje del PIB per capita destinado a educación en Europa 2005 y 2019',
            transition_ms=1000,
        )

        # NOTE(review): the previous code coloured hard-coded positions 4
        # ('#E08D1C') and 6 ('#FE65EA'). The parallel with step 0 suggests
        # these were the EU and Spain bars respectively - confirm.
        colors = _highlight_colors(
            data_bar_new_total['Country'],
            base_color='#00AAAD',
            eu_color='#E08D1C',
            spain_color='#FE65EA',
        )
        current_traces.append(go.Bar(
            x=data_bar_new_total['Country'],
            y=data_bar_new_total['Value'],
            name='2019',
            offsetgroup=1,
            opacity=0.75,
            marker=dict(color=colors),
        ))

    elif n_clicks == 2:
        # Same bars, x axis reordered to follow the 2019 totals
        layout = _bar_layout(
            'Porcentaje del PIB per capita destinado a educación en Europa 2005 y 2019',
            transition_ms=2000,
            category_order=data_bar_new_total['Country'],
        )

    elif n_clicks == 3:
        layout = _bar_layout(
            'Porcentaje del PIB per capita destinado a educación en Europa entre 2005 y 2019',
            transition_ms=1000,
        )

        # The tertiary bars reuse the offset group of the total of the same year
        current_traces.append(go.Bar(
            x=data_bar_old_tertiary['Country'],
            y=data_bar_old_tertiary['Value'],
            name='2005-Terciaria',
            opacity=0.2,
            offsetgroup=0,
        ))
        current_traces.append(go.Bar(
            x=data_bar_new_tertiary['Country'],
            y=data_bar_new_tertiary['Value'],
            name='2019-Terciaria',
            opacity=0.2,
            offsetgroup=1,
        ))

    elif n_clicks == 4:
        # Same bars, x axis reordered to follow the 2019 tertiary values
        layout = _bar_layout(
            'Porcentaje del PIB per capita destinado a educación en Europa 2005 y 2019',
            transition_ms=2000,
            category_order=data_bar_new_tertiary['Country'],
        )

    else:
        return dash.no_update

    return {'data': current_traces, 'layout': layout}

if __name__ == '__main__':
    # app.run is the supported entry point; app.run_server is its deprecated
    # alias and is no longer available in recent Dash releases.
    app.run(debug=True)
