In [7]:
import pandas as pd
import altair as alt

In [8]:
df = pd.read_csv('avgIQpercountry.csv')
df.head()

Unnamed: 0,Rank,Country,Average IQ,Continent,Literacy Rate,Nobel Prizes,HDI (2021),Mean years of schooling - 2021,GNI - 2021,Population - 2023
0,1,Japan,106.48,Asia,0.99,29,0.925,13.4,42274,123294500.0
1,2,Singapore,105.89,Asia,0.97,0,0.939,11.9,90919,6014723.0
2,3,Hong Kong,105.37,Asia,0.94,1,0.952,12.2,62607,7491609.0
3,4,China,104.1,Asia,0.96,8,0.768,7.6,17504,1425671000.0
4,5,South Korea,102.35,Asia,0.98,0,0.925,12.5,44501,51784060.0


# Task 1: To discover relationships between the average IQ of a country and the other variables.

In [9]:
# Implementing selection
selection = alt.selection(type='multi', fields=['Continent'], on='mouseover', nearest=True)
base =  alt.Chart(df).properties(width=575, height=350)

p1 = base.mark_circle().encode(
    alt.X('Literacy Rate').scale(type = "linear"),
    alt.Y('Average IQ'),
    alt.Color('Continent'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(100,600))),
    tooltip = ['Country','Continent'],
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_selection(selection).interactive()

p2 = base.mark_circle().encode(
    alt.X('HDI (2021)').scale(type = "linear"),
    alt.Y('Average IQ'),
    alt.Color('Continent'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(100,600))),
    tooltip = ['Country','Continent'],
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_selection(selection).interactive()

p3 = base.mark_circle().encode(
    alt.X('Mean years of schooling - 2021').scale(type = "linear"),
    alt.Y('Average IQ'),
    alt.Color('Continent'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(100,600))),
    tooltip = ['Country','Continent'],
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_selection(selection).interactive()

p4 = base.mark_circle().encode(
    alt.X('GNI - 2021', title= 'Log of GNI - 2021').scale(type = "log"),
    alt.Y('Average IQ'),
    alt.Color('Continent'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(100,600))),
    tooltip = ['Country','Continent'],
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_selection(selection).interactive()

p5 = base.mark_circle().encode(
    alt.X('Population - 2023', title = 'Log of Population - 2023').scale(type = "log"),
    alt.Y('Average IQ'),
    alt.Color('Continent'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(100,600))),
    tooltip = ['Country','Continent'],
    opacity = alt.condition(selection, alt.value(1), alt.value(.2))
).add_selection(selection).interactive()

chart = (p1 | p2 ) & (p3 | p4)

chart.save('averageiq.html', embed_options={'renderer':'svg'})

   Use 'selection_point()' or 'selection_interval()' instead; these functions also include more helpful docstrings.
        combined and should be specified using "selection_point()".


# Task 2: To discover relationships between the average IQ of a continent and the other variables.

In [10]:
p1 = alt.Chart(df).mark_circle().encode(
    alt.X('average(Literacy Rate)'),
    alt.Y('average(Average IQ)'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(50,300))),
    alt.Color('Continent'),
    tooltip = ['Continent','average(Literacy Rate)','average(Average IQ)']
).interactive()

p2 = alt.Chart(df).mark_circle().encode(
    alt.X('average(HDI (2021))').scale(type = "linear"),
    alt.Y('average(Average IQ)'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(50,300))),
    alt.Color('Continent'),
    tooltip = ['Continent','average(HDI (2021))','average(Average IQ)']
).interactive()

p3 = alt.Chart(df).mark_circle().encode(
    alt.X('average(Mean years of schooling - 2021)').scale(type = "linear"),
    alt.Y('average(Average IQ)'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(50,300))),
    alt.Color('Continent'),
    tooltip = ['Continent','average(Mean years of schooling - 2021)','average(Average IQ)']
).interactive()

p4 = alt.Chart(df).mark_circle().encode(
    alt.X('sum(GNI - 2021)', title= 'Log of Total GNI - 2021').scale(type = "log"),
    alt.Y('average(Average IQ)'),
    alt.Size('sum(Population - 2023)',
        scale=alt.Scale(range=(50,300))),
    alt.Color('Continent'),
    tooltip = ['Continent','sum(GNI - 2021)','average(Average IQ)']
).interactive()

p5 = alt.Chart(df).mark_circle().encode(
    alt.X('sum(Population - 2023)', title = 'Log of Total Population - 2023').scale(type = "log"),
    alt.Y('average(Average IQ)'),
    alt.Color('Continent'),
    tooltip = ['Continent','sum(Population - 2023)','average(Average IQ)']
).interactive()

#(p1 | p2) & (p3 | p4)