In [1]:
import altair as alt
import pandas as pd

In [2]:
# Column names in file order; they are passed explicitly to read_csv rather
# than taken from the file.
column_names = [
    "Age_of_wife",
    "Education_of_wife",
    "Education_of_husband",
    "Number_of_children_ever_born",
    "Religion_of_wife",
    "Wife_is_now_working",
    "Occupation_of_husband",
    "Standard_of_living_index",
    "Media_exposure",
    "Contraceptive_method_used",
]

# Load the survey table used by every chart below.
data = pd.read_csv("cmc.data", names=column_names)

In [3]:
# Preview the first five rows to check that the column names line up.
data.head(n=5)

Unnamed: 0,Age_of_wife,Education_of_wife,Education_of_husband,Number_of_children_ever_born,Religion_of_wife,Wife_is_now_working,Occupation_of_husband,Standard_of_living_index,Media_exposure,Contraceptive_method_used
0,24,2,3,3,1,1,2,3,0,1
1,45,1,3,10,1,1,3,4,0,1
2,43,2,3,7,1,1,3,4,0,1
3,42,3,2,9,1,1,3,3,0,1
4,36,3,3,8,1,1,3,2,0,1


In [4]:
# Bars: number of rows for each wife's age.
age_bars = (
    alt.Chart(data, title="Demography")
    .mark_bar()
    .encode(x="Age_of_wife:O", y="count()")
)

# Text layer derived from the bars so it shares their encodings; it prints the
# row count, shifted upwards by `dy` so it sits above the bar top.
age_label_style = {
    "align": "center",
    "baseline": "top",
    "dy": -15,
    "fontWeight": "bold",
}
age_distribution_text = age_bars.mark_text(**age_label_style).encode(text="count()")

# Final chart: bars with the count labels layered on top.
age_distribution = alt.layer(age_bars, age_distribution_text)

In [5]:
# Age (x) against contraceptive method (y); the colour of each mark encodes
# how many rows fall into that combination, using the "bluegreen" scheme.
heatmap = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x="Age_of_wife:O",
        y="Contraceptive_method_used:N",
        color=alt.Color("count()", scale=alt.Scale(scheme="bluegreen")),
    )
)

In [6]:
# Legend text for the religion codes. `labelExpr` relabels the entries that the
# colour scale really contains. `Legend(values=[...])` only selects which
# domain values are listed, so descriptive strings passed there match no data
# value and the resulting legend entries are detached from the bar colours.
religion_labels = (
    "datum.value == 0 ? '0 - Non-Islam' : "
    "datum.value == 1 ? '1 - Islam' : datum.label"
)

# Bars: number of rows per religion code, coloured by that code.
religion_distribution = alt.Chart(data, title="Religion").mark_bar().encode(
    alt.X("Religion_of_wife:O"),
    alt.Y("count()"),
    alt.Color("Religion_of_wife:O", legend=alt.Legend(labelExpr=religion_labels)),
)

# Text layer derived from the bars (same encodings) that prints the row count
# above each bar.
religion_distribution_text = religion_distribution.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text="count()"
)

# Final chart: bars with the count labels layered on top.
religion_distribution += religion_distribution_text

In [7]:
# `df` is a second name for the same DataFrame; later cells read their
# per-category counts through it.
df = data

# Number of rows per religion code (index = code, value = row count).
religion = df.loc[:, "Religion_of_wife"].value_counts()

In [8]:
# Contraceptive-method shares among Non-Islam respondents (Religion_of_wife == 0).
# Every row gets the constant weight 1 / (number of rows with religion code 0),
# so the per-method sum of weights over the filtered rows is that method's share.
pct = 1 / religion[0]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

non_islam_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Non-Islam contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Religion_of_wife == "0"')

# Text layer derived from the bars that prints each share as a percentage.
non_islam_cmu_text = non_islam_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
non_islam_cmu += non_islam_cmu_text

In [9]:
# Contraceptive-method shares among Islam respondents (Religion_of_wife == 1).
# Every row gets the constant weight 1 / (number of rows with religion code 1),
# so the per-method sum of weights over the filtered rows is that method's share.
pct = 1 / religion[1]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

islam_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct),  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Islam contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Religion_of_wife == "1"')

# Text layer derived from the bars that prints each share as a percentage.
islam_cmu_text = islam_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
islam_cmu = islam_cmu + islam_cmu_text

In [10]:
# Line chart (with point markers) of the median number of children ever born
# at each wife's age, one line per religion code.
median_number_of_children = (
    alt.Chart(data)
    .mark_line(point=True)
    .encode(
        x="Age_of_wife:O",
        y="median(Number_of_children_ever_born):Q",
        color="Religion_of_wife:O",
    )
)

In [11]:
# Bars: number of rows per wife's education level.
wife_education_bars = (
    alt.Chart(data, title="Education")
    .mark_bar()
    .encode(x="Education_of_wife:O", y="count()")
)

# Count labels, derived from the bars so both layers share the same encodings.
wife_education_label_style = {
    "align": "center",
    "baseline": "top",
    "dy": -15,
    "fontWeight": "bold",
}
wife_education_distribution_text = wife_education_bars.mark_text(
    **wife_education_label_style
).encode(text="count()")

# Final chart: bars with the count labels layered on top.
wife_education_distribution = alt.layer(
    wife_education_bars, wife_education_distribution_text
)

In [12]:
# Bars: number of rows per husband's education level.
husband_education_bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x="Education_of_husband:O", y="count()")
)

# Count labels, derived from the bars so both layers share the same encodings.
husband_education_label_style = {
    "align": "center",
    "baseline": "top",
    "dy": -15,
    "fontWeight": "bold",
}
husband_education_distribution_text = husband_education_bars.mark_text(
    **husband_education_label_style
).encode(text="count()")

# Final chart: bars with the count labels layered on top.
husband_education_distribution = alt.layer(
    husband_education_bars, husband_education_distribution_text
)

In [13]:
# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

# Bars: mean of the wife's education code for each contraceptive method.
mean_wife_education_cmu = alt.Chart(data).mark_bar().encode(
    alt.X("Contraceptive_method_used:N"),
    alt.Y("mean(Education_of_wife):Q"),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
)

# Text layer derived from the bars that prints the mean with two decimals.
mean_wife_education_cmu_text = mean_wife_education_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("mean(Education_of_wife):Q", format=".2f")
)

# Final chart: bars with the value labels layered on top.
mean_wife_education_cmu += mean_wife_education_cmu_text

In [14]:
# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

# Bars: mean of the husband's education code for each contraceptive method.
mean_husband_education_cmu = alt.Chart(data).mark_bar().encode(
    alt.X("Contraceptive_method_used:N"),
    alt.Y("mean(Education_of_husband):Q"),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
)

# Text layer derived from the bars that prints the mean with two decimals.
mean_husband_education_cmu_text = mean_husband_education_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("mean(Education_of_husband):Q", format=".2f")
)

# Final chart: bars with the value labels layered on top.
mean_husband_education_cmu += mean_husband_education_cmu_text

In [15]:
# Legend text for the media-exposure codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
media_exposure_labels = (
    "datum.value == 0 ? '0 - Good' : "
    "datum.value == 1 ? '1 - Not good' : datum.label"
)

# Bars: number of rows per media-exposure code, coloured by that code.
media_exposure_distribution = alt.Chart(data, title="Media exposure").mark_bar().encode(
    alt.X("Media_exposure:O"),
    alt.Y("count()"),
    alt.Color("Media_exposure:O", legend=alt.Legend(labelExpr=media_exposure_labels)),
)

# Text layer derived from the bars that prints the row count above each bar.
media_exposure_distribution_text = media_exposure_distribution.mark_text(
    align="center",
    baseline="top",
    dy=-20,
    fontWeight="bold"
).encode(
    text="count()"
)

# Final chart: bars with the count labels layered on top.
media_exposure_distribution += media_exposure_distribution_text

In [16]:
# Number of rows per media-exposure code (index = code, value = row count).
media_exposure = df.loc[:, "Media_exposure"].value_counts()

In [17]:
# Contraceptive-method shares among respondents with Media_exposure == 0 ("Good").
# Every row gets the constant weight 1 / (number of rows with code 0), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / media_exposure[0]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

good_media_exposure_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Good media exposure - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Media_exposure == "0"')

# Text layer derived from the bars that prints each share as a percentage.
good_media_exposure_cmu_text = good_media_exposure_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
good_media_exposure_cmu += good_media_exposure_cmu_text

In [18]:
# Contraceptive-method shares among respondents with Media_exposure == 1 ("Not good").
# Every row gets the constant weight 1 / (number of rows with code 1), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / media_exposure[1]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

not_good_media_exposure_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Not good media exposure - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Media_exposure == "1"')

# Text layer derived from the bars that prints each share as a percentage.
not_good_media_exposure_cmu_text = not_good_media_exposure_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-25,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
not_good_media_exposure_cmu += not_good_media_exposure_cmu_text

In [19]:
# Legend text for the employment codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
working_labels = (
    "datum.value == 0 ? '0 - Yes' : "
    "datum.value == 1 ? '1 - No' : datum.label"
)

# Bars: number of rows per employment code, coloured by that code.
wife_is_now_working_distribution = alt.Chart(data, title="Employment").mark_bar().encode(
    alt.X("Wife_is_now_working:O"),
    alt.Y("count()"),
    alt.Color("Wife_is_now_working:O", legend=alt.Legend(labelExpr=working_labels)),
)

# Text layer derived from the bars that prints the row count above each bar.
wife_is_now_working_distribution_text = wife_is_now_working_distribution.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text="count()"
)

# Final chart: bars with the count labels layered on top.
wife_is_now_working_distribution += wife_is_now_working_distribution_text

In [20]:
# Number of rows per employment code (index = code, value = row count).
wife_is_now_working = df.loc[:, "Wife_is_now_working"].value_counts()

In [21]:
# Contraceptive-method shares among rows with Wife_is_now_working == 0 ("Yes").
# Every row gets the constant weight 1 / (number of rows with code 0), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / wife_is_now_working[0]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

wife_is_now_working_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Wife is now working - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Wife_is_now_working == "0"')

# Text layer derived from the bars that prints each share as a percentage.
wife_is_now_working_cmu_text = wife_is_now_working_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
wife_is_now_working_cmu += wife_is_now_working_cmu_text

In [22]:
# Contraceptive-method shares among rows with Wife_is_now_working == 1 ("No").
# Every row gets the constant weight 1 / (number of rows with code 1), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / wife_is_now_working[1]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

wife_isnt_now_working_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Wife isn't now working - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Wife_is_now_working == "1"')

# Text layer derived from the bars that prints each share as a percentage.
wife_isnt_now_working_cmu_text = wife_isnt_now_working_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
wife_isnt_now_working_cmu += wife_isnt_now_working_cmu_text

In [23]:
# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

# Bars: mean of the husband's occupation code per contraceptive method,
# restricted to rows with Wife_is_now_working == 0.
mean_husband_occupation1_cmu = alt.Chart(data).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", title="Wife is now working"),
    alt.Y("mean(Occupation_of_husband):Q"),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels)),
).transform_filter('datum.Wife_is_now_working == "0"')

# Text layer derived from the bars that prints the mean with two decimals.
mean_husband_occupation1_cmu_text = mean_husband_occupation1_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("mean(Occupation_of_husband):Q", format=".2f")
)

# Final chart: bars with the value labels layered on top.
mean_husband_occupation1_cmu += mean_husband_occupation1_cmu_text

# The two employment groups are drawn as separately filtered charts rather than
# as one chart faceted with alt.Column, because the faceted chart disappeared
# once it was placed inside vconcat.

In [24]:
# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

# Bars: mean of the husband's occupation code per contraceptive method,
# restricted to rows with Wife_is_now_working == 1.
mean_husband_occupation2_cmu = alt.Chart(data).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", title="Wife isn't now working"),
    alt.Y("mean(Occupation_of_husband):Q"),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels)),
).transform_filter('datum.Wife_is_now_working == "1"')

# Text layer derived from the bars that prints the mean with two decimals.
mean_husband_occupation2_cmu_text = mean_husband_occupation2_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("mean(Occupation_of_husband):Q", format=".2f")
)

# Final chart: bars with the value labels layered on top.
mean_husband_occupation2_cmu += mean_husband_occupation2_cmu_text

In [25]:
# Bars: number of rows per standard-of-living index, coloured by that index.
soli_bars = (
    alt.Chart(data, title="Standard of living")
    .mark_bar()
    .encode(
        x="Standard_of_living_index:O",
        y="count()",
        color="Standard_of_living_index:O",
    )
)

# Count labels, derived from the bars so both layers share the same encodings.
soli_label_style = {
    "align": "center",
    "baseline": "top",
    "dy": -15,
    "fontWeight": "bold",
}
soli_distribution_text = soli_bars.mark_text(**soli_label_style).encode(text="count()")

# Final chart: bars with the count labels layered on top.
soli_distribution = alt.layer(soli_bars, soli_distribution_text)

In [26]:
# Number of rows per standard-of-living index (index = code, value = row count).
soli = df.loc[:, "Standard_of_living_index"].value_counts()

In [27]:
# Contraceptive-method shares among rows with Standard_of_living_index == 1.
# Every row gets the constant weight 1 / (number of rows with index 1), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / soli[1]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

soli1_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Standard-of-living index 1 - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Standard_of_living_index == "1"')

# Text layer derived from the bars that prints each share as a percentage.
soli1_cmu_text = soli1_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
soli1_cmu += soli1_cmu_text

In [28]:
# Contraceptive-method shares among rows with Standard_of_living_index == 2.
# Every row gets the constant weight 1 / (number of rows with index 2), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / soli[2]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

soli2_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Standard-of-living index 2 - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Standard_of_living_index == "2"')

# Text layer derived from the bars that prints each share as a percentage.
soli2_cmu_text = soli2_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
soli2_cmu += soli2_cmu_text

In [29]:
# Contraceptive-method shares among rows with Standard_of_living_index == 3.
# Every row gets the constant weight 1 / (number of rows with index 3), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / soli[3]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

soli3_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Standard-of-living index 3 - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Standard_of_living_index == "3"')

# Text layer derived from the bars that prints each share as a percentage.
soli3_cmu_text = soli3_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
soli3_cmu += soli3_cmu_text

In [30]:
# Contraceptive-method shares among rows with Standard_of_living_index == 4.
# Every row gets the constant weight 1 / (number of rows with index 4), so the
# per-method sum of weights over the filtered rows is that method's share.
pct = 1 / soli[4]

# Legend text for the method codes. `labelExpr` relabels the real scale
# entries; `Legend(values=[...])` would instead try to list the given strings
# as domain values, which match no data value.
method_labels = (
    "datum.value == 1 ? '1 - No use' : "
    "datum.value == 2 ? '2 - Long-term' : "
    "datum.value == 3 ? '3 - Short-term' : datum.label"
)

soli4_cmu = alt.Chart(data).transform_calculate(
    percentage=str(pct)  # the weight is embedded in the spec as a literal expression
).mark_bar().encode(
    alt.X("Contraceptive_method_used:N", axis=alt.Axis(title="Standard-of-living index 4 - contraceptive method used")),
    alt.Y("sum(percentage):Q", axis=alt.Axis(format="%")),
    alt.Color("Contraceptive_method_used:N", legend=alt.Legend(labelExpr=method_labels))
).transform_filter('datum.Standard_of_living_index == "4"')

# Text layer derived from the bars that prints each share as a percentage.
soli4_cmu_text = soli4_cmu.mark_text(
    align="center",
    baseline="top",
    dy=-15,
    fontWeight="bold"
).encode(
    text=alt.Text("sum(percentage):Q", format=".1%")
)

# Final chart: bars with the percentage labels layered on top.
soli4_cmu += soli4_cmu_text

In [31]:
# Assemble the dashboard: one row per topic, rows stacked vertically.
#
# Concatenated charts share non-positional scales (such as colour) by default,
# so neighbouring charts that colour by different fields (e.g. religion code
# next to contraceptive method) would be merged into one colour scale and one
# combined legend. Resolving colour independently at every concat level lets
# each chart keep its own palette and legend.
alt.vconcat(
    age_distribution,
    heatmap,
    alt.hconcat(
        religion_distribution, non_islam_cmu, islam_cmu
    ).resolve_scale(color="independent"),
    median_number_of_children,
    alt.hconcat(
        wife_education_distribution,
        husband_education_distribution,
        mean_wife_education_cmu,
        mean_husband_education_cmu,
    ).resolve_scale(color="independent"),
    alt.hconcat(
        media_exposure_distribution,
        good_media_exposure_cmu,
        not_good_media_exposure_cmu,
    ).resolve_scale(color="independent"),
    alt.hconcat(
        wife_is_now_working_distribution,
        wife_is_now_working_cmu,
        wife_isnt_now_working_cmu,
        mean_husband_occupation1_cmu,
        mean_husband_occupation2_cmu,
    ).resolve_scale(color="independent"),
    alt.hconcat(
        soli_distribution, soli1_cmu, soli2_cmu, soli3_cmu, soli4_cmu
    ).resolve_scale(color="independent"),
).resolve_scale(color="independent")