In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Questions
1. Do countries with more religious populations have lower education levels?
2. Do countries with less religious populations have higher education levels?
3. Does any specific type of religion affect the education levels?

## Question 1: Do countries with more religious populations have lower education levels?
1. Which twenty countries/regions have the largest religious population?
2. Which twenty countries/regions have the lowest education levels?
3. Do these two lists overlap, and if so, how many countries appear on both?

## Question 2: Do countries with less religious populations have higher education levels?
1. Which twenty countries have the smallest religious population?
2. Which twenty countries have the highest education levels?
3. Do these two lists overlap, and if so, how many countries appear on both?

## Question 3: Does any specific type of religion affect the education levels?
1. What are the education levels of countries with different religions?

In [None]:
# Input directories, relative to the notebook's working directory.
# WORLD_RELIGIONS_DIR holds regional.csv, national.csv and global.csv.
WORLD_RELIGIONS_DIR = "../input/world-religions/"
# EDUCATION_STATISTICS_DIR holds the EdStats*.csv files.
EDUCATION_STATISTICS_DIR = "../input/education-statistics/edstats-csv-zip-32-mb-/"

# Education Statistics

In [None]:
def _read_edstats(file_name):
    """Read one CSV file from EDUCATION_STATISTICS_DIR into a DataFrame."""
    return pd.read_csv(os.path.join(EDUCATION_STATISTICS_DIR, file_name))


# Load the five EdStats tables used in this notebook.
es_stats_series_df = _read_edstats("EdStatsSeries.csv")
es_stats_country_df = _read_edstats("EdStatsCountry.csv")
es_stats_footnote_df = _read_edstats("EdStatsFootNote.csv")
es_stats_country_series_df = _read_edstats("EdStatsCountry-Series.csv")
es_stats_data_df = _read_edstats("EdStatsData.csv")

In [None]:
# Preview the first rows of the table loaded from EdStatsSeries.csv.
es_stats_series_df.head()

In [None]:
# Show the "Short definition" text of the first series as an example.
es_stats_series_df["Short definition"].iloc[0]

In [None]:
# Preview the first rows of the table loaded from EdStatsCountry.csv.
es_stats_country_df.head()

In [None]:
# Preview the first rows of the table loaded from EdStatsFootNote.csv.
es_stats_footnote_df.head()

In [None]:
# Preview the first rows of the table loaded from EdStatsCountry-Series.csv.
es_stats_country_series_df.head()

In [None]:
# Preview the first rows of the main data table loaded from EdStatsData.csv.
es_stats_data_df.head()

In [None]:
# Year columns 2020, 2025, ..., 2100 (presumably projections, going by the
# "prj_rmvd" name - confirm) plus the "Unnamed: 69" column are not needed.
projection_columns = [str(year) for year in range(2020, 2101, 5)]
es_stats_data_prj_rmvd_df = es_stats_data_df.drop(
    columns=projection_columns + ["Unnamed: 69"]
)
es_stats_data_prj_rmvd_df.head()

In [None]:
# First five (series code, short definition) pairs as a plain array.
es_stats_series_df[["Series Code", "Short definition"]].iloc[:5].values

# World Religions Statistics

In [None]:
# Load the three World Religions tables (regional, national, global) from WORLD_RELIGIONS_DIR.
wr_regional_df, wr_national_df, wr_global_df = (
    pd.read_csv(os.path.join(WORLD_RELIGIONS_DIR, file_name))
    for file_name in ("regional.csv", "national.csv", "global.csv")
)

In [None]:
# Report the distinct values of the two key columns of the regional table.
for label, column in (("Unique years:", "year"), ("Unique regions:", "region")):
    print(label, wr_regional_df[column].unique())

In [None]:
# Preview the first rows of the regional World Religions table.
wr_regional_df.head()

In [None]:
# Number of rows in the regional table.
print(f"Total of {wr_regional_df.shape[0]} rows.")

In [None]:
# Preview the first rows of the national World Religions table.
wr_national_df.head()

In [None]:
# Number of rows in the national table.
print(f"Total of {wr_national_df.shape[0]} rows.")

In [None]:
# Preview the first rows of the global World Religions table.
wr_global_df.head()

In [None]:
# Number of rows in the global table.
print(f"Total of {wr_global_df.shape[0]} rows.")

# Question Answers
## Question 1
Do countries with more religious populations have lower education levels?

### Question 1.1
Which twenty countries/regions have the largest religious population?

In [None]:
# The 20 national rows for 2010 with the largest `religion_sumpercent` values.
is_2010 = wr_national_df["year"] == 2010
wr_national_df.loc[is_2010].nlargest(n=20, columns="religion_sumpercent")

### Question 1.2
Which twenty countries/regions have the lowest education levels?

This question is answered with the following information:
`Percentage of population age 15+ with no education in 2010.`

In [None]:
# The 20 rows with the largest 2010 value of indicator BAR.NOED.15UP.ZS
# (percentage of population age 15+ with no education).
is_no_education = es_stats_data_prj_rmvd_df["Indicator Code"] == "BAR.NOED.15UP.ZS"
es_stats_data_prj_rmvd_df.loc[is_no_education].nlargest(n=20, columns="2010")

### Question 1.3
Do these two lists overlap, and if so, how many countries appear on both?

In [None]:
# Top-20 lists for 2010: largest `religion_sumpercent`, and largest share of the
# population aged 15+ with no education (indicator BAR.NOED.15UP.ZS).
lar_relig_pop_countries_df = (
    wr_national_df
    .loc[wr_national_df["year"] == 2010]
    .nlargest(20, "religion_sumpercent")
)
low_ed_levels_countries_df = (
    es_stats_data_prj_rmvd_df
    .loc[es_stats_data_prj_rmvd_df["Indicator Code"] == "BAR.NOED.15UP.ZS"]
    .nlargest(20, "2010")
)

# Countries present on both lists, matched on the country code.
# The country name from each source is shown next to the code so that a false
# match is visible instead of hidden behind a bare set of codes.
# NOTE(review): `code` (religion data) and `Country Code` (EdStats) may follow
# different coding schemes (e.g. Correlates of War vs. ISO 3166-1 alpha-3), in
# which case equal codes can denote different countries and real overlaps can
# be missed - confirm against the dataset documentation.
pd.merge(
    lar_relig_pop_countries_df[["code", "state"]],
    low_ed_levels_countries_df[["Country Code", "Country Name"]],
    left_on="code",
    right_on="Country Code",
)

Liberia and Mali appear on both lists: the 20 countries with the lowest education levels and the 20 countries with the largest religious populations in 2010.

### Question 1 Conclusion
**Do countries with more religious populations have lower education levels?**

Only 2 of the 20 countries (10%) appear on both lists, so this comparison gives no strong evidence that the answer is *Yes*. I therefore conclude that the answer is **No**.

## Question 2
Do countries with less religious populations have higher education levels?

### Question 2.1
Which twenty countries have the smallest religious population?

In [None]:
# The 20 national rows for 2010 with the smallest `religion_sumpercent` values.
is_2010 = wr_national_df["year"] == 2010
wr_national_df.loc[is_2010].nsmallest(n=20, columns="religion_sumpercent")

### Question 2.2
Which twenty countries have the highest education levels?

In [None]:
# The 20 rows with the smallest 2010 value of indicator BAR.NOED.15UP.ZS,
# i.e. the lowest share of the population aged 15+ with no education.
is_no_education = es_stats_data_prj_rmvd_df["Indicator Code"] == "BAR.NOED.15UP.ZS"
es_stats_data_prj_rmvd_df.loc[is_no_education].nsmallest(n=20, columns="2010")

### Question 2.3
Do these two lists overlap, and if so, how many countries appear on both?

In [None]:
# Bottom-20 lists for 2010: smallest `religion_sumpercent`, and smallest share
# of the population aged 15+ with no education (indicator BAR.NOED.15UP.ZS).
smal_relig_pop_countries_df = (
    wr_national_df
    .loc[wr_national_df["year"] == 2010]
    .nsmallest(20, "religion_sumpercent")
)
high_ed_levels_countries_df = (
    es_stats_data_prj_rmvd_df
    .loc[es_stats_data_prj_rmvd_df["Indicator Code"] == "BAR.NOED.15UP.ZS"]
    .nsmallest(20, "2010")
)

# Countries present on both lists, matched on the country code.
# The country name from each source is shown next to the code so that a false
# match is visible instead of hidden behind a bare set of codes.
# NOTE(review): `code` (religion data) and `Country Code` (EdStats) may follow
# different coding schemes (e.g. Correlates of War vs. ISO 3166-1 alpha-3), in
# which case equal codes can denote different countries and real overlaps can
# be missed - confirm against the dataset documentation.
pd.merge(
    smal_relig_pop_countries_df[["code", "state"]],
    high_ed_levels_countries_df[["Country Code", "Country Name"]],
    left_on="code",
    right_on="Country Code",
)

Australia, Belize and Hungary appear on both lists: the 20 countries with the highest education levels and the 20 countries with the smallest religious populations in 2010.

### Question 2 Conclusion
**Do countries with less religious populations have higher education levels?**

Only 3 of the 20 countries (15%) appear on both lists, so this comparison gives no strong evidence that the answer is *Yes*. I therefore conclude that the answer is **No**.

## Question 3
Does any specific type of religion affect the education levels?

### Question 3.1
What are the education levels of countries with different religions?

In [None]:
# NOTE: both names are reused from Question 2, but here they hold *all* rows
# for 2010 rather than a top/bottom-20 selection.
smal_relig_pop_countries_df = wr_national_df.loc[wr_national_df["year"] == 2010]

# Keep only the 2010 value of the no-education indicator: drop every other
# year column from 1970 through 2017.
other_year_columns = [str(year) for year in range(1970, 2018) if year != 2010]
high_ed_levels_countries_df = (
    es_stats_data_prj_rmvd_df
    .loc[es_stats_data_prj_rmvd_df["Indicator Code"] == "BAR.NOED.15UP.ZS"]
    .drop(columns=other_year_columns)
)

In [None]:
# Rename the EdStats key column to "code", the key column name used by the
# religion data. Reassign rather than mutate with inplace=True so the cell is a
# plain transformation and safe to re-run.
high_ed_levels_countries_df = high_ed_levels_countries_df.rename(
    columns={"Country Code": "code"}
)

In [None]:
# Join religion shares and the 2010 education value on the country code, keeping
# the identifying columns, the education value ("2010") and every "*_percent" column.
percent_columns = [
    name for name in smal_relig_pop_countries_df.columns if name.endswith("_percent")
]
kept_columns = ["year", "state", "code", "Country Name", "2010"] + percent_columns
small_relig_high_ed_countries_df = smal_relig_pop_countries_df.merge(
    high_ed_levels_countries_df, on="code"
)[kept_columns]
small_relig_high_ed_countries_df.head()

In [None]:
# Correlation of each religion-share column with the 2010 no-education value.
# Only numeric columns are passed to corr(): the frame also holds string columns
# (state, code, Country Name), and corr() raises on those in pandas >= 2.0
# instead of silently skipping them as older versions did.
ed_rel_corr_df = (
    small_relig_high_ed_countries_df
    .drop("year", axis=1)
    .select_dtypes(include="number")
    .corr()[["2010"]]
    .dropna()      # columns with undefined correlation
    .drop("2010")  # the education column's correlation with itself
)
ed_rel_corr_df

In [None]:
# Row position of the correlation with the largest absolute value.
np.argmax(np.absolute(ed_rel_corr_df["2010"].values))

In [None]:
# Show the row whose correlation with the 2010 education value is strongest in absolute terms.
strongest_position = np.argmax(np.absolute(ed_rel_corr_df["2010"].values))
ed_rel_corr_df.iloc[strongest_position]

**Work in progress...**