# Check how Europe's and Africa's SPI scores vary across the 4 most related variables


## Scope
- [x] Make 2 different dataframes for the two continents.
- [x] Compare the 4 variables for the two continents.
- [x] Plot the spi_score against the top 4 variables.

## Plot Comparison

![visualization%20%284%29.png](attachment:visualization%20%284%29.png)

- `Access to advanced education` and `Access to information and communication` do not follow the same pattern in Africa as they do in Europe.
- Improving the above two factors can increase Africa's SPI score.

In [9]:
import pandas as pd
import altair as alt

In [10]:
# Load the SPI dataset from CSV into a DataFrame.
# NOTE(review): this is an absolute path on one specific Windows machine; the
# notebook will not run elsewhere without editing it — consider a path relative
# to the project directory.
df = pd.read_csv('C:/Users/Tanish/Desktop/SPI-Analysis-Project/Data/new_spi.csv')

In [11]:
# Preview the first rows to confirm the CSV loaded with the expected columns
df.head()

Unnamed: 0,spi_rank,country,spi_score,basic_human_needs,wellbeing,opportunity,basic_nutri_med_care,water_sanitation,shelter,personal_safety,access_basic_knowledge,access_info_comm,health_wellness,env_quality,personal_rights,personal_freedom_choice,inclusiveness,access_adv_edu,continent
0,1,Norway,92.63,95.29,93.3,89.3,98.81,98.33,93.75,90.29,98.66,95.8,89.32,89.44,96.34,91.16,83.77,85.92,Europe
1,2,Finland,92.26,95.62,93.09,88.07,98.99,99.26,96.48,87.75,96.32,95.14,85.73,95.15,96.13,88.1,82.81,85.23,Europe
2,3,Denmark,92.15,95.3,92.74,88.41,98.62,98.21,94.92,89.46,97.44,98.18,85.15,90.2,97.08,90.03,81.64,84.89,Europe
3,4,Iceland,91.78,96.66,93.65,85.04,98.99,98.82,93.16,95.66,99.51,93.12,91.02,90.93,95.14,88.01,77.63,79.39,Europe
4,5,Switzerland,91.78,95.25,93.8,86.28,98.72,98.96,92.97,90.35,98.6,95.07,91.5,90.05,96.69,90.65,74.81,82.99,Europe


In [12]:
# (rows, columns) of the full dataset
df.shape

(168, 19)

### Filter the data for Europe

In [13]:
# Keep only the rows whose continent is Europe
eur_df = df.loc[df['continent'] == 'Europe']

In [14]:
# Preview the first rows of the Europe subset
eur_df.head()

Unnamed: 0,spi_rank,country,spi_score,basic_human_needs,wellbeing,opportunity,basic_nutri_med_care,water_sanitation,shelter,personal_safety,access_basic_knowledge,access_info_comm,health_wellness,env_quality,personal_rights,personal_freedom_choice,inclusiveness,access_adv_edu,continent
0,1,Norway,92.63,95.29,93.3,89.3,98.81,98.33,93.75,90.29,98.66,95.8,89.32,89.44,96.34,91.16,83.77,85.92,Europe
1,2,Finland,92.26,95.62,93.09,88.07,98.99,99.26,96.48,87.75,96.32,95.14,85.73,95.15,96.13,88.1,82.81,85.23,Europe
2,3,Denmark,92.15,95.3,92.74,88.41,98.62,98.21,94.92,89.46,97.44,98.18,85.15,90.2,97.08,90.03,81.64,84.89,Europe
3,4,Iceland,91.78,96.66,93.65,85.04,98.99,98.82,93.16,95.66,99.51,93.12,91.02,90.93,95.14,88.01,77.63,79.39,Europe
4,5,Switzerland,91.78,95.25,93.8,86.28,98.72,98.96,92.97,90.35,98.6,95.07,91.5,90.05,96.69,90.65,74.81,82.99,Europe


In [15]:
# Sanity check: the Europe subset should contain 'Europe' as its only continent label
eur_df['continent'].unique()

array(['Europe'], dtype=object)

In [16]:
# (rows, columns) of the Europe subset
eur_df.shape

(39, 19)

### Filter the data for Africa

In [17]:
# Keep only the rows whose continent is Africa
afr_df = df.loc[df['continent'] == 'Africa']

In [18]:
# Preview the first rows of the Africa subset
afr_df.head()

Unnamed: 0,spi_rank,country,spi_score,basic_human_needs,wellbeing,opportunity,basic_nutri_med_care,water_sanitation,shelter,personal_safety,access_basic_knowledge,access_info_comm,health_wellness,env_quality,personal_rights,personal_freedom_choice,inclusiveness,access_adv_edu,continent
44,45,Mauritius,77.3,90.31,76.9,64.7,92.4,96.75,91.33,80.76,87.95,74.72,67.67,77.27,86.97,67.93,51.58,52.32,Africa
57,58,Tunisia,73.95,87.4,69.27,65.19,94.77,94.39,86.08,74.38,71.44,79.93,69.75,55.97,83.94,68.39,50.28,58.14,Africa
76,77,Cabo Verde,70.0,78.6,66.3,65.09,87.24,74.0,84.59,68.57,68.84,68.95,65.46,61.93,89.99,67.52,59.62,43.24,Africa
79,80,South Africa,69.17,69.96,69.33,68.22,80.89,65.54,84.32,49.1,77.13,76.05,55.38,68.77,87.51,69.84,56.94,58.58,Africa
89,90,Algeria,67.04,84.7,62.27,54.16,92.07,92.78,86.65,67.29,77.27,49.66,70.1,52.03,54.11,67.1,41.55,53.9,Africa


In [19]:
# Sanity check: the Africa subset should contain 'Africa' as its only continent label
afr_df['continent'].unique()

array(['Africa'], dtype=object)

In [20]:
# (rows, columns) of the Africa subset
afr_df.shape

(53, 19)

### Further filter it for 4 relevant fields

In [21]:
# The four columns compared between the two continents
rel_cols = [
    'access_info_comm',
    'wellbeing',
    'opportunity',
    'access_adv_edu',
]

In [22]:
# Restrict the Europe subset to the four relevant columns
eur_rel_df = eur_df.loc[:, rel_cols]

In [23]:
# Preview the first rows of the Europe subset after column selection
eur_rel_df.head()

Unnamed: 0,access_info_comm,wellbeing,opportunity,access_adv_edu
0,95.8,93.3,89.3,85.92
1,95.14,93.09,88.07,85.23
2,98.18,92.74,88.41,84.89
3,93.12,93.65,85.04,79.39
4,95.07,93.8,86.28,82.99


In [24]:
# Column-wise mean of the four relevant attributes across European countries
# (a plain per-column average; no grouping is involved)
eur_gb_avg = eur_rel_df.mean(axis=0)
eur_gb_avg

access_info_comm    86.563846
wellbeing           84.657436
opportunity         75.965128
access_adv_edu      75.015128
dtype: float64

In [25]:
# Turn the Series of means into a two-column DataFrame for plotting:
# the former index becomes 'attributes', the values become 'avg_value'
eur_gb_avg_df = eur_gb_avg.reset_index(name='avg_value')
eur_gb_avg_df = eur_gb_avg_df.rename(columns={'index': 'attributes'})

In [26]:
# Pastel palette used to colour the bars
pastel_colors = [
    '#FBB4AE',
    '#B3CDE3',
    '#CCEBC5',
    '#DECBE4',
    '#FED9A6',
]

# Bar chart of Europe's average value per attribute.
# The y-domain is pinned to 0-90 (the Africa chart uses the same domain).
bar_chart = (
    alt.Chart(eur_gb_avg_df)
    .mark_bar()
    .encode(
        x='attributes',
        y=alt.Y(
            'avg_value:Q',
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(title='average score value'),
        ),
        color=alt.Color('attributes:N', scale=alt.Scale(range=pastel_colors)),
        tooltip=['attributes', 'avg_value'],
    )
    .properties(
        title='Average score values for Europe',
        width=500,
        height=300,
    )
)

In [27]:
# Average overall SPI score across European countries (drawn later as a reference line)
spi_score_mean = eur_df['spi_score'].mean()

In [28]:
# Red rule marking Europe's mean 'spi_score', to be layered over the bar chart.
# The rule is built from a one-row DataFrame whose only column is
# 'mean_spi_score', so the tooltip must reference that same field; it
# previously pointed at a non-existent 'mean_energy' field and therefore had
# no value to display.
mean_line = alt.Chart(pd.DataFrame({'mean_spi_score': [spi_score_mean]})).mark_rule(color='red').encode(
    y='mean_spi_score:Q',
    tooltip=[alt.Tooltip('mean_spi_score:Q', format='.2f')]  # 'Q' = quantitative (numeric); shown with 2 decimals
)

In [29]:
# Layer the mean-SPI rule on top of the Europe bar chart
chart = alt.layer(bar_chart, mean_line)

In [30]:
# Display the layered Europe chart with axis labels at a 0-degree angle.
# NOTE(review): the result is not assigned, so `chart` itself presumably stays
# unconfigured for the later side-by-side concatenation — confirm this is intended.
chart.configure_axis(labelAngle=0)

In [31]:
# Restrict the Africa subset to the four relevant columns
afr_rel_df = afr_df.loc[:, rel_cols]

In [32]:
# Preview the first rows of the Africa subset after column selection
afr_rel_df.head()

Unnamed: 0,access_info_comm,wellbeing,opportunity,access_adv_edu
44,74.72,76.9,64.7,52.32
57,79.93,69.27,65.19,58.14
76,68.95,66.3,65.09,43.24
79,76.05,69.33,68.22,58.58
89,49.66,62.27,54.16,53.9


In [33]:
# Column-wise mean of the four relevant attributes across African countries
# (a plain per-column average; no grouping is involved)
afr_gb_avg = afr_rel_df.mean(axis=0)
afr_gb_avg

access_info_comm    47.553208
wellbeing           53.339245
opportunity         47.295660
access_adv_edu      36.827547
dtype: float64

In [34]:
# Turn the Series of means into a two-column DataFrame for plotting:
# the former index becomes 'attributes', the values become 'avg_value'
afr_gb_avg_df = afr_gb_avg.reset_index(name='avg_value')
afr_gb_avg_df = afr_gb_avg_df.rename(columns={'index': 'attributes'})

In [35]:
# Pastel palette used to colour the bars (same values as in the Europe chart cell)
pastel_colors = [
    '#FBB4AE',
    '#B3CDE3',
    '#CCEBC5',
    '#DECBE4',
    '#FED9A6',
]

# Bar chart of Africa's average value per attribute.
# The y-domain is pinned to 0-90 (the Europe chart uses the same domain).
bar_chart_afr = (
    alt.Chart(afr_gb_avg_df)
    .mark_bar()
    .encode(
        x='attributes',
        y=alt.Y(
            'avg_value:Q',
            scale=alt.Scale(domain=[0, 90]),
            axis=alt.Axis(title='average score value'),
        ),
        color=alt.Color('attributes:N', scale=alt.Scale(range=pastel_colors)),
        tooltip=['attributes', 'avg_value'],
    )
    .properties(
        title='Average score values for Africa',
        width=500,
        height=300,
    )
)

In [36]:
# Average overall SPI score across African countries (drawn later as a reference line)
spi_score_mean_afr = afr_df['spi_score'].mean()

In [37]:
# Red rule marking Africa's mean 'spi_score', to be layered over the bar chart.
# The rule is built from a one-row DataFrame whose only column is
# 'mean_spi_score', so the tooltip must reference that same field; it
# previously pointed at a non-existent 'mean_energy' field and therefore had
# no value to display.
mean_line_afr = alt.Chart(pd.DataFrame({'mean_spi_score': [spi_score_mean_afr]})).mark_rule(color='red').encode(
    y='mean_spi_score:Q',
    tooltip=[alt.Tooltip('mean_spi_score:Q', format='.2f')]  # 'Q' = quantitative (numeric); shown with 2 decimals
)

In [38]:
# Layer the mean-SPI rule on top of the Africa bar chart
chart2 = alt.layer(bar_chart_afr, mean_line_afr)

In [39]:
# Display the layered Africa chart with axis labels at a 0-degree angle.
# NOTE(review): the result is not assigned, so `chart2` itself presumably stays
# unconfigured for the later side-by-side concatenation — confirm this is intended.
chart2.configure_axis(labelAngle=0)

In [40]:
# Place the Europe and Africa charts side by side for direct comparison
ch = alt.hconcat(chart, chart2)

In [41]:
# Display the combined Europe | Africa chart with axis labels at a 0-degree angle
ch.configure_axis(labelAngle=0)