In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the school questionnaire CSV into the main working frame.
data = pd.read_csv(
    filepath_or_buffer='../input/pisa-2018-school-questionnaire/CY07_MSU_SCH_QQQ.csv',
)

# **Data Preparation**

For the visualization we need the country names and the scores in the dataset.

In [None]:
# Lookup table used further down to translate alpha-3 country codes into country names.
country_vs_code_data = pd.read_csv(
    filepath_or_buffer='../input/needed-datasets-and-texts/countries_codes_and_coordinates.csv',
)

In [None]:
# Parse the scores text file with the default CSV settings, then persist it as a
# proper .csv (without the index) in the working directory.
score_file = pd.read_csv(
    '../input/needed-datasets-and-texts/scores.txt'
)
score_file.to_csv(
    '/kaggle/working/scores.csv',
    index=None,
)

In [None]:
# Read back the scores CSV that was written to the working directory.
scores = pd.read_csv(
    filepath_or_buffer='/kaggle/working/scores.csv',
)

Here is the score data.

In [None]:
# Preview the first five rows of the score table.
scores.head(n=5)

Here is the country names vs alpha-3 code data.

In [None]:
# Preview the first five rows of the country-name / code lookup table.
country_vs_code_data.head(n=5)

In [None]:
# Drop the alpha-2 / numeric codes and the average coordinates from the lookup table.
# The result is reassigned instead of using inplace=True, so the cell reads as a
# plain "input -> output" transformation and can be chained.
country_vs_code_data = country_vs_code_data.drop(
    columns=[
        'Alpha-2 code',
        'Numeric code',
        'Latitude (average)',
        'Longitude (average)',
    ]
)

In [None]:
# Remove the stray double quotes and spaces from every alpha-3 code before the
# column is used as a merge key.
# This is done with vectorised string methods on the whole column: the previous
# element-by-element chained assignment (frame[col][i] = ...) triggers
# SettingWithCopyWarning, does not update the frame under pandas Copy-on-Write,
# relies on a default 0..n-1 index, and fails on missing values.
country_vs_code_data["Alpha-3 code"] = (
    country_vs_code_data["Alpha-3 code"]
    .str.replace('"', '', regex=False)
    .str.replace(' ', '', regex=False)
)

Merging with country-code data.

In [None]:
# Left-join the lookup table onto the questionnaire rows (CNT matched against the
# alpha-3 code), then discard the now redundant key column from the right side.
data = pd.merge(
    data,
    country_vs_code_data,
    how='left',
    left_on='CNT',
    right_on='Alpha-3 code',
)
data = data.drop(columns=['Alpha-3 code'])

To see which rows were left with a NaN country name, I copied them into a temporary DataFrame and looked at the unique values.

In [None]:
# Rows that received no country name from the merge.
nan_values = data.loc[data['Country'].isna()]

While mapping country codes to country names, I saw that some country names were missing, so I added them manually.

In [None]:
# Country names for the CNT codes that are filled in by hand.
missing_country_names = {
    "QAZ": "Baku(Azerbaijan)",
    "QRT": "Tatarstan(RUS)",
    "TAP": "Chinese Taipei",
    "KSV": "Kosovo",
    "QCI": "B-S-J-Z (China)",
    "QMR": "Moscow Region(RUS)",
}

# Assign through .loc so the write lands on `data` itself. The chained form
# data["Country"][mask] = ... goes through an intermediate Series, which raises
# SettingWithCopyWarning and does not update `data` under pandas Copy-on-Write.
for cnt_code, country_name in missing_country_names.items():
    data.loc[data["CNT"] == cnt_code, "Country"] = country_name

Merging with the score data.
Here, I renamed a few countries because their names in the score CSV did not match the names in the merged data.

In [None]:
# Map the country names coming from the code table onto the spellings used in the
# score table, so the merge on 'Country' can match them.
score_table_names = {
    'Moldova, Republic of': "Moldova",
    'Viet Nam': "Vietnam",
    'Korea, Republic of': "South Korea",
    'Czech Republic': "Czechia",
    'Brunei': "Brunei Darussalam",
    'Russia': "Russian Federation",
}

# Series.replace with a dict swaps exact matches only and leaves every other value
# untouched. Reassigning the column avoids the chained assignment
# data['Country'][mask] = ..., which raises SettingWithCopyWarning and does not
# update `data` under pandas Copy-on-Write.
data['Country'] = data['Country'].replace(score_table_names)

In [None]:
# Left-join the score table by country name; rows without a match keep NaN.
data = pd.merge(
    data,
    scores,
    how='left',
    on='Country',
)

In [None]:
# Which countries are still without a score after the merge?
nan_values = data.loc[data['Score'].isna()]
nan_values['Country'].unique()

Since the Moscow Region and Tatarstan are part of the Russian Federation, I set their scores equal to the Russian Federation's score.

In [None]:
# Both Russian regions get the same score value.
russian_regions = ['Moscow Region(RUS)', 'Tatarstan(RUS)']
russian_regions_score = 481.7

# .loc writes directly into `data`; the chained form data['Score'][mask] = ...
# raises SettingWithCopyWarning and does not update `data` under pandas Copy-on-Write.
data.loc[data['Country'].isin(russian_regions), 'Score'] = russian_regions_score

Scores that we looked up on the internet: Spain's score is 483, Vietnam's is 514 and Macedonia's is 400.

In [None]:
# Scores entered by hand for countries that have no score after the merge.
manual_scores = {
    'Spain': 483,
    'Vietnam': 514,
    'Macedonia, the former Yugoslav Republic of': 400,
}

# .loc writes directly into `data`; the chained form data['Score'][mask] = ...
# raises SettingWithCopyWarning and does not update `data` under pandas Copy-on-Write.
for country_name, score in manual_scores.items():
    data.loc[data['Country'] == country_name, 'Score'] = score

In [None]:
# Preview the merged frame (first five rows).
data.head(n=5)

I dropped the unnecessary features.

In [None]:
# Administrative / bookkeeping columns that are removed from the frame.
unneeded_columns = [
    'Unnamed: 0',
    'CNTRYID',
    'CYC',
    'ADMINMODE',
    'BOOKID',
    'OECD',
    'VER_DAT',
    'SUBNATIO',
    'STRATUM',
    'Region',
    'NatCen',
]

# Reassign instead of inplace=True so the cell is a plain transformation of `data`.
data = data.drop(columns=unneeded_columns)

In [None]:
# Preview the frame again after dropping the columns (first five rows).
data.head(n=5)

# **For World Mapping**

In [None]:
import pycountry 
import matplotlib.pyplot as plt
import geopandas
from geopandas import GeoDataFrame
import mapclassify

Coordinate file (latitude/longitude) for mapping

In [None]:
# Parse the latitude/longitude text file with the default CSV settings, then store
# it as a .csv (without the index) in the working directory.
coordination_file = pd.read_csv(
    r'../input/needed-datasets-and-texts//latitude-longtitude.txt'
)
coordination_file.to_csv(
    r'/kaggle/working/coordination.csv',
    index=None,
)

In [None]:
# Read back the coordinate CSV that was written to the working directory.
coord_data = pd.read_csv(
    filepath_or_buffer='/kaggle/working/coordination.csv',
)

Then I used the geopandas dataset that contains the necessary features for mapping.

In [None]:
# Load the low-resolution Natural Earth country polygons that ship with geopandas.
# NOTE(review): geopandas.datasets is deprecated in recent geopandas releases and
# removed in 1.0 — confirm the geopandas version this notebook is pinned to.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

# Rename only the ISO code column, by label. Overwriting the complete header
# positionally would silently mislabel columns if their order or number changed.
world = world.rename(columns={'iso_a3': 'CODE'})

# naturalearth_lowres stores the placeholder '-99' instead of a real ISO code for
# a few countries. Give them the codes used in the CNT column so the merge on
# 'CODE' can attach their geometry ('KSV' is the CNT code used for Kosovo above).
placeholder_code_fixes = {
    'France': 'FRA',
    'Norway': 'NOR',
    'Kosovo': 'KSV',
}
for world_name, cnt_code in placeholder_code_fixes.items():
    world.loc[world['name'] == world_name, 'CODE'] = cnt_code

Merging the data with the world data.

In [None]:
# Left-join the world polygons onto the data by country code (CNT <-> CODE).
data = pd.merge(
    data,
    world,
    how='left',
    left_on='CNT',
    right_on='CODE',
)

Merging the coordination data with data.

In [None]:
# Left-join the coordinate table by country name.
data = pd.merge(
    data,
    coord_data,
    how='left',
    on='Country',
)

In [None]:
# Wrap the merged frame in a GeoDataFrame so it can be plotted as a map.
data = geopandas.GeoDataFrame(data)

Below, I create a subset that contains only each country's latitude and longitude values. Then I check it for NaN values.

In [None]:
# Mean latitude/longitude per country.
# The columns are selected with a list: selecting several GroupBy columns with a
# bare tuple (['latitude', 'longitude'] without the inner brackets) was deprecated
# and raises an error in pandas 2.0 and later.
sub = data.groupby('Country')[['latitude', 'longitude']].mean()

# Countries for which no latitude is available.
sub[sub['latitude'].isna()]

Now I create a subset of the data in which the geometry values are not NaN.

In [None]:
# Keep only the rows that have a geometry. The explicit .copy() makes `sub_data`
# an independent frame, so values assigned to it later are not writes to a
# slice of `data` (which would raise SettingWithCopyWarning).
sub_data = data[data['geometry'].notna()].copy()

In [None]:
# Preview the rows that have a geometry (first five rows).
sub_data.head(n=5)

Here, Macedonia has a 'geometry' but its latitude and longitude values are missing, so I looked them up on the internet and filled them in.

In [None]:
# Fill in the coordinates for Macedonia by hand.
# .loc writes directly into `sub_data`; the chained form
# sub_data['latitude'][mask] = ... raises SettingWithCopyWarning and does not
# update `sub_data` under pandas Copy-on-Write.
is_macedonia = sub_data['Country'] == 'Macedonia, the former Yugoslav Republic of'
sub_data.loc[is_macedonia, 'latitude'] = 41.6086
sub_data.loc[is_macedonia, 'longitude'] = 21.7453

Visualization

In [None]:
# Choropleth of the score column, classified into quantile bins.
# NOTE(review): presumably `sub_data` still holds one row per questionnaire record
# rather than one per country, so the quantile bins are computed over all rows and
# each country polygon is drawn repeatedly — confirm, and consider plotting
# one row per country.
ax = sub_data.plot(
    column='Score',
    scheme="quantiles",
    cmap='Spectral',
    legend=True,
    figsize=(25, 20),
)
ax.set_title('Countries and Scores', fontsize=25)

plt.show()