### **This notebook was made for the visualization final project of an information systems bachelor's student and consists of:**
1. Choropleth visualization on world economic freedom index 2019
2. Box plot visualization of economic freedom index by region
3. Tree heatmap visualization of GDP colored by its economic freedom index
4. Bubble visualization on correlation between economic freedom index and population for every region
5. 3D Scatterplot visualization on rule of law index, government size index, regulatory efficiency index, and open markets index to economic freedom index

# 1. prepare the data

In [None]:
import pandas as pd

# Read the 2019 economic freedom index CSV (decoded as latin1), then show the
# frame followed by its column / dtype summary.
efi_csv_path = '/kaggle/input/the-economic-freedom-index/economic_freedom_index2019_data.csv'
df = pd.read_csv(efi_csv_path, encoding='latin1')
print(df)
df.info()

In [None]:
# Read only the columns at positions 0 and 2 of the ISO country-code table.
iso_csv_path = '/kaggle/input/countries-iso-codes/wikipedia-iso-country-codes.csv'
iso = pd.read_csv(iso_csv_path, usecols=[0, 2])
print(iso)

# 2. visualize the choropleth map

In [None]:
# pip install plotly==4.10.0

In [None]:
# Keep the country name and its 2019 score; rows missing either value are discarded.
choropleth_columns = ["Country Name", "2019 Score"]
dataviz1 = df[choropleth_columns].dropna()
print(dataviz1)

### *check for any missing country name in the ISO dataset*

In [None]:
# Plain Python lists of both name columns, printed one after the other for eyeballing.
iso_list = iso['English short name lower case'].tolist()
index_country_names = dataviz1['Country Name'].tolist()
print(iso_list, '\n', index_country_names)

In [None]:
# Print every country from the index data whose name has no exact match in the ISO list.
for country in dataviz1['Country Name'].tolist():
    if country not in iso_list:
        print(country)

### *check manually and rename the Country Name if it exists in the ISO dataset under a different spelling*

In [None]:
# (name as written in the index data, name as written in the ISO table).
# Matching is exact, so note that 'Korea, North ' and 'Taiwan ' are looked up
# with a trailing space.
country_renames = (
    ('Burma', 'Myanmar'),
    ('Cabo Verde', 'Cape Verde'),
    ('Congo, Democratic Republic of the Congo', 'Congo, the Democratic Republic of the'),
    ('Congo, Republic of', 'Congo'),
    ('Eswatini', 'Swaziland'),
    ('Korea, North ', "Korea, Democratic People's Republic of"),
    ('Korea, South', "Korea, Republic of (South Korea)"),
    ('Kyrgyz Republic', 'Kyrgyzstan'),
    ('Laos', "Lao People's Democratic Republic"),
    ('Macau', 'Macao'),
    ('Macedonia', "Macedonia, the former Yugoslav Republic of"),
    ('Micronesia', "Micronesia, Federated States of"),
    ('Moldova', 'Moldova, Republic of'),
    ('São Tomé and Príncipe', 'Sao Tome and Principe'),
    ('Taiwan ', 'Taiwan'),
    ('Tanzania', 'Tanzania, United Republic of'),
    ('United States', 'United States Of America'),
)

# Apply the substitutions one at a time, in the order listed, editing dataviz1 in place.
for index_name, iso_name in country_renames:
    dataviz1.replace(index_name, iso_name, inplace=True)
# Kosovo country codes are not found

### *rename the column title so later we can merge those 2 dataframes*

In [None]:
# Give the ISO name column the same header as dataviz1's so the two frames share a join key.
iso_column_renames = {'English short name lower case': 'Country Name'}
iso.rename(columns=iso_column_renames, inplace=True)
print(iso, "\n", dataviz1)

In [None]:
# Attach the ISO columns by joining on the shared "Country Name" column
# (merge's default join type is used).
dataviz1 = pd.merge(dataviz1, iso, on="Country Name")
print(dataviz1)

### *finally viz'em up!*

In [None]:
import plotly.express as px

# World map shaded by 2019 score; each country is located through its "Alpha-3 code".
viz1 = px.choropleth(
    dataviz1,
    locations="Alpha-3 code",
    color="2019 Score",
    hover_name="Country Name",
    color_continuous_scale=px.colors.sequential.Blues,
)
viz1.update_layout(title='Choropleth Map of Economic Freedom Score 2019')

# 3. visualize the boxplot

### *pick the subset needed*

In [None]:
# Country, score and region; rows missing any of the three are discarded.
boxplot_columns = ["Country Name", "2019 Score", "Region"]
dataviz2 = df[boxplot_columns].dropna()
print(dataviz2)

### *adjusting group name to fit in the visualization*

In [None]:
# Distinct region labels in the data (built from a set, so the order is arbitrary).
distinct_regions = set(dataviz2['Region'])
list(distinct_regions)

In [None]:
# Shorten each region label to a one-letter code so the box plot's tick labels fit.
region_acronyms = (
    ('Middle East and North Africa', 'M'),
    ('Americas', 'A'),
    ('Europe', 'E'),
    ('Sub-Saharan Africa', 'S'),
    ('Asia-Pacific', 'P'),
)
for region_name, acronym in region_acronyms:
    dataviz2.replace(region_name, acronym, inplace=True)

### *boxplot'em and group it by Region*

In [None]:
import matplotlib.pyplot as plt

# One box per region code; boxplot() returns the axes, which then receives the title.
region_boxplot_ax = dataviz2.boxplot(column="2019 Score", by="Region", fontsize=14)
region_boxplot_ax.set_title("2019 World Economic Freedom Index grouped by Region", fontsize=15)
plt.suptitle("") #removing the auto-generated title
plt.show()

# Legend for the one-letter region codes used on the x axis.
print('Region acronym:\n','A = Americas, E = Europe, M = Middle East and North Africa, P = Asia-Pacific, S = Sub-Saharan Africa')

# 4. visualize the treemap

### *get the data ready*

In [None]:
# Treemap input: country, GDP and score, with rows missing any of the three removed.
# .copy() makes the subset an independent frame, so the column assignment below
# cannot write into (or warn about) a frame derived from `df`.
dataviz3 = df[["Country Name", "GDP (Billions, PPP)", "2019 Score"]].dropna().copy()

# GDP is handled as text here: strip thousands separators and the currency sign.
# regex=False is essential: read as a regular expression, '$' is the
# end-of-string anchor, so on pandas versions where str.replace defaults to
# regex=True the dollar sign would stay in place and the value would then be
# coerced to NaN by to_numeric below.
gdp_text = (
    dataviz3['GDP (Billions, PPP)']
    .str.replace(',', '', regex=False)
    .str.replace('$', '', regex=False)
    .str.strip()
)

# Anything that still is not a plain number becomes NaN ...
dataviz3['GDP (Billions, PPP)'] = pd.to_numeric(gdp_text, errors='coerce') #because we want a numeric value

# ... and is dropped after ordering the countries from largest to smallest GDP.
dataviz3 = dataviz3.sort_values("GDP (Billions, PPP)", ascending=False).dropna()

### *and now make the treemap*

In [None]:
import plotly.express as px

# One rectangle per country: area follows GDP, shade of blue follows the 2019 score.
fig = px.treemap(
    dataviz3,
    path=['Country Name'],
    values='GDP (Billions, PPP)',
    color='2019 Score',
    color_continuous_scale=px.colors.sequential.Blues,
)
fig.update_layout(title='GDP (Billions, PPP) to 2019 Economic Freedom Index Tree Map')
fig.show()

> another option to build treemap is using squarify & matplotlib

In [None]:
# pip install squarify

In [None]:
import squarify
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Turn every score into a color from the Blues colormap, with the scale
# stretched between the lowest and highest score in dataviz3.
colormap = matplotlib.cm.Blues
efi = dataviz3['2019 Score']
mini, maxi = min(efi), max(efi)
norm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi)
colors = [colormap(norm(score)) for score in efi]

In [None]:
gdp = dataviz3['GDP (Billions, PPP)']
top40_labels = dataviz3['Country Name'][:40] #labelling only the top40

# Work on the current figure, give it one axes and make it 10 x 10 inches.
fig = plt.gcf()
ax = fig.add_subplot()
fig.set_size_inches(10, 10)

# Rectangle sizes come from GDP, fill colors from the score colors computed earlier.
squarify.plot(
    sizes=gdp,
    label=top40_labels,
    color=colors,
    bar_kwargs={'alpha': .8},
    text_kwargs={'fontsize': 8},
)
plt.title("Countries GDP (Billions, PPP) Tree Map colored by its Economic Freedom Index",fontsize=14,fontweight="bold")
plt.axis('off')
plt.show()

# 5. visualize the bubble, ***blob blob blob***

### *insert the BubbleChart class I found in the matplotlib documentation*

In [None]:
import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

class BubbleChart:
    """
    Packed-bubble layout.

    Each bubble is a circle whose area is given by the caller. Bubbles start
    on a square grid and are then moved step by step towards their common
    center of mass, a move being accepted only when it causes no overlap.

    ``self.bubbles`` holds one row per bubble: x, y, radius, area.
    """

    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        # grid pitch: diameter of the largest bubble plus the requested gap,
        # so no two bubbles overlap in the initial layout
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    @staticmethod
    def _unit_vector(vec):
        """Return ``vec`` scaled to length 1, or None when its length is zero."""
        length = np.sqrt(vec.dot(vec))
        if length == 0:
            return None
        return vec / length

    def center_of_mass(self):
        """Return the (x, y) average of all bubble centers, weighted by area."""
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        """Return the distance from ``bubble``'s center to each center in ``bubbles``."""
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """
        Return the gap between ``bubble``'s outline and each outline in
        ``bubbles``, reduced by ``bubble_spacing``; negative means too close.
        """
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        """Return how many of ``bubbles`` are closer to ``bubble`` than allowed."""
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        """Return the index of the nearest bubble in ``bubbles``, as an iterable."""
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if isinstance(idx_min, np.ndarray) else [idx_min]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _i in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # unit direction vector from bubble to the center of mass
                dir_vec = self._unit_vector(self.com - self.bubbles[i, :2])
                if dir_vec is None:
                    # The bubble already sits exactly on the center of mass
                    # (always true for a single bubble). Normalizing the zero
                    # vector would yield NaN coordinates, so leave it in place.
                    continue

                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # calculate unit direction vector towards that bubble
                        dir_vec = self._unit_vector(
                            rest_bub[colliding, :2] - self.bubbles[i, :2])
                        if dir_vec is None:
                            # coincident centers: no direction to step around
                            continue
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            # fewer than 10% of the bubbles made a direct move: use smaller steps
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """
        Draw the bubble plot.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
        labels : list
            Labels of the bubbles.
        colors : list
            Colors of the bubbles.
        """
        for i in range(len(self.bubbles)):
            circ = plt.Circle(
                self.bubbles[i, :2], self.bubbles[i, 2], color=colors[i])
            ax.add_patch(circ)
            ax.text(*self.bubbles[i, :2], labels[i],
                    horizontalalignment='center', verticalalignment='center')

### *prepare the data to fit the viz needs*

In [None]:
# Bubble-chart input: one row per country with score, population and region.
# .copy() gives an independent frame so the column edits below are safe.
dataviz4 = df[["Country Name","2019 Score","Population (Millions)","Region"]].dropna().copy()

# Population may be stored as text with a thousands separator; strip it before
# converting, otherwise errors='coerce' silently turns such rows into NaN.
population_text = dataviz4["Population (Millions)"].astype(str).str.replace(',', '', regex=False)
dataviz4["Population (Millions)"] = pd.to_numeric(population_text, errors='coerce')

# Rows that still failed to parse are removed here: the dropna() above ran
# before the conversion, and a NaN area would propagate into BubbleChart's
# grid spacing and bubble coordinates.
dataviz4 = dataviz4.dropna(subset=["Population (Millions)"])

dataviz4["Population (Millions)"] = dataviz4["Population (Millions)"]*1000 #change the value of this column from millions to thousands
dataviz4 = dataviz4.rename(columns={'Population (Millions)':'Population (Thousands)'})
dataviz4 = dataviz4.sort_values("Population (Thousands)", ascending=False)

dataviz4.info()

# One frame per region, each keeping the population-descending order.
dataviz4_americas = dataviz4[dataviz4['Region'] == 'Americas']
dataviz4_asiapacific = dataviz4[dataviz4['Region'] == 'Asia-Pacific']
dataviz4_europe = dataviz4[dataviz4['Region'] == 'Europe']
dataviz4_middleeastnorthafrica = dataviz4[dataviz4['Region'] == 'Middle East and North Africa']
dataviz4_subsabharanafrica = dataviz4[dataviz4['Region'] == 'Sub-Saharan Africa']

# Bubble labels, in the same row order as the frames above.
labels_americas = dataviz4_americas["Country Name"].tolist()
labels_asiapacific = dataviz4_asiapacific["Country Name"].tolist()
labels_europe = dataviz4_europe["Country Name"].tolist()
labels_middleeastnorthafrica = dataviz4_middleeastnorthafrica["Country Name"].tolist()
labels_subsabharanafrica = dataviz4_subsabharanafrica["Country Name"].tolist()


def _score_palette(region_frame, colormap):
    """
    Color one region's countries by their 2019 score.

    The color scale is stretched between that region's own lowest and highest
    score, so shades are comparable within a region but not across regions.

    Parameters
    ----------
    region_frame : pandas.DataFrame
        Rows of one region; must contain a '2019 Score' column.
    colormap : callable
        Matplotlib colormap mapping a normalized value to a color.

    Returns
    -------
    tuple
        (scores, lowest, highest, normalizer, colors), where ``colors`` has
        one entry per row of ``region_frame``, in row order.
    """
    scores = region_frame['2019 Score']
    lowest = min(scores)
    highest = max(scores)
    normalizer = matplotlib.colors.Normalize(vmin=lowest, vmax=highest)
    colors = [colormap(normalizer(value)) for value in scores]
    return scores, lowest, highest, normalizer, colors


# americas
colormap_americas = matplotlib.cm.Reds
(economic_freedom_index_americas, mini_americas, maxi_americas,
 norm_americas, colors_americas) = _score_palette(dataviz4_americas, colormap_americas)

# asia pacific
colormap_asiapacific = matplotlib.cm.Purples
(economic_freedom_index_asiapacific, mini_asiapacific, maxi_asiapacific,
 norm_asiapacific, colors_asiapacific) = _score_palette(dataviz4_asiapacific, colormap_asiapacific)

# europe
colormap_europe = matplotlib.cm.Blues
(economic_freedom_index_europe, mini_europe, maxi_europe,
 norm_europe, colors_europe) = _score_palette(dataviz4_europe, colormap_europe)

# middle east and north africa
colormap_middleeastnorthafrica = matplotlib.cm.Greens
(economic_freedom_index_middleeastnorthafrica, mini_middleeastnorthafrica, maxi_middleeastnorthafrica,
 norm_middleeastnorthafrica, colors_middleeastnorthafrica) = _score_palette(
    dataviz4_middleeastnorthafrica, colormap_middleeastnorthafrica)

# sub-saharan africa
colormap_subsabharanafrica = matplotlib.cm.Oranges
(economic_freedom_index_subsabharanafrica, mini_subsabharanafrica, maxi_subsabharanafrica,
 norm_subsabharanafrica, colors_subsabharanafrica) = _score_palette(
    dataviz4_subsabharanafrica, colormap_subsabharanafrica)

### *now blow the bubbles pop pop pop*

In [None]:
# Square-aspect axes on a 20 x 20 inch figure, so circles are drawn as circles.
fig, ax = plt.subplots(subplot_kw={"aspect": "equal"})
fig.set_size_inches(20, 20)

# Bubble area = population (thousands); pack the bubbles, then draw them.
bubble_chart_americas = BubbleChart(
    area=dataviz4_americas["Population (Thousands)"], bubble_spacing=0.5
)
bubble_chart_americas.collapse()
bubble_chart_americas.plot(ax, labels_americas, colors_americas)

# Hide the axes and rescale the view to the circles that were added.
ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('Americas Region Country Population colored by its Economic Freedom Index')

plt.show()

In [None]:
# Square-aspect axes on a 20 x 20 inch figure, so circles are drawn as circles.
fig, ax = plt.subplots(subplot_kw={"aspect": "equal"})
fig.set_size_inches(20, 20)

# Bubble area = population (thousands); pack the bubbles, then draw them.
bubble_chart_asiapacific = BubbleChart(
    area=dataviz4_asiapacific["Population (Thousands)"], bubble_spacing=0.5
)
bubble_chart_asiapacific.collapse()
bubble_chart_asiapacific.plot(ax, labels_asiapacific, colors_asiapacific)

# Hide the axes and rescale the view to the circles that were added.
ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('Asia Pacific Region Country Population colored by its Economic Freedom Index')

plt.show()

In [None]:
# Square-aspect axes on a 20 x 20 inch figure, so circles are drawn as circles.
fig, ax = plt.subplots(subplot_kw={"aspect": "equal"})
fig.set_size_inches(20, 20)

# Bubble area = population (thousands); pack the bubbles, then draw them.
bubble_chart_europe = BubbleChart(
    area=dataviz4_europe["Population (Thousands)"], bubble_spacing=0.5
)
bubble_chart_europe.collapse()
bubble_chart_europe.plot(ax, labels_europe, colors_europe)

# Hide the axes and rescale the view to the circles that were added.
ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('Europe Region Country Population colored by its Economic Freedom Index')

plt.show()

In [None]:
# Square-aspect axes on a 20 x 20 inch figure, so circles are drawn as circles.
fig, ax = plt.subplots(subplot_kw={"aspect": "equal"})
fig.set_size_inches(20, 20)

# Bubble area = population (thousands); pack the bubbles, then draw them.
bubble_chart_middleeastnorthafrica = BubbleChart(
    area=dataviz4_middleeastnorthafrica["Population (Thousands)"], bubble_spacing=0.5
)
bubble_chart_middleeastnorthafrica.collapse()
bubble_chart_middleeastnorthafrica.plot(
    ax, labels_middleeastnorthafrica, colors_middleeastnorthafrica
)

# Hide the axes and rescale the view to the circles that were added.
ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('Middle East and North Africa Region Country Population colored by its Economic Freedom Index')

plt.show()

In [None]:
# Bubble area = population (thousands); pack the bubbles before drawing.
# (The "subsabharan" spelling in the variable names is kept because other
# cells refer to these names.)
bubble_chart_subsabharanafrica = BubbleChart(area=dataviz4_subsabharanafrica["Population (Thousands)"],
                           bubble_spacing=0.5)
bubble_chart_subsabharanafrica.collapse()

# Square-aspect axes on a 20 x 20 inch figure, so circles are drawn as circles.
fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"))
fig.set_size_inches(20,20)
bubble_chart_subsabharanafrica.plot(
    ax, labels_subsabharanafrica, colors_subsabharanafrica)

# Hide the axes and rescale the view to the circles that were added.
ax.axis("off")
ax.relim()
ax.autoscale_view()
# Title typo fixed: the region is "Sub-Saharan", not "Sub-Sabharan".
ax.set_title('Sub-Saharan Africa Region Country Population colored by its Economic Freedom Index')

plt.show()

# 6. visualize the 3D scatterplot YAY

In [None]:
import plotly.express as px

rule of law index ('Property Rights', 'Government Integrity', 'Judical Effectiveness')

In [None]:
# Three components on the axes; marker color and marker size both follow the 2019 score.
rule_of_law_columns = [
    "Country Name",
    "2019 Score",
    "Property Rights",
    "Government Integrity",
    "Judical Effectiveness",
]
dataviz5a = df[rule_of_law_columns].dropna()

fig = px.scatter_3d(
    dataviz5a,
    x="Property Rights",
    y="Government Integrity",
    z="Judical Effectiveness",
    hover_data=dataviz5a.columns,
    color='2019 Score',
    size='2019 Score',
    size_max=30,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.BuPu,
)
# Zero margins so the plot fills the output area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

government size index ('Gov't Spending', 'Tax Burden', 'Fiscal Health')

In [None]:
# Three components on the axes; marker color and marker size both follow the 2019 score.
government_size_columns = [
    "Country Name",
    "2019 Score",
    "Gov't Spending",
    'Tax Burden',
    'Fiscal Health',
]
dataviz5b = df[government_size_columns].dropna()

fig = px.scatter_3d(
    dataviz5b,
    x="Gov't Spending",
    y="Tax Burden",
    z="Fiscal Health",
    hover_data=dataviz5b.columns,
    color='2019 Score',
    size='2019 Score',
    size_max=30,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.GnBu,
)
# Zero margins so the plot fills the output area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

regulatory efficiency index ('Business Freedom', 'Labor Freedom', 'Monetary Freedom')

In [None]:
# Three components on the axes; marker color and marker size both follow the 2019 score.
regulatory_efficiency_columns = [
    "Country Name",
    "2019 Score",
    'Business Freedom',
    'Labor Freedom',
    'Monetary Freedom',
]
dataviz5c = df[regulatory_efficiency_columns].dropna()

fig = px.scatter_3d(
    dataviz5c,
    x="Labor Freedom",
    y="Monetary Freedom",
    z="Business Freedom",
    hover_data=dataviz5c.columns,
    color='2019 Score',
    size='2019 Score',
    size_max=30,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.PuRd,
)
# Zero margins so the plot fills the output area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})

open markets index ('Trade Freedom', 'Investment Freedom', 'Financial Freedom')

In [None]:
# Three components on the axes; marker color and marker size both follow the 2019 score.
# Note: 'Investment Freedom ' is looked up with a trailing space.
open_markets_columns = [
    "Country Name",
    "2019 Score",
    'Trade Freedom',
    'Investment Freedom ',
    'Financial Freedom',
]
dataviz5d = df[open_markets_columns].dropna()

fig = px.scatter_3d(
    dataviz5d,
    x="Trade Freedom",
    y="Financial Freedom",
    z="Investment Freedom ",
    hover_data=dataviz5d.columns,
    color='2019 Score',
    size='2019 Score',
    size_max=30,
    opacity=0.7,
    color_continuous_scale=px.colors.sequential.YlOrRd,
)
# Zero margins so the plot fills the output area.
fig.update_layout(margin={"l": 0, "r": 0, "b": 0, "t": 0})