In [None]:
import pandas as pd

import matplotlib.pyplot as plt

import numpy as np

In [None]:
# Hex colour per country label (including the 'Other countries' bucket).
# The 'Romania' entry is also what the per-country line plots fall back to
# when a country has no entry of its own.
colors_dict = {
    'Ukraine': '#E58606',
    'Moldova': '#5D69B1',
    'Greece': '#52BCA3',
    'Slovakia': '#99C945',
    'Romania': '#A5AA99',
    'Hungary': '#24796C',
    'Spain': '#DAA51B',
    'Israel': '#2F8AC4',
    'Italy': '#764E9F',
    'France': '#ED645A',
    'Canada': '#CC3A8E',
    'Australia': '#CC61B0',
    'Germany': '#88CCEE',
    'Austria': '#DDCC77',
    'Sweden': '#117733',
    'United States': '#332288',
    'Switzerland': '#661100',
    'Other countries': '#f97b72',
}

In [None]:
# Read the OpenDocument spreadsheet with the 'odf' engine; the first
# spreadsheet column becomes the frame's index.
df_path = 'immigrants_emigrants.ods'
df = pd.read_excel(
    df_path,
    index_col=0,
    engine='odf',
)

In [None]:
# Show the freshly loaded table as the cell output for a quick visual check.
df

### Per country

In [None]:
# Rows flagged as 'emigrants', extended with a `total` column (row-wise sum of
# the '1990'..'2021' columns) and ordered from the largest total to the smallest.
emigrants_df = (
    df.loc[df['data_type'] == 'emigrants', :]
    .assign(total=lambda frame: frame.loc[:, '1990':'2021'].sum(axis=1))
    .sort_values('total', ascending=False)
)

emigrants_df.head()

In [None]:
# Rows flagged as 'immigrants', extended with a `total` column (row-wise sum of
# the '1990'..'2021' columns) and ordered from the largest total to the smallest.
immigrants_df = (
    df.loc[df['data_type'] == 'immigrants', :]
    .assign(total=lambda frame: frame.loc[:, '1990':'2021'].sum(axis=1))
    .sort_values('total', ascending=False)
)

immigrants_df

In [None]:
# Main bar chart: one bar per row of emigrants_df, bar height taken from `total`.
fig, ax = plt.subplots(figsize=(10, 10))
emigrants_df.plot.bar(x='country', y='total', label='', ax=ax)

# Drop the legend, blank the x-axis caption and turn the tick labels vertical.
ax.get_legend().remove()
ax.xaxis.set_label_text('')
plt.xticks(ha='center', rotation=90);

def add_stars(df, x):
    """Return the footnote marker for the row at integer position ``x`` of ``df``.

    '**' when the row's 'country' value is 'Other countries'; otherwise '*'
    when any cell of the row is missing; otherwise an empty string.
    """
    name = df['country'].iloc[x]
    if name == 'Other countries':
        return '**'

    record = df.iloc[x, :]
    # Dropping missing cells shortens the row exactly when it has at least one.
    return '*' if record.isna().any() else ''

# Green bracket annotation placed at hand-tuned data coordinates
# (presumably tying the shortest bars to the inset close-up drawn below).
ax.annotate("", xy=(10.5, 82000), xytext=(12, 12000),
            arrowprops=dict(arrowstyle=']-, widthA=8.5, lengthA=1, angleA=343.3', color='g', lw=2))

# ax.arrow(x=10.5, y=80000, dx=12, dy=10000,style=']-', color='r')
# Thousands-separated y tick labels. A formatter is used instead of
# set_yticklabels(): a fixed label list is matched to ticks by position only,
# so it goes stale as soon as the locator chooses different ticks.
ax.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))
# Country names on the x axis, suffixed with the footnote marker from add_stars().
ax.set_xticklabels(['{}{}'.format(emigrants_df['country'].iloc[x], add_stars(emigrants_df, x)) for x in ax.get_xticks().tolist()])
ax.set_ylabel('# people', fontsize=12)

# Inset axes over the main axes: close-up of the last five rows of emigrants_df.
emigrants_tail = emigrants_df.iloc[-5:, :]
l, b, h, w = .45, .50, .3, .45
ax2 = fig.add_axes([l, b, w, h])  # add_axes expects [left, bottom, width, height]
emigrants_tail.plot.bar(y='total', x='country', ax=ax2, label='')
ax2.get_legend().remove()
ax2.axes.get_xaxis().set_label_text('')
ax2.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))
ax2.set_xticklabels(['{}{}'.format(emigrants_tail['country'].iloc[x], add_stars(emigrants_tail, x)) for x in ax2.get_xticks().tolist()])
# pyplot call: targets the current axes, expected to be the inset just added.
plt.xticks(rotation = 90, ha='center');
ax2.set_ylabel('# people', fontsize=12)

# plt.title('Close up')
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)

ax2.spines.right.set_visible(False)
ax2.spines.top.set_visible(False)

# Footnote explaining the '*' and '**' markers used in the tick labels.
ax.annotate(xy=(12, 8000), xytext=(5,100), textcoords='offset points', text='* do not have data\nstarting from 1990\n\n** does not have\nadditional info\nabout the exact\ndestination', va='center', color='black', fontsize=10)

# plt.xticks([])
# plt.yticks([])

# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('Emigration from Romania between 1990-2021 based on destination', fontsize=16);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by later plotting cells.
emigrants_per_country_plot = fig

In [None]:
# Main bar chart: one bar per row of immigrants_df, bar height taken from `total`.
fig, ax = plt.subplots(figsize=(10, 10))
immigrants_df.plot.bar(x='country', y='total', label='', ax=ax)

# Drop the legend, blank the x-axis caption and turn the tick labels vertical.
ax.get_legend().remove()
ax.xaxis.set_label_text('')
plt.xticks(ha='center', rotation=90)

# Green bracket annotation at hand-tuned data coordinates.
ax.annotate(
    "",
    xy=(6.5, 82000),
    xytext=(6.5, 29000),
    arrowprops={'arrowstyle': ']-, widthA=19, lengthA=1, angleA=0', 'color': 'g', 'lw': 2},
)


def add_stars(df, x):
    """Return the footnote marker for the row at integer position ``x`` of ``df``.

    '**' when the row's 'country' value is 'Other countries'; otherwise '*'
    when any of the row's '1994'..'2021' cells is missing; otherwise an
    empty string. Columns outside that label range are ignored.
    """
    if df['country'].iloc[x] == 'Other countries':
        return '**'

    window = df.loc[:, '1994':'2021'].iloc[x, :]
    # Dropping missing cells shortens the window exactly when it has at least one.
    return '*' if window.isna().any() else ''

# Thousands-separated y tick labels. A formatter is used instead of
# set_yticklabels(): a fixed label list is matched to ticks by position only,
# so it goes stale as soon as the locator chooses different ticks.
ax.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))
# Country names on the x axis, suffixed with the footnote marker from add_stars().
ax.set_xticklabels(['{}{}'.format(immigrants_df['country'].iloc[x], add_stars(immigrants_df, x)) for x in ax.get_xticks().tolist()])

# Inset axes over the main axes: close-up of the last eight rows of immigrants_df.
immigrants_tail = immigrants_df.iloc[-8:, :]
l, b, h, w = .37, .40, .35, .55
ax2 = fig.add_axes([l, b, w, h])  # add_axes expects [left, bottom, width, height]
immigrants_tail.plot.bar(y='total', x='country', ax=ax2, label='')

ax2.get_legend().remove()
ax2.axes.get_xaxis().set_label_text('')
ax2.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))
ax2.set_xticklabels(['{}{}'.format(immigrants_tail['country'].iloc[x], add_stars(immigrants_tail, x)) for x in ax2.get_xticks().tolist()])
# pyplot call: targets the current axes, expected to be the inset just added.
plt.xticks(rotation = 90, ha='center');


# plt.title('Close up')
ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False)
ax.set_ylabel('# people', fontsize=12)

ax2.spines.right.set_visible(False)
ax2.spines.top.set_visible(False)
ax2.set_ylabel('# people', fontsize=12)

# Footnote explaining the '**' marker used in the tick labels.
ax.annotate(xy=(8.5, 8000), xytext=(5,90), textcoords='offset points', text='** does not have\nadditional info\nabout the exact\norigin', va='center', color='black', fontsize=10);


# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('Immigration to Romania between 1994-2021 based on origin', fontsize=16, pad=14);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by later plotting cells.
immigrants_per_country_plot = fig

### Top 5 and bottom 5 years by number of emigrants/immigrants

In [None]:
# Per-year totals summed over all rows, ordered from the highest year to the lowest.
# Emigration covers the '1990'..'2021' columns, immigration '1994'..'2021'.
emigrants_series = (
    emigrants_df.loc[:, '1990':'2021']
    .sum()
    .sort_values(ascending=False)
)
immigrants_series = (
    immigrants_df.loc[:, '1994':'2021']
    .sum()
    .sort_values(ascending=False)
)

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
top_n = 5

# Left panel: first top_n entries of emigrants_series; right panel: last top_n entries.
# Both panels get identical styling, so they are drawn in one loop.
for panel, yearly in zip(ax, (emigrants_series.iloc[:top_n], emigrants_series.iloc[-top_n:])):
    yearly.plot.bar(ax=panel, label='')

    # Thousands-separated y tick labels via a formatter: unlike set_yticklabels()
    # on unfixed ticks, this stays correct if the locator picks different ticks.
    panel.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))

    panel.spines.right.set_visible(False)
    panel.spines.top.set_visible(False)
    panel.set_ylabel('# people', fontsize=12)
    panel.tick_params(labelrotation=0)

# Trailing semicolon keeps the Text repr out of the cell output.
fig.suptitle('Top and last 5 years that people emigrated most', fontsize=16, y=0.9856);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by later plotting cells.
emigrants_top_bottom_5_plot = fig

In [None]:
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
top_n = 5

# Left panel: first top_n entries of immigrants_series; right panel: last top_n entries.
# Both panels get identical styling, so they are drawn in one loop.
for panel, yearly in zip(ax, (immigrants_series.iloc[:top_n], immigrants_series.iloc[-top_n:])):
    yearly.plot.bar(ax=panel, label='')

    # Thousands-separated y tick labels via a formatter: unlike set_yticklabels()
    # on unfixed ticks, this stays correct if the locator picks different ticks.
    panel.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))

    panel.spines.right.set_visible(False)
    panel.spines.top.set_visible(False)
    panel.set_ylabel('# people', fontsize=12)
    panel.tick_params(labelrotation=0)

# Trailing semicolon keeps the Text repr out of the cell output.
fig.suptitle('Top and last 5 years that people immigrated most', fontsize=16);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by later plotting cells.
immigrants_top_bottom_5_plot = fig

### Total number of emigrants/immigrants

In [None]:
# Grand total of emigrants: sum each '1990'..'2021' column, then add the column sums.
total_emigrants = (
    emigrants_df.loc[:, '1990':'2021']
    .sum()
    .sum()
)

In [None]:
# Grand total of immigrants: sum each '1990'..'2021' column, then add the column sums.
total_immigrants = (
    immigrants_df.loc[:, '1990':'2021']
    .sum()
    .sum()
)

In [None]:
# Show the emigrant grand total as the cell output.
total_emigrants

In [None]:
# Show the immigrant grand total as the cell output.
total_immigrants

### Line plot over years

In [None]:
# Small multiples: one line chart per row of emigrants_df on a 4x4 grid,
# filled row by row.
fig, ax = plt.subplots(4, 4, figsize=(14, 14))
fig.tight_layout(pad=5.0)

# Figure-level captions shared by all panels.
fig.text(0.5, 0.01, 'years', ha='center', fontsize=16)
fig.text(0.01, 0.5, '# people', va='center', rotation='vertical', fontsize=16)

to_column = '2021'
countries = list(emigrants_df['country'])

for i, country in enumerate(countries):
    row = emigrants_df[emigrants_df['country'] ==  country]#.dropna(axis=1)

    # Name of the first column after the leading one that holds a value for
    # this country; the plotted range starts there.
    trailing = row.iloc[:, 1:]
    from_column = [name for name, missing in zip(trailing.columns, trailing.isnull().values[0]) if not missing][0]

    yearly = row.loc[:, from_column:to_column]
    max_no_emigrants = yearly.fillna(0).values.max()

    # Row-major position in the grid (same cell as ax[i // 4, i % 4]).
    panel = ax.flat[i]
    # 5% headroom above the tallest value.
    panel.set_ylim([0, max_no_emigrants + max_no_emigrants * 0.05])

    # Countries without their own entry reuse the 'Romania' colour.
    color = colors_dict.get(country, colors_dict['Romania'])

    yearly.T.plot.line(ax=panel, label=country, linewidth=3, color=color, solid_capstyle='round')

#     panel.set_xlim(left = 0)

    # Thousands-separated y tick labels via a formatter: unlike set_yticklabels()
    # on unfixed ticks, this stays correct if the locator picks different ticks.
    panel.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))

    # Hide the right and top spines
    panel.spines.right.set_visible(False)
    panel.spines.top.set_visible(False)

    # A trailing '*' marks countries whose data does not start at '1990'.
    text = '{}{}'.format(country, '' if from_column == '1990' else '*')
    panel.set_title(text)

    panel.get_legend().remove()


# Remove every grid cell that did not receive a country (previously only the
# last cell was removed, whether or not it was in use).
for unused in ax.flat[len(countries):]:
    fig.delaxes(unused)
# ax.annotate(xy=(27, 8000), xytext=(0,0), textcoords='offset points', text='* do not have data\nstarting from 1990', va='center', color='black', fontsize=18)
fig.text(0.85, 0.15, '* do not have data\nstarting from 1990', ha='center', fontsize=12)

# Trailing semicolon keeps the Text repr out of the cell output.
fig.suptitle('Emigration over time between 1990-2021 per country', fontsize=20, y=1);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by later plotting cells.
emigrants_line_plot = fig

In [None]:
# Small multiples: one line chart per row of immigrants_df on a 3x4 grid,
# filled row by row.
fig, ax = plt.subplots(3, 4, figsize=(14, 14))
fig.tight_layout(pad=5.0)
# Figure-level captions shared by all panels.
fig.text(0.5, 0.01, 'years', ha='center', fontsize=16)
fig.text(0.01, 0.5, '# people', va='center', rotation='vertical', fontsize=16)

to_column = '2021'
countries = list(immigrants_df['country'])

for i, country in enumerate(countries):
    row = immigrants_df[immigrants_df['country'] ==  country]#.dropna(axis=1)

    # Name of the first column after the leading one that holds a value for
    # this country; the plotted range starts there.
    trailing = row.iloc[:, 1:]
    from_column = [name for name, missing in zip(trailing.columns, trailing.isnull().values[0]) if not missing][0]

    yearly = row.loc[:, from_column:to_column]
    max_no_immigrants = yearly.fillna(0).values.max()

    # Row-major position in the grid (same cell as ax[i // 4, i % 4]).
    panel = ax.flat[i]
    # 5% headroom above the tallest value.
    panel.set_ylim([0, max_no_immigrants + max_no_immigrants * 0.05])

    # Countries without their own entry reuse the 'Romania' colour.
    color = colors_dict.get(country, colors_dict['Romania'])

    yearly.T.plot.line(ax=panel, label=country, linewidth=3, color=color, solid_capstyle='round')

#     panel.set_xlim(left = 0)

    # Thousands-separated y tick labels via a formatter: unlike set_yticklabels()
    # on unfixed ticks, this stays correct if the locator picks different ticks.
    panel.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))

    # Hide the right and top spines
    panel.spines.right.set_visible(False)
    panel.spines.top.set_visible(False)

    panel.set_title(country)

    panel.get_legend().remove()


# Remove every grid cell that did not receive a country (previously only the
# last cell was removed, whether or not it was in use).
for unused in ax.flat[len(countries):]:
    fig.delaxes(unused)


# Trailing semicolon keeps the Text repr out of the cell output.
fig.suptitle('Immigration over time between 1994-2021 per country', fontsize=20, y=1);

In [None]:
# Keep a named reference to the figure built in the previous cell; `fig` is rebound by the next plotting cell.
immigrants_line_plot = fig

### Emigration and Immigration over time

In [None]:
# Two lines on one axes: yearly emigrant and immigrant totals summed over all rows.
fig, ax = plt.subplots(figsize=(13, 5))

emigrants_sum = emigrants_df.loc[:, '1990':'2021'].sum()
emigrants_sum.plot(ax=ax, lw=3, color='#DC3535')
# Label the line next to its last point. The value is read with .iloc[-1]:
# the index holds column labels, and plain [-1] on such a Series relies on
# positional fallback that pandas 2.x deprecates.
last_x = len(emigrants_sum) - 1
last_y = emigrants_sum.iloc[-1]
ax.annotate(xy=(last_x, last_y), xytext=(5,0), textcoords='offset points', text='Emigrants*', va='center', weight='bold', fontsize=10, color=ax.get_lines()[-1].get_color())

# min_count=1 leaves a year empty (NaN) when no row has a value for it,
# instead of reporting it as 0.
immigrants_sum = immigrants_df.loc[:, '1990':'2021'].sum(min_count=1)
immigrants_sum.plot(ax=ax, lw=3, color='#54B435')

last_x = len(immigrants_sum) - 1
last_y = immigrants_sum.iloc[-1]
ax.annotate(xy=(last_x, last_y), xytext=(5,0), textcoords='offset points', text='Immigrants', va='center', weight='bold', fontsize=10, color=ax.get_lines()[-1].get_color())


ax.set_xlim(left = 0)

# Thousands-separated y tick labels via a formatter: unlike set_yticklabels()
# on unfixed ticks, this stays correct if the locator picks different ticks.
ax.yaxis.set_major_formatter(lambda value, _pos: '{:,}'.format(int(value)))

ax.set_ylabel('# people', fontsize=12)
ax.set_xlabel('years', fontsize=12)

ax.spines.right.set_visible(False)
ax.spines.top.set_visible(False);
# plt.xticks(fontsize=20);
# plt.yticks(fontsize=20);

# Footnote for the '*' on the 'Emigrants*' label.
ax.annotate(xy=(27, 8000), xytext=(0,0), textcoords='offset points', text='* do not have data\nstarting from 1990', va='center', color='black', fontsize=10)


# Trailing semicolon keeps the Text repr out of the cell output.
ax.set_title('Emigration and Immigration over time between 1990-2021', fontsize=16, pad=20);

In [None]:
# Keep a named reference to the figure built in the previous cell.
emigrants_immigrants_line_plot = fig

In [None]:
# emigrants_per_country_plot
# immigrants_per_country_plot
# emigrants_top_bottom_5_plot
# immigrants_top_bottom_5_plot
# emigrants_line_plot
# immigrants_line_plot

# emigrants_immigrants_line_plot

### Unused code (commented out)

### Box plot

In [None]:
# fig, ax = plt.subplots(figsize=(8, 8))

# for position, (idx, row) in enumerate(emigrants_df.iterrows()):
#     ax.boxplot(list(row['1990':'2021'].fillna(0)), positions=[position])
#     # break

# # ax.set_xticks(range(position+1))
# ax.set_xticklabels(emigrants_df['country']);
# plt.xticks(rotation = 90, ha='center');


In [None]:
# # vivid #E58606,#5D69B1,#52BCA3,#99C945,#CC61B0,#24796C,#DAA51B,#2F8AC4,#764E9F,#ED645A,#CC3A8E,#A5AA99
# # https://carto.com/carto-colors/

# colors = "#E58606","#5D69B1","#52BCA3","#99C945","#CC61B0","#24796C","#DAA51B","#2F8AC4","#764E9F","#ED645A","#CC3A8E","#A5AA99","#88CCEE","#DDCC77","#117733","#332288"

# colors_dict = dict(zip(emigrants_df['country'], colors))

# fig, ax = plt.subplots(figsize=(10, 10))

# from_column = '1990'
# to_column = '2021'

# max_no_emigrants = emigrants_df.loc[:, from_column:to_column].fillna(0).values.max()
# plt.ylim([0, max_no_emigrants + max_no_emigrants * 0.05])

# labels = list(emigrants_df.loc[:,from_column:to_column].columns)

# for idx, row in emigrants_df.iterrows():
#     color = colors_dict[row[0]]
#     row[from_column:to_column].plot.line(ax=ax, label=row[0], linewidth=3, color=color, solid_capstyle='round')
    
#     x = len(labels) - 1
#     y = row[from_column:to_column].dropna().iloc[-1]
# #     if row[0] == 'Sweden':
# #         y += 1300
# #     elif row[0] == 'Austria':
# #         y += 500
# #     elif row[0] == 'Germany':
# #         y -=400
        
#     if len(row[from_column:to_column].dropna()) != len(row[from_column:to_column]):
#         text = '{}*'.format(row[0])
#     else:
#         text = row[0]
#     ax.annotate(xy=(x, y), xytext=(5,0), textcoords='offset points', text=text, va='center', color=color, weight='bold', fontsize=10)

# # ax.annotate(xy=(24, 2000), xytext=(5,0), textcoords='offset points', text='* do not have data starting from 1990', va='center', color='black', fontsize=10)

# ax.set_xticks(ticks=range(len(labels)), labels=labels, rotation=45)

# ax.set_xlim(left = 0)

# ax.set_yticklabels(['{:,}'.format(int(x)) for x in ax.get_yticks().tolist()])

# # Hide the right and top spines
# ax.spines.right.set_visible(False)
# ax.spines.top.set_visible(False)

# plt.locator_params(axis='x', nbins=11);
# # plt.title('GDP per capita (current US$) between 1990 and 2021', loc='center', fontdict={'fontsize':20});