In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
from scipy import stats
import statsmodels.stats.api as sms
import matplotlib.pyplot as plt
import matplotlib.style as style
import plotly.figure_factory as ff
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.colors import LinearSegmentedColormap
# Set seaborn's global theme to the 'whitegrid' style.
sns.set_theme(style='whitegrid')

In [42]:
# Load the variation table.
# NOTE(review): the path is tied to one user's home directory; consider
# reading it from a configurable data directory instead.
var_df = pd.read_csv("~/Nextcloud/linkedin_recruiter/inputs/variation.csv")

# Express the coefficient of variation as a percentage for the plots below.
var_df['flow_variation_pct'] = var_df['flow_variation'] * 100

# Rows flagged eu_uk == 1. The explicit .copy() makes the subset an independent
# frame, so adding or overwriting columns on it later cannot trigger
# SettingWithCopyWarning or touch var_df.
eu_var = var_df[var_df['eu_uk'] == 1].copy()

In [56]:
# Global view: coefficient of variation (%) on x, natural log of the median
# flow on y, points coloured by destination region.
px.scatter(
    data_frame=var_df,
    x='flow_variation_pct',
    # Computed Series rather than a column name; its axis title is supplied
    # through the "y" entry of `labels`.
    y=var_df['flow_median'].pipe(np.log),
    color='region_dest',
    template="simple_white",
    title="Global median flows (ln-transformed) vs. coefficient of variation (%)",
    # Hover fields: True shows the raw value, a string is a format spec,
    # False hides the field.
    hover_data=dict(
        country_orig=True,
        country_dest=True,
        flow_median=':,',
        flow_std=':.2f',
        flow_variation_pct=':.2f',
        region_dest=False,
    ),
    # Display names for axes, legend and hover entries.
    labels=dict(
        flow_variation_pct="CV (%)",
        y="median flow (ln)",
        region_dest="Destination Region",
        country_orig='Origin',
        country_dest='Destination',
        flow_median='median flow',
        flow_std='SD flow',
    ),
)

In [61]:
# Global view: coefficient of variation (%) on x, natural log of the median
# number of users in the destination on y, coloured by destination region.
px.scatter(
    data_frame=var_df,
    x='flow_variation_pct',
    # Computed Series rather than a column name; its axis title is supplied
    # through the "y" entry of `labels`.
    y=var_df['users_dest_median'].pipe(np.log),
    color='region_dest',
    template="simple_white",
    title="Global median users in destination (ln-transformed) vs. coefficient of variation (%)",
    # Hover fields: True shows the raw value, a string is a format spec,
    # False hides the field.
    hover_data=dict(
        country_orig=True,
        country_dest=True,
        users_dest_median=':,',
        flow_std=':.2f',
        flow_variation_pct=':.2f',
        region_dest=False,
    ),
    # Display names for axes, legend and hover entries.
    labels=dict(
        flow_variation_pct="CV (%)",
        y="Median Users in Destination (ln)",
        region_dest="Destination Region",
        country_orig='Origin Country',
        country_dest='Destination Country',
        users_dest_median='median users in destination',
        flow_std='SD flow',
    ),
)

In [62]:
# Label every EU + UK row with its direction of travel between mid-regions:
#   y         -> "from <origin mid-region> to <destination mid-region>"
#   direction -> same as y, except rows whose origin and destination
#                mid-regions match, which read "within <mid-region>".
eu_var = eu_var.assign(
    y=lambda d: 'from '+ d['midregion_orig'] + ' to ' + d['midregion_dest'],
    direction=lambda d: d['y'].where(
        d['midregion_dest'] != d['midregion_orig'],
        'within ' + d['midregion_orig'],
    ),
)

# EU + UK view: coefficient of variation (%) on x, natural log of the median
# flow on y, points coloured by direction.
px.scatter(
    data_frame=eu_var,
    x='flow_variation_pct',
    # Computed Series (not the 'y' text column created above); its axis title
    # is supplied through the "y" entry of `labels`.
    y=eu_var['flow_median'].pipe(np.log),
    color='direction',
    template='simple_white',
    title="EU + UK median flows (ln-transformed) vs. coefficient of variation (%)",
    # Hover fields: True shows the raw value, a string is a format spec,
    # False hides the field.
    hover_data=dict(
        country_orig=True,
        country_dest=True,
        flow_std=':.2f',
        flow_variation_pct=':.2f',
        direction=False,
        flow_median=':,',
    ),
    # Display names for axes, legend and hover entries.
    labels=dict(
        flow_variation_pct="CV (%)",
        y="Median Flow (ln)",
        direction="Direction",
        country_orig='Origin Country',
        country_dest='Destination Country',
        flow_median='median flow',
        flow_std='SD flow',
    ),
)