In [30]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import numpy as np

In [33]:
# Both extracts are parsed with the same options: no column is used as the
# index, and ',' is treated as a thousands separator in numeric fields.
CSV_OPTIONS = dict(index_col=False, thousands=',')

migrants = pd.read_csv("migrantsOcc.csv", **CSV_OPTIONS)
pops = pd.read_csv("censusOcc.csv", **CSV_OPTIONS)

In [34]:
# Forward-fill blanks in every column with the last value seen above them.
# `fillna(method='ffill')` is deprecated since pandas 2.1; `.ffill()` is the
# supported spelling. Rebinding (instead of inplace=True) keeps the cell
# free of in-place mutation.
# NOTE(review): this also fills any blank 'Persons' cells with the previous
# row's count - presumably only the label columns have blanks; confirm.
migrants = migrants.ffill()
pops = pops.ffill()

In [35]:
# Keep only rows whose age-group label is not the 'Total' roll-up, so that
# later sums over age groups do not include the roll-up row as well.
isMigrantAgeRow = migrants['AGE5P Age in 5 year groups'].ne('Total')
isCensusAgeRow = pops['AGE5P - Age in Five Year Groups'].ne('Total')

migrants = migrants.loc[isMigrantAgeRow]
pops = pops.loc[isCensusAgeRow]

In [36]:
# Display the column labels of the migrants table.
migrants.columns

Index(['AGE5P Age in 5 year groups', 'SEXP Sex', 'OCCP Occupation of person',
       'INDP Industry of employment', 'GNGP Public/Private sector', 'Persons'],
      dtype='object')

In [37]:
# Display the distinct age-group labels remaining in the migrants table.
migrants['AGE5P Age in 5 year groups'].unique()

array(['15-19 years', '20-24 years', '25-29 years', '30-34 years',
       '35-39 years', '40-44 years', '45-49 years', '50-54 years',
       '55-59 years', '60-64 years', '65-69 years', '70-74 years',
       '75-79 years', '80-84 years', '85-89 years', '90-94 years',
       '95-99 years', '100 years and over'], dtype=object)

In [38]:
# Display the distinct age-group labels remaining in the census table
# (the ratios computed later assume these match the migrants table's labels).
pops['AGE5P - Age in Five Year Groups'].unique()

array(['15-19 years', '20-24 years', '25-29 years', '30-34 years',
       '35-39 years', '40-44 years', '45-49 years', '50-54 years',
       '55-59 years', '60-64 years', '65-69 years', '70-74 years',
       '75-79 years', '80-84 years', '85-89 years', '90-94 years',
       '95-99 years', '100 years and over'], dtype=object)

In [39]:
# Total 'Persons' per (age group, sex) in each table. as_index=False keeps the
# grouping keys as ordinary columns on a fresh 0..n-1 index.
migrantsAge = migrants.groupby(
    ['AGE5P Age in 5 year groups', 'SEXP Sex'], as_index=False
)['Persons'].sum()
popsAge = pops.groupby(
    ['AGE5P - Age in Five Year Groups', 'SEXP Sex'], as_index=False
)['Persons'].sum()

In [40]:
# Migrants as a percentage of the census count for the same (age group, sex).
# The census count is looked up by key rather than by row position: the two
# frames come from independent groupbys, so a combination present in only one
# of them would shift every later row and silently pair migrants with the
# wrong denominator. Unmatched keys now give NaN instead of a wrong number.
ageSexKeys = ['AGE5P Age in 5 year groups', 'SEXP Sex']
popsAgeMatched = migrantsAge[ageSexKeys].merge(
    popsAge.rename(columns={
        'AGE5P - Age in Five Year Groups': 'AGE5P Age in 5 year groups',
        'Persons': 'CensusPersons',
    }),
    on=ageSexKeys,
    how='left',               # one output row per migrantsAge row, same order
    validate='many_to_one',   # fail loudly if census keys are not unique
)
migrantsAge['Ratio'] = migrantsAge['Persons'] * 100 / popsAgeMatched['CensusPersons'].to_numpy()

In [41]:
# Give 'Male' rows a negative Ratio so their bars extend left of zero in the
# horizontal bar chart. The sign is applied to the magnitude, so re-running
# this cell leaves the result unchanged (the previous `Ratio * -1` flipped
# male values back to positive on every second run).
ratioMagnitude = migrantsAge['Ratio'].abs()
migrantsAge['Ratio'] = np.where(migrantsAge['SEXP Sex'] == 'Male', -ratioMagnitude, ratioMagnitude)

In [42]:
# Horizontal bars: one per age group, coloured by sex, bar length = Ratio.
px.bar(
    migrantsAge,
    x='Ratio',
    y='AGE5P Age in 5 year groups',
    color='SEXP Sex',
    title='Migrants as percentage of workforce',
)

In [45]:
# Display the column labels of the census table.
pops.columns

Index(['AGE5P - Age in Five Year Groups', 'SEXP Sex', 'INDP - 1 Digit Level',
       'OCCP - 1 Digit Level', 'GNGP Public/Private Sector', 'Persons'],
      dtype='object')

In [46]:
# Total 'Persons' per occupation in each table.
migrantsOcc = migrants.groupby(['OCCP Occupation of person'])['Persons'].sum().reset_index()
popsOcc = pops.groupby(['OCCP - 1 Digit Level'])['Persons'].sum().reset_index()

# Migrants as a percentage of the census count for the same occupation label.
# The denominator is mapped by label rather than by row position, so an
# occupation present in only one table can no longer shift the pairing;
# labels with no census match give NaN instead of a misattributed ratio.
popsOccPersons = popsOcc.set_index('OCCP - 1 Digit Level')['Persons']
migrantsOcc['Ratio'] = (
    migrantsOcc['Persons'] * 100
    / migrantsOcc['OCCP Occupation of person'].map(popsOccPersons)
)

# Cap at 100 % (also turns +inf from a zero census count into 100).
migrantsOcc['Ratio'] = migrantsOcc['Ratio'].clip(upper=100)

px.bar(migrantsOcc, x='OCCP Occupation of person', y='Ratio', title='Migrants as percentage of workforce, by occupation')

In [50]:
# Total 'Persons' per industry in each table.
migrantsInd = migrants.groupby(['INDP Industry of employment'])['Persons'].sum().reset_index()
popsInd = pops.groupby(['INDP - 1 Digit Level'])['Persons'].sum().reset_index()

# Migrants as a percentage of the census count for the same industry label.
# The denominator is mapped by label rather than by row position, so an
# industry present in only one table can no longer shift the pairing;
# labels with no census match give NaN instead of a misattributed ratio.
popsIndPersons = popsInd.set_index('INDP - 1 Digit Level')['Persons']
migrantsInd['Ratio'] = (
    migrantsInd['Persons'] * 100
    / migrantsInd['INDP Industry of employment'].map(popsIndPersons)
)

# Cap at 100 % (also turns +inf from a zero census count into 100).
migrantsInd['Ratio'] = migrantsInd['Ratio'].clip(upper=100)

px.bar(migrantsInd, x='INDP Industry of employment', y='Ratio', title='Migrants as percentage of workforce, by industry')

In [53]:
# Total 'Persons' per (industry, occupation) in each table.
indOccKeys = ['INDP Industry of employment', 'OCCP Occupation of person']
migrantsInd = migrants.groupby(indOccKeys)['Persons'].sum().reset_index()
popsInd = pops.groupby(['INDP - 1 Digit Level', 'OCCP - 1 Digit Level'])['Persons'].sum().reset_index()

# Fetch the census count for each migrant row by (industry, occupation) key.
# groupby only emits combinations that occur in the data, so the two frames
# are not guaranteed to have the same rows in the same order; dividing by row
# position would then pair migrants with the wrong denominator.
popsIndMatched = migrantsInd[indOccKeys].merge(
    popsInd.rename(columns={
        'INDP - 1 Digit Level': 'INDP Industry of employment',
        'OCCP - 1 Digit Level': 'OCCP Occupation of person',
        'Persons': 'CensusPersons',
    }),
    on=indOccKeys,
    how='left',               # one output row per migrantsInd row, same order
    validate='many_to_one',   # fail loudly if census keys are not unique
)
migrantsInd['Ratio'] = migrantsInd['Persons'] * 100 / popsIndMatched['CensusPersons'].to_numpy()

# Cap at 100 % (also turns +inf from a zero census count into 100).
migrantsInd['Ratio'] = migrantsInd['Ratio'].clip(upper=100)

# Single display label per bar.
migrantsInd['Category'] = migrantsInd['INDP Industry of employment'] + ' - ' + migrantsInd['OCCP Occupation of person']

migrantsInd = migrantsInd.sort_values(by='Ratio', ascending=False)

px.bar(migrantsInd.head(10), x='Category', y='Ratio', title='Migrants as percentage of workforce, by industry and occupation')

In [72]:
# Total 'Persons' per (industry, occupation, age group) in each table.
indOccAgeKeys = ['INDP Industry of employment', 'OCCP Occupation of person', 'AGE5P Age in 5 year groups']
migrantsInd = migrants.groupby(indOccAgeKeys)['Persons'].sum().reset_index()
popsInd = pops.groupby(['INDP - 1 Digit Level', 'OCCP - 1 Digit Level', 'AGE5P - Age in Five Year Groups'])['Persons'].sum().reset_index()

# Fetch the census count for each migrant row by its three-part key.
# groupby only emits combinations that occur in the data, so the two frames
# are not guaranteed to have the same rows in the same order; dividing by row
# position would then pair migrants with the wrong denominator.
popsIndMatched = migrantsInd[indOccAgeKeys].merge(
    popsInd.rename(columns={
        'INDP - 1 Digit Level': 'INDP Industry of employment',
        'OCCP - 1 Digit Level': 'OCCP Occupation of person',
        'AGE5P - Age in Five Year Groups': 'AGE5P Age in 5 year groups',
        'Persons': 'CensusPersons',
    }),
    on=indOccAgeKeys,
    how='left',               # one output row per migrantsInd row, same order
    validate='many_to_one',   # fail loudly if census keys are not unique
)

# Multiply by 100 so Ratio is a percentage: the cap below compares against 100
# and the chart built from this frame is titled "percentage". Without the
# factor the values were fractions and the cap only ever caught +inf.
migrantsInd['Ratio'] = migrantsInd['Persons'] * 100 / popsIndMatched['CensusPersons'].to_numpy()

# Cap at 100 % (also turns +inf from a zero census count into 100).
# NOTE(review): those capped rows sort to the top of the ranking below -
# confirm that is intended for the "10 highest" chart.
migrantsInd['Ratio'] = migrantsInd['Ratio'].clip(upper=100)

# Single display label per bar.
migrantsInd['Category'] = migrantsInd['INDP Industry of employment'] + ' - ' + migrantsInd['OCCP Occupation of person'] + ' - ' + migrantsInd['AGE5P Age in 5 year groups']

migrantsInd = migrantsInd.sort_values(by='Ratio', ascending=False)

In [80]:
# Bar chart of the first ten rows of migrantsInd, coloured by age group.
fig = px.bar(
    migrantsInd.head(10),
    x='Category',
    y='Ratio',
    color='AGE5P Age in 5 year groups',
    title='Migrants as percentage of workforce, by industry and occupation (10 highest)',
)
# Alternative flat colouring, left disabled:
#   fig.update_traces(marker_color='#f9e1d1')
# Hide the x axis (its tick labels would be the Category strings).
fig.update_xaxes(visible=False)
# Called with no arguments, so no y-axis property is changed.
fig.update_yaxes()
fig.show()

In [81]:
# Write the current figure to an HTML file in the working directory.
fig.write_html("migrantsInd.html")

In [55]:
# Absolute migrant head-count per occupation (not a ratio).
px.bar(
    migrantsOcc,
    x='OCCP Occupation of person',
    y='Persons',
    title='Migrants by occupation',
)

In [57]:
# Fraction that the constant 365375.1 represents of the total migrant count.
# NOTE(review): the origin of 365375.1 is not shown anywhere in this notebook -
# presumably a head-count taken from an earlier result; name it as a constant
# and record its source.
365375.1/migrantsOcc['Persons'].sum()

0.30291261121750074