A notebook to explore migration-related code usage at the individual level

Import the required libraries

In [1]:
import pyarrow.feather as feather
import pandas as pd
import numpy as np

Load the migration event-level dataset

In [6]:
# Location of the event-level extract: one row per recorded migration-related code
migration_codes_arrow_path = "../../output/cohorts/migration_event_level_dataset/migration_related_codes.arrow"

# Read the Arrow/Feather file into a DataFrame
migration_coding_data = feather.read_feather(migration_codes_arrow_path)

# Preview the loaded table
migration_coding_data

Unnamed: 0,patient_id,date,snomedct_code,migration_category
0,1,2013-01-24,85621000000107,Main/first language is not English
1,2,2009-10-07,297687000,Main/first language is not English
2,3,2010-10-12,297394005,Main/first language is not English
3,3,2011-02-07,99751000000107,Main/first language is not English
4,3,2011-04-18,363651000000100,Main/first language is not English
...,...,...,...,...
4306,1320,2008-06-08,972541000000105,Main/first language is not English
4307,1320,2013-04-06,297332006,Main/first language is not English
4308,1320,2014-12-16,94751000000106,Main/first language is not English
4309,1320,2021-10-13,297424004,Main/first language is not English


Recode null `migration_category` values as "Other" (i.e. codes that are migration-related but do not appear in any of the specific migration-related codelists)

In [16]:

# Work on the category column as a standalone Series, then write it back once.
category_values = migration_coding_data["migration_category"]

# A categorical column only accepts values from its declared categories, so
# "Other" has to be registered before it can be used as the fill value.
# The membership check keeps this cell safe to re-run.
if "Other" not in category_values.cat.categories:
    category_values = category_values.cat.add_categories(["Other"])

# Replace missing categories with "Other" and store the result on the frame
migration_coding_data["migration_category"] = category_values.fillna("Other")

# Collapse the event-level rows to one row per patient_id. Each patient's
# distinct migration categories are sorted (plain string order, so capitalised
# labels come before lower-case ones) and joined with " - " into a single
# "all_migration_categories" string.
grouped = (
    migration_coding_data.groupby("patient_id")["migration_category"]
    .agg(lambda x: " - ".join(sorted(set(x.dropna()))))
    .reset_index(name="all_migration_categories")
)

# Only the last expression of a cell is rendered automatically, so the
# patient-level table must be shown with an explicit display() call;
# a bare `grouped` in the middle of the cell produces no output.
display(grouped)

# Number of patients with each combination of migration categories
grouped["all_migration_categories"].value_counts(dropna=False)



all_migration_categories
Main/first language is not English - country of birth                                                      399
Main/first language is not English                                                                         335
country of birth                                                                                            83
Main/first language is not English - Other - country of birth                                               69
Main/first language is not English - Other                                                                  38
Asylum or refugee status - Main/first language is not English - country of birth                            26
Asylum or refugee status - Main/first language is not English                                               14
Asylum or refugee status - Main/first language is not English - Other - country of birth                    12
Other                                                                                  

In [13]:
# Event-level row counts per migration category (missing values included).
# Only the last expression of a cell is rendered automatically, so this
# intermediate result needs an explicit display() call to appear in the output.
display(migration_coding_data["migration_category"].value_counts(dropna=False))

# Inspect the individual events whose category is "Other"
migration_coding_data[migration_coding_data["migration_category"] == "Other"]

Unnamed: 0,patient_id,date,snomedct_code,migration_category
41,10,1996-07-21,103738006,Other
145,41,2005-10-01,841311000000103,Other
153,43,2018-07-03,148586001,Other
172,43,2023-12-28,1050481000000109,Other
204,47,2018-02-27,103738006,Other
...,...,...,...,...
4160,1269,1998-07-20,189151000000100,Other
4214,1294,2001-02-19,315355003,Other
4217,1296,1985-11-08,444799005,Other
4261,1302,2017-07-25,411771000000105,Other
