# Switch between chromosome naming nomenclatures easily!

Having trouble with chromosome naming nomenclatures? AssemblyInfo provides a simple interface to switch between them!

In [1]:
!pip3 install assemblyinfo

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import assemblyinfo

In [3]:
# Use the connect() method to retrieve our database; the cells below query it through `db`.

db = assemblyinfo.connect()

In [5]:
# get_species_info() shows which assemblies are available for a given species
# (here: "homo_sapiens"). The result is the cell's last expression, so it is displayed.

db.get_species_info("homo_sapiens")



In [10]:
# AssemblyInfo can also serve as a thin wrapper when all you need are the
# names of the assembled chromosomes: request hg38 with the "assembled" role
# and the NCBI provider, then read `chromnames`.

hg38 = db.assembly_info(
    "hg38",
    roles=["assembled"],
    provider="ncbi",
)
hg38.chromnames

['1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 'X',
 'Y',
 'MT']

In [11]:
# Suppose (for some strange reason) we need the names of the unplaced
# sequences in mm10: ask for the "unplaced" role with the NCBI provider.

mm10 = db.assembly_info(
    "mm10",
    roles=["unplaced"],
    provider="ncbi",
)
mm10.chromnames

['MSCHRUN_CTG1',
 'MSCHRUN_CTG13',
 'MSCHRUN_CTG14',
 'MSCHRUN_CTG21',
 'MSCHRUN_CTG2',
 'MSCHRUN_CTG22',
 'MSCHRUN_CTG19',
 'MSCHRUN_CTG16',
 'MSCHRUN_CTG3',
 'MSCHRUN_CTG20',
 'MSCHRUN_CTG4',
 'MSCHRUN_CTG5',
 'MSCHRUN_CTG6',
 'MSCHRUN_CTG7',
 'MSCHRUN_CTG17',
 'MSCHRUN_CTG18',
 'MSCHRUN_CTG9',
 'MSCHRUN_CTG10',
 'MSCHRUN_CTG11',
 'MSCHRUN_CTG12',
 'MSCHRUN_CTG15',
 'MSCHRUN_CTG23']

In [12]:
# Now both unplaced and unlocalized sequences, this time with the UCSC provider.

mm10 = db.assembly_info(
    "mm10",
    roles=["unplaced", "unlocalized"],
    provider="ucsc",
)
mm10.chromnames

['chr1_GL456210_random',
 'chr1_GL456211_random',
 'chr1_GL456212_random',
 'chr1_GL456213_random',
 'chr1_GL456221_random',
 'chr4_GL456216_random',
 'chr4_GL456350_random',
 'chr4_JH584292_random',
 'chr4_JH584293_random',
 'chr4_JH584294_random',
 'chr4_JH584295_random',
 'chr5_GL456354_random',
 'chr5_JH584296_random',
 'chr5_JH584297_random',
 'chr5_JH584298_random',
 'chr5_JH584299_random',
 'chr7_GL456219_random',
 'chrX_GL456233_random',
 'chrY_JH584300_random',
 'chrY_JH584301_random',
 'chrY_JH584302_random',
 'chrY_JH584303_random',
 'chrUn_GL456239',
 'chrUn_GL456359',
 'chrUn_GL456360',
 'chrUn_GL456366',
 'chrUn_GL456367',
 'chrUn_GL456368',
 'chrUn_GL456370',
 'chrUn_GL456372',
 'chrUn_GL456378',
 'chrUn_GL456379',
 'chrUn_GL456381',
 'chrUn_GL456382',
 'chrUn_GL456383',
 'chrUn_GL456385',
 'chrUn_GL456387',
 'chrUn_GL456389',
 'chrUn_GL456390',
 'chrUn_GL456392',
 'chrUn_GL456393',
 'chrUn_GL456394',
 'chrUn_GL456396',
 'chrUn_JH584304']