# Imports

In [2]:
%run standard_imports.ipynb
%run plotting_setup.ipynb

python 3.4.1 |Anaconda 2.1.0 (x86_64)| (default, Sep 10 2014, 17:24:09) 
[GCC 4.2.1 (Apple Inc. build 5577)]
numpy 1.9.2
scipy 0.14.0
pandas 0.14.1
numexpr 2.3.1
pysam 0.8.3
petl 1.0.10
petlx 1.0.3
vcf 0.6.7
h5py 2.3.1
tables 3.1.1
vcfplt 0.8


# Setup

In [3]:
# --- Inputs for this assay test ---------------------------------------------
# Everything specific to this analysis lives under DATA_DIR.
DATA_DIR = '/data/plasmodium/pfalciparum/recon/sqnm_assay_test_1'
SQNM_EXCEL_FN = DATA_DIR + '/Sqnm_data_DK1066_W1378_20150603.xlsx'
SAMPLE_MANIFEST_FN = DATA_DIR + '/PGV4_mk5-RDP.xlsx'

# --- Cache ------------------------------------------------------------------
# Pickled tables are written here (one file per cached function name).
# The directory has to exist before the first cached table is written.
CACHE_DIR = DATA_DIR + '/cache'
# !mkdir -p {CACHE_DIR}

# --- Shared reference resources ----------------------------------------------
REF_GENOME = '/data/plasmodium/pfalciparum/recon/roamato/Pf3D7_v3/3D7_sorted.fa'
PGV4_VCF_FN = '/nfs/team112_internal/production_files/Pf/4_0/pf_4_0_20140712_vfp1.newCoverageFilters_pass_5_99.5.HyperHet.vcf.gz'

# --- External tools -----------------------------------------------------------
# NOTE: these point into one user's checkout and will need adjusting elsewhere.
# GATK_EXE is a full command prefix ("java -jar <jar>"), not a bare path.
BWA_EXE = '/Users/rpearson/src/github/malariagen/methods-dev/recon/opt/bwa/bwa-0.7.12/bwa'
BCFTOOLS_EXE = '/Users/rpearson/src/github/malariagen/methods-dev/pf3k_techbm/opt/bcftools/bcftools-1.2/bcftools'
GATK_EXE = 'java -jar /Users/rpearson/src/github/malariagen/methods-dev/recon/opt/gatk/GenomeAnalysisTK.jar'

# Functions

In [4]:
def etlcache(f):
    """Build a table once, pickle it under CACHE_DIR, and return the pickled copy.

    Intended to be used as a decorator on a zero-argument function: the
    function is called immediately (only if no cache file exists yet) and the
    decorated name ends up bound to the table loaded from the cache file,
    not to the function itself.

    Parameters
    ----------
    f : callable
        Zero-argument function returning a table that ``etl.topickle`` can
        write. Its ``__name__`` is used as the cache file name.

    Returns
    -------
    The table view produced by ``etl.frompickle`` for the cache file.
    """
    fn = os.path.join(CACHE_DIR, f.__name__)
    if not os.path.exists(fn):
        # Create the cache directory on demand so the first run does not fail
        # when it is missing.
        os.makedirs(CACHE_DIR, exist_ok=True)
        etl.topickle(f(), fn)
    # Always hand back the pickled copy, so cached and freshly built runs
    # return the same kind of object.
    return etl.frompickle(fn)
    
def nocache(f):
    """Cache-bypassing counterpart of ``etlcache``.

    Deletes the cache file named after ``f`` (if one exists) and returns the
    result of calling ``f`` directly; nothing is written back to the cache.
    """
    cache_path = os.path.join(CACHE_DIR, f.__name__)
    has_stale_copy = os.path.exists(cache_path)
    if has_stale_copy:
        os.remove(cache_path)
    fresh_table = f()
    return fresh_table


# Load data

In [5]:
@etlcache
def tbl_sample_sets():
    """Read the 'sample_sets' worksheet of the workbook at SQNM_EXCEL_FN.

    No ``.convertnumbers()`` step is applied to the sheet.
    """
    sample_sets_sheet = etl.fromxlsx(SQNM_EXCEL_FN, 'sample_sets')
    return sample_sets_sheet

# Because of the decorator, `tbl_sample_sets` is now the cached table itself.
# Show the number of data rows (header excluded), then the table preview.
print(len(tbl_sample_sets.data()))
tbl_sample_sets

384


sample_code,source_code,donor_source_code,country_code,384_sample_set_code,384_position_code,96_sample_set_code,96_position_code
PH0043-C,722519,722519,KH,DK1066,A01,PH150527-1,A01
PN0004-C,pfXUA010,pfXUA010,PG,DK1066,A02,PF150527-1,A01
PH0457-CW,PF860-2MACS_WGA,PF860-2MACS,KH,DK1066,A03,PH150527-1,A02
PN0005-C,pfXUA013,pfXUA013,PG,DK1066,A04,PF150527-1,A02
PH0002-C,CP185,CP185,KH,DK1066,A05,PH150527-1,A03


In [6]:
@etlcache
def tbl_pgv4_sample_manifest():
    """Read the 'PGV4.0' worksheet of the manifest workbook at SAMPLE_MANIFEST_FN.

    The sheet is opened with ``data_only=True``; no ``.convertnumbers()`` step
    is applied.
    """
    manifest_sheet = etl.fromxlsx(SAMPLE_MANIFEST_FN, 'PGV4.0', data_only=True)
    return manifest_sheet

# Because of the decorator, `tbl_pgv4_sample_manifest` is now the cached table.
# Show the number of data rows (header excluded), then the table preview.
print(len(tbl_pgv4_sample_manifest.data()))
tbl_pgv4_sample_manifest

5729


Num,Sample,Release,Pf3k v3,StudyCode,AlfrescoNumber,AlfrescoCode,Study,Country,LabSample,LowTypability,PcaOutlier,IsDuplicate,ManualExlusion,Exclude,Duplicate,SolarisCountry,SolarisLocation,Code,Fws,Location,Region,Year,KhCluster,V3Clusters,Notes
1,PA0007-C,3_0,True,RV_3606,1006,1006-PF-GM-CONWAY,PF6,GM,False,False,False,False,,False,-,GM,"greater Banjul area, Royal Victoria Teaching Hospital","GM_greater Banjul area, Royal Victoria Teaching Hospital",0.995549265848836,Banjul,GM,,,,
2,PA0008-C,3_0,True,RV_3614,1006,1006-PF-GM-CONWAY,PF6,GM,False,False,False,False,,False,-,GM,"greater Banjul area, Royal Victoria Teaching Hospital","GM_greater Banjul area, Royal Victoria Teaching Hospital",0.380544083595885,Banjul,GM,,,,
3,PA0011-C,3_0,True,RV_3642,1006,1006-PF-GM-CONWAY,PF6,GM,False,False,False,False,,False,-,GM,"greater Banjul area, Royal Victoria Teaching Hospital","GM_greater Banjul area, Royal Victoria Teaching Hospital",0.999316200656244,Banjul,GM,,,,
4,PA0012-C,3_0,True,RV_3650,1006,1006-PF-GM-CONWAY,PF6,GM,False,False,False,False,,False,-,GM,"greater Banjul area, Royal Victoria Teaching Hospital","GM_greater Banjul area, Royal Victoria Teaching Hospital",0.999922696134752,Banjul,GM,,,,
5,PA0015-C,3_0,True,RV_3687,1006,1006-PF-GM-CONWAY,PF6,GM,False,False,False,False,,False,-,GM,"greater Banjul area, Royal Victoria Teaching Hospital","GM_greater Banjul area, Royal Victoria Teaching Hospital",0.999552759064621,Banjul,GM,,,,


In [8]:
def unique_sample_code(rec):
    """Return an identifier for a genotype row that is unique per well.

    Uses positional fields of ``rec``: ``rec[1]``, ``rec[2]`` and ``rec[3]``
    (presumably position code, sample code and source code, going by the
    column names listed in the pivoted-genotypes cell -- confirm against the
    worksheet).

    * ``'WATER'`` rows become ``'WATER_<rec[1]>'``.
    * ``'WT3243-C'`` rows become ``'CEPH_human_pool_<rec[1]>'``.
    * Any other code starting with ``'WT'`` becomes
      ``'<rec[2]>_<rec[3]>_<rec[1]>'``.
    * Everything else is returned unchanged.
    """
    sample_code = rec[2]

    # Controls that get a fixed label plus rec[1].
    relabelled_controls = (
        ('WATER', 'WATER_%s'),
        ('WT3243-C', 'CEPH_human_pool_%s'),
    )
    for control_code, template in relabelled_controls:
        if sample_code == control_code:
            return template % rec[1]

    # Ordinary samples keep their own code as the identifier.
    if not sample_code.startswith('WT'):
        return sample_code

    # Remaining WT* rows are qualified with rec[3] and rec[1].
    return '%s_%s_%s' % (sample_code, rec[3], rec[1])

def extract_chrom(rec):
    """Return the chromosome name encoded in the first field of ``rec``.

    ``rec[0]`` is split on underscores and pieces 3, 4 and 5 are re-joined
    with underscores (e.g. ``'Pf3D7'``, ``'05'``, ``'v3'`` -> ``'Pf3D7_05_v3'``).
    Raises ``IndexError`` if there are fewer than six pieces.
    """
    pieces = rec[0].split('_')
    chrom_parts = (pieces[3], pieces[4], pieces[5])
    return '_'.join(chrom_parts)
    
def extract_pos(rec):
    """Return the position encoded in the first field of ``rec`` as an int.

    ``rec[0]`` is split on underscores and piece 6 is converted with ``int``.
    Raises ``IndexError`` if there are fewer than seven pieces and
    ``ValueError`` if that piece is not an integer literal.
    """
    position_text = rec[0].split('_')[6]
    return int(position_text)


In [9]:
@etlcache
def tbl_pivoted_genotypes():
    """Reshape the 'pivoted_genotypes' worksheet into one row per assay.

    The result has 'chrom' and 'pos' as its first two columns followed by one
    column per sample, sorted by chromosome and position.

    The column indices below are hard-coded. They assume the worksheet has 14
    columns (presumably 'plate_code', 'position-code', 'sample_code',
    'source_code' followed by ten assay columns) and 384 sample rows --
    confirm against the workbook if it changes.
    """
    # Index 14 is the appended 'unique_sample_id'; 4..13 are kept after it,
    # which drops the first four columns.
    sample_id_then_assays = [14] + list(range(4, 14))
    # After the transpose, 385/386 are the appended 'chrom'/'pos' and 1..384
    # the per-sample columns; column 0 is dropped.
    chrom_pos_then_samples = [385, 386] + list(range(1, 385))

    raw_sheet = etl.fromxlsx(SQNM_EXCEL_FN, 'pivoted_genotypes', data_only=True)
    with_sample_id = raw_sheet.addfield('unique_sample_id', unique_sample_code)
    by_sample = with_sample_id.cut(sample_id_then_assays).sort('unique_sample_id')

    # Flip so that each row is an assay and each column a sample.
    by_assay = by_sample.transpose()
    with_coordinates = (by_assay
        .addfield('chrom', extract_chrom)
        .addfield('pos', extract_pos)
    )
    ordered = with_coordinates.cut(chrom_pos_then_samples).sort(['chrom', 'pos'])

    # Relabel one sample column (applied after the sort by sample id above).
    return ordered.rename('PH0833-C', 'PH0883-C')

# Because of the decorator, `tbl_pivoted_genotypes` is now the cached table.
# Show the number of data rows (header excluded), then the table preview.
print(len(tbl_pivoted_genotypes.data()))
tbl_pivoted_genotypes

10


chrom,pos,CEPH_human_pool_M18,CEPH_human_pool_N16,CEPH_human_pool_N18,CEPH_human_pool_O14,CEPH_human_pool_O16,CEPH_human_pool_O18,CEPH_human_pool_P12,CEPH_human_pool_P14,CEPH_human_pool_P16,CEPH_human_pool_P18,PA0011-C,PA0015-C,PA0016-C,PA0018-C,PA0020-C,PA0028-C,PA0029-C,PA0035-C,PA0036-C,PA0037-C,PA0047-C,PA0049-C,PA0149-C,PA0150-C,PA0151-C,PA0152-C,PA0157-C,PA0158-C,PA0165-C,PA0167-C,PA0176-C,PA0180-C,PA0181-C,PA0183-C,PC0001-C,PC0007-C,PC0011-C,PC0013-C,PC0023-C,PC0031-C,PC0032-C,PC0044-C,PC0047-C,PC0051-C,PC0054-C,PC0063-C,PD0004-C,PD0005-01,PD0005-C,PD0008-02,PD0016-C,PD0019-C,PD0020-C,PD0030-C,PD0031-C,PD0033-C,PD0037-C,PD0038-C,PD0045-C,PD0048-C,PD0056-C,PD0058-C,PD0060-C,PD0067-C,PD0068-C,PD0073-C,PD0075-C,PD0076-C,PD0079-C,PD0090-C,PD0093-C,PD0098-C,PD0103-C,PD0105-C,PD0115-Cx,PD0126-C,PD0134-C,PD0135-C,PD0136-C,PD0137-C,PD0459-Cx,PD0671-C,PD0672-C,PE0011-C,PE0012-C,PE0013-C,PE0016-C,PE0019-C,PE0020-C,PE0021-C,PE0022-C,PE0023-C,PE0025-C,PE0030-C,PF0008-C,PF0011-C,PF0020-C,PF0025-C,PF0035-C,PF0037-C,PF0040-C,PF0042-C,PF0047-C,PF0048-C,PF0052-C,PF0054-C,PF0058-C,PF0062-C,PF0065-C,PF0073-C,PF0077-C,PF0085-C,PF0092-C,PG0001-Cx,PG0002-Cx,PG0004-Cx,PG0008-C,PG0009-C,PG0010-C,PG0011-C,PG0049-Cx2,PG0075-C,PG0122-C,PG0123-C,PH0002-C,PH0003-C,PH0008-C,PH0012-C,PH0013-C,PH0016-C,PH0017-C,PH0019-C,PH0020-C,PH0022-C,PH0024-C,PH0027-C,PH0028-Cx,PH0029-Cx,PH0042-C,PH0043-C,PH0209-C,PH0212-C,PH0218-C,PH0221-C,PH0224-C,PH0225-C,PH0232-C,PH0234-C,PH0235-C,PH0236-C,PH0284-C,PH0286-CW,PH0303-CW,PH0326-C,PH0337-C,PH0349-C,PH0375-C,PH0456-C,PH0457-CW,PH0458-CW,PH0460-C,PH0462-C,PH0463-C,PH0464-CW,PH0465-CW,PH0466-C,PH0467-CW,PH0468-C,PH0474-CW,PH0478-C,PH0485-C,PH0500-CW,PH0501-CW,PH0773-Cx,PH0883-C,PH0860-Cx,PH0891-Cx,PH0942-Cx,PH0959-Cx,PH0961-Cx,PH0966-Cx,PH0971-C,PH0973-C,PH0974-Cx,PJ0001-C,PJ0002-C,PJ0003-C,PK0055-C,PK0056-C,PK0060-C,PK0063-C,PK0066-C,PK0067-C,PK0068-C,PK0069-C,PK0070-C,PK0073-C,PK0075-C,PM0004-C,PM0005-C,PM0015-C,PM0017-C,PM0020-C,PM0021-C,PM0033-C,PM0037
-C,PM0047-C,PM0062-C,PM0063-C,PM0068-C,PM0090-C,PM0097-C,PM0113-C,PM0115-C,PM0142-C,PN0004-C,PN0005-C,PN0006-C,PN0007-C,PN0010-C,PN0014-C,PN0015-C,PN0016-C,PN0022-C,PN0023-C,PN0042-C,PN0044-C,PN0053-C,PN0056-C,PN0057-C,PN0073-C,PN0074-C,PN0079-C,PV0002-C,PV0004-C,PV0006-C,PV0014-C,PV0015-C,PV0017-C,PV0018-C,PV0028-C,PV0039-C,PV0048-C,PV0249-Cx,PV0308-Cx,PV0315-Cx,PV0331-Cx,PZ0011-CW,QE0393-Cx,QE0403-Cx,QE0405-Cx,WATER_B03,WATER_C06,WATER_D07,WATER_E10,WATER_I24,WATER_J22,WATER_K20,WATER_L13,WATER_L15,WATER_L17,WATER_L18,WATER_L19,WATER_L21,WATER_L23,WATER_M16,WATER_N14,WATER_O12,WATER_P10,WT3335-C_T996_50_50_3D7_I04,WT3336-C_T996_50_50_DD2_I06,WT3337-C_T996_50_50_HB3_I08,WT3338-C_T996_50_50_ITA4_I10,WT3339-C_T996_50_50_7G8_I12,WT3341-C_T996_50_50_PH21_I16,WT3342-C_T996_50_50_PH22_I18,WT3343-C_T996_50_50_W2_I20,WT3344-C_T996_50_50_CS2_I22,WT3345-C_3D7_50_50_DD2_J04,WT3346-C_3D7_50_50_HB3_J06,WT3347-C_3D7_50_50_ITA4_J08,WT3348-C_3D7_50_50_7G8_J10,WT3350-C_3D7_50_50_PH21_J14,WT3351-C_3D7_50_50_PH22_J16,WT3352-C_3D7_50_50_W2_J18,WT3353-C_3D7_50_50_CS2_J20,WT3354-C_DD2_50_50_HB3_K04,WT3355-C_DD2_50_50_ITA4_K06,WT3356-C_DD2_50_50_7G8_K08,WT3358-C_DD2_50_50_PH21_K12,WT3359-C_DD2_50_50_PH22_K14,WT3360-C_DD2_50_50_W2_K16,WT3361-C_DD2_50_50_CS2_K18,WT3362-C_HB3_50_50_ITA4_L04,WT3363-C_HB3_50_50_7G8_L06,WT3365-C_HB3_50_50_PH21_L10,WT3366-C_HB3_50_50_PH22_L12,WT3367-C_HB3_50_50_W2_L14,WT3368-C_HB3_50_50_CS2_L16,WT3369-C_ITA4_50_50_7G8_M04,WT3371-C_ITA4_50_50_PH21_M08,WT3372-C_ITA4_50_50_PH22_M10,WT3373-C_ITA4_50_50_W2_M12,WT3374-C_ITA4_50_50_CS2_M14,WT3376-C_7G8_50_50_PH21_N06,WT3377-C_7G8_50_50_PH22_N08,WT3378-C_7G8_50_50_W2_N10,WT3379-C_7G8_50_50_CS2_N12,WT3384-C_PH21_50_50_PH22_P04,WT3385-C_PH21_50_50_W2_P06,WT3386-C_PH21_50_50_CS2_P08,WT3387-C_PH22_50_50_W2_L22,WT3388-C_PH22_50_50_CS2_L24,WT3389-C_W2_50_50_CS2_K24,WT3390-C_3D7_16_84_HuDNA_M20,WT3391-C_3D7_3.2_96.8_HuDNA_M01,WT3391-C_3D7_3.2_96.8_HuDNA_N01,WT3391-C_3D7_3.2_96.8_HuDNA_O01,WT3391-C_3D7_3.2_96.8_HuDNA_O20,WT339
1-C_3D7_3.2_96.8_HuDNA_P01,WT3392-C_3D7_1.6_98.4_HuDNA_M22,WT3393-C_3D7_0.32_99.68_HuDNA_O22,WT3394-C_3D7_0.16_99.84_HuDNA_M24,WT3395-C_3D7_0.032_99.968_HuDNA_O24,WT3396-C_3D7_0.016_99.984_HuDNA_N20,WT3397-C_3D7_0.0032_99.9968_HuDNA_P20,WT3398-C_3D7_0.0016_99.9984_HuDNA_N22,WT3399-C_3D7_0.00032_99.99968_HuDNA_P22,WT3400-C_3D7_0.00016_99.99984_HuDNA_N24,WT3401-C_3D7_0.000032_99.99968_HuDNA_P24,WT3512-C_T996_50_50_T994_I14,WT3513-C_3D7_50_50_T994_J12,WT3514-C_DD2_50_50_T994_K10,WT3515-C_HB3_50_50_T994_L08,WT3516-C_ITA4_50_50_T994_M06,WT3517-C_7G8_50_50_T994_N04,WT3519-C_PH21_50_50_T994_O04,WT3520-C_PH22_50_50_T994_O06,WT3569-C_T994_50_50_W2_O08,WT3570-C_T994_50_50_CS2_O10,WT3903-C_3D7_0.8_99.2_HuDNA_M03,WT3903-C_3D7_0.8_99.2_HuDNA_N03,WT3903-C_3D7_0.8_99.2_HuDNA_O03,WT3903-C_3D7_0.8_99.2_HuDNA_P03,WT3904-C_3D7_0.2_99.8_HuDNA_M05,WT3904-C_3D7_0.2_99.8_HuDNA_N05,WT3904-C_3D7_0.2_99.8_HuDNA_O05,WT3904-C_3D7_0.2_99.8_HuDNA_P05,WT3905-C_3D7_0.05_99.95_HuDNA_M07,WT3905-C_3D7_0.05_99.95_HuDNA_N07,WT3905-C_3D7_0.05_99.95_HuDNA_O07,WT3905-C_3D7_0.05_99.95_HuDNA_P07,WT3906-C_3D7_0.0125_99.9875_HuDNA_M09,WT3906-C_3D7_0.0125_99.9875_HuDNA_N09,WT3906-C_3D7_0.0125_99.9875_HuDNA_O09,WT3906-C_3D7_0.0125_99.9875_HuDNA_P09,WT3909-C_3D7_0.0000122_99.9999878_HuDNA_M19,WT3909-C_3D7_0.0000122_99.9999878_HuDNA_N19,WT3909-C_3D7_0.0000122_99.9999878_HuDNA_O19,WT3909-C_3D7_0.0000122_99.9999878_HuDNA_P19,WT3977-C_3D7_0.00313_99.99687_HuDNA_M11,WT3977-C_3D7_0.00313_99.99687_HuDNA_N11,WT3977-C_3D7_0.00313_99.99687_HuDNA_O11,WT3977-C_3D7_0.00313_99.99687_HuDNA_P11,WT3978-C_3D7_0.00078_99.99922_HuDNA_M13,WT3978-C_3D7_0.00078_99.99922_HuDNA_N13,WT3978-C_3D7_0.00078_99.99922_HuDNA_O13,WT3978-C_3D7_0.00078_99.99922_HuDNA_P13,WT3979-C_3D7_0.000195_99.999805_HuDNA_M15,WT3979-C_3D7_0.000195_99.999805_HuDNA_N15,WT3979-C_3D7_0.000195_99.999805_HuDNA_O15,WT3979-C_3D7_0.000195_99.999805_HuDNA_P15,WT3980-C_3D7_0.00049_99.99951_HuDNA_M17,WT3980-C_3D7_0.00049_99.99951_HuDNA_N17,WT3980-C_3D7_0.00049_99.99951_HuD
NA_O17,WT3980-C_3D7_0.00049_99.99951_HuDNA_P17,WT3981-C_3D7_0.0000031_99.9999969_HuDNA_M21,WT3981-C_3D7_0.0000031_99.9999969_HuDNA_N21,WT3981-C_3D7_0.0000031_99.9999969_HuDNA_O21,WT3981-C_3D7_0.0000031_99.9999969_HuDNA_P21,WT3982-C_3D7_0.00000076_99.99999924_HuDNA_M23,WT3982-C_3D7_0.00000076_99.99999924_HuDNA_N23,WT3982-C_3D7_0.00000076_99.99999924_HuDNA_O23,WT3982-C_3D7_0.00000076_99.99999924_HuDNA_P23
Pf3D7_05_v3,958440,X,X,X,X,X,X,X,X,X,X,T,T,A,T,A,T,A,A,T,T,A,A,T,T,A,T,T,T,A,A,T,A,A,T,A,A,A,AT,T,T,T,A,A,A,T,A,A,A,A,T,A,A,A,A,A,A,A,A,A,T,A,T,T,A,A,A,T,A,A,A,A,A,A,A,A,A,A,A,AT,A,A,X,X,A,T,A,A,A,A,T,A,T,AT,AT,A,T,A,T,T,T,AT,T,T,AT,T,T,A,T,T,A,A,AT,T,A,A,AT,A,A,A,A,T,T,A,T,A,A,T,A,T,T,T,A,A,T,A,A,T,A,A,T,T,T,T,T,T,T,A,T,A,T,T,T,A,A,T,A,A,A,A,T,T,A,T,T,A,A,AT,T,AT,AT,T,T,T,A,A,T,T,A,AT,T,A,A,A,A,X,X,T,AT,T,AT,T,AT,T,T,T,A,A,T,T,T,T,A,T,A,A,T,T,T,A,T,A,T,T,A,A,T,A,A,A,A,A,A,A,A,A,A,A,X,A,A,A,A,A,A,A,A,A,A,A,A,T,A,A,A,A,A,A,AT,A,A,A,X,X,X,X,T,T,T,X,X,X,X,X,X,X,T,X,T,X,AT,AT,T,AT,T,AT,T,AT,AT,A,AT,A,AT,A,AT,A,A,AT,A,AT,A,AT,A,A,AT,T,AT,T,AT,AT,AT,A,AT,A,A,AT,T,AT,AT,AT,A,A,AT,AT,A,A,A,A,A,A,A,A,A,A,A,A,A,A,X,X,X,AT,A,A,AT,A,AT,A,AT,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,AT,A,X,X,X,X,X,A,X,X,X,X,X,X,X,X,A,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Pf3D7_07_v3,405362,X,X,X,X,X,X,X,X,X,X,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,X,X,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,G,A,A,G,G,G,G,A,A,G,G,G,A,G,G,A,G,G,G,G,G,A,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,G,A,A,A,G,G,G,G,G,G,G,G,A,G,A,G,G,A,G,G,G,G,G,G,A,G,G,G,A,AG,AG,X,X,A,A,A,A,AG,AG,A,AG,AG,A,A,AG,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,X,A,A,A,A,A,A,A,A,A,A,A,G,G,A,A,G,A,A,A,A,A,A,A,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,A,AG,A,AG,A,AG,AG,AG,AG,AG,A,AG,A,AG,AG,AG,AG,AG,G,AG,G,G,G,G,AG,A,AG,AG,AG,AG,AG,G,G,G,G,AG,AG,AG,AG,G,G,G,G,G,G,A,A,A,A,A,A,A,A,A,A,A,A,A,X,X,X,AG,AG,G,AG,G,AG,G,G,G,G,A,A,A,A,A,A,A,A,A,A,A,A,A,A,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Pf3D7_07_v3,405600,X,X,X,X,X,X,X,X,X,X,C,T,T,C,C,C,C,C,C,C,T,C,T,T,T,T,T,T,T,T,T,T,T,C,T,T,T,T,T,T,T,T,T,T,T,T,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,X,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,CT,T,T,T,T,T,T,T,C,T,T,C,T,T,C,C,C,T,C,C,T,C,C,C,C,C,T,C,C,C,C,C,C,C,C,C,C,C,T,C,T,C,C,C,C,C,T,T,T,C,C,C,C,C,C,C,C,T,C,T,C,C,T,C,C,C,CT,C,C,T,C,C,C,T,CT,CT,X,X,T,T,C,T,T,T,T,T,T,T,T,T,T,T,T,T,T,C,T,T,T,C,C,C,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,X,T,T,T,T,T,T,T,T,T,T,T,C,C,T,T,C,T,T,T,T,T,T,T,X,X,X,X,T,CT,CT,X,X,X,X,X,X,X,CT,X,X,X,T,CT,T,T,T,CT,CT,CT,T,CT,T,T,T,CT,CT,CT,T,CT,CT,CT,C,C,C,CT,T,T,CT,CT,CT,T,T,CT,CT,CT,T,CT,CT,CT,T,C,C,CT,C,CT,CT,T,T,T,T,T,T,T,T,T,T,T,T,X,X,X,X,T,T,CT,T,T,T,CT,CT,CT,T,T,T,T,T,T,T,T,T,T,T,T,T,T,T,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,X,X,X,X,X,CT,X,T,X,X,X,X,T,X,X
Pf3D7_13_v3,748395,X,X,X,X,X,X,X,X,X,X,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,A,A,A,A,A,A,AC,A,A,A,A,A,A,A,C,C,C,A,AC,A,A,A,C,A,A,A,A,A,C,C,A,A,A,A,A,X,X,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,A,C,C,A,A,A,A,C,C,A,A,A,C,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,A,C,C,A,A,A,A,A,A,A,A,C,A,A,A,AC,A,A,A,C,C,A,A,C,A,A,A,C,AC,AC,X,X,A,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,X,C,C,C,A,C,C,C,C,C,C,C,C,A,C,C,C,C,C,C,C,C,C,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,AC,C,AC,C,AC,AC,AC,AC,AC,C,AC,C,AC,AC,AC,AC,AC,A,AC,A,A,A,A,AC,C,AC,AC,AC,AC,AC,A,A,A,A,AC,AC,AC,AC,A,A,A,A,A,A,C,C,C,C,C,C,C,C,C,C,C,C,C,C,X,X,AC,AC,A,AC,A,AC,A,A,A,A,C,C,C,C,C,C,C,C,C,C,C,C,X,X,X,X,X,X,X,X,C,X,X,X,X,X,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X
Pf3D7_13_v3,1725259,X,X,X,X,X,X,X,X,X,X,CT,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,CT,C,C,C,C,C,CT,C,C,C,CT,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,X,X,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,CT,C,CT,C,CT,CT,CT,CT,C,CT,CT,CT,C,T,C,T,C,C,C,C,C,C,C,C,C,C,T,C,C,C,T,T,T,T,T,C,C,C,T,T,CT,C,C,T,C,C,C,T,T,T,C,T,C,CT,C,T,C,T,C,T,CT,T,C,C,T,T,C,T,T,T,C,T,T,X,X,T,C,C,C,C,C,C,C,C,C,C,C,C,C,C,CT,CT,C,C,C,C,CT,C,C,C,C,C,C,C,CT,C,C,C,C,CT,C,C,C,C,C,C,X,C,C,C,C,CT,C,C,C,C,C,C,C,T,C,CT,C,C,C,C,C,C,C,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,C,C,C,C,C,C,C,C,CT,C,C,C,C,C,C,C,C,C,C,C,C,C,C,CT,CT,C,C,C,C,CT,C,C,C,C,C,C,C,C,CT,CT,C,CT,CT,CT,C,CT,CT,CT,CT,CT,CT,CT,CT,CT,CT,T,T,X,X,X,C,C,C,C,C,CT,CT,C,C,C,CT,CT,CT,CT,CT,CT,CT,CT,CT,T,CT,CT,CT,T,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,X


In [10]:
# Sanity check on the table width: number of fields in the second record of
# the array form of the table. The recorded output is 386, matching the
# 2 + 384 columns selected by the final cut in the cell that builds the table.
len(tbl_pivoted_genotypes.toarray()[1])

386