# Realignment

In [1]:
import pandas as pd
from Bio import AlignIO as alio
from Bio import SeqIO as sqio
import subprocess
import tqdm
import numpy as np
import tarfile
import os

In [2]:
def alignment_to_table(file):
    """Read a FASTA alignment and return its records as an ungapped table.

    Parameters
    ----------
    file : str
        Path of the alignment file; it is parsed with ``Bio.AlignIO.read``
        using the ``'fasta'`` format.

    Returns
    -------
    pandas.DataFrame
        One row per alignment record with the columns ``id`` (the record id)
        and ``seq`` (the record sequence as a string with every ``'-'``
        character removed).
    """
    # A context manager guarantees the handle is closed once parsing is done;
    # the previous bare open() left the file open until garbage collection.
    with open(file) as handle:
        records = [
            dict(id=record.id, seq=str(record.seq).replace('-', ''))
            for record in alio.read(handle, 'fasta')
        ]
    return pd.DataFrame.from_records(records)
    

In [3]:
# One table of ungapped sequences per alignment file (D, K and H).
aln_D = alignment_to_table('../sequences/AGNifAlign105.ext-anc.alt.D.fasta')
aln_K = alignment_to_table('../sequences/AGNifAlign105.ext-anc.alt.K.fasta')
aln_H = alignment_to_table('../sequences/AGNifAlign105.ext-anc.alt.H.fasta')

# Inner-join the three tables on the record id; the sequence columns end up
# named seq_D, seq_K and seq_H.
aln_DK = aln_D.merge(aln_K, how='inner', on='id', suffixes=['_D', '_K'])
aln_DKH = aln_DK.merge(aln_H, how='inner', on='id').rename(columns={'seq': 'seq_H'})
# The join must keep exactly as many rows as the D table has.
assert len(aln_D) == len(aln_DKH)
aln_DKH

Unnamed: 0,id,seq_D,seq_K,seq_H
0,1207_alt4,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,ASKEEVEKVLEWTKTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...
1,1207_alt5,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,ASAEEVQKVKDWTNTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...
2,1207_map,MSEKEETQKLIEEVLEVYPEKARKNRKKHIAVNDPEASSCAVKSNV...,CTKEEVEKVADWTNTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...
3,1207_alt2,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,HTPEEVERVKDWTNTEEYKEKNFARKALVINPAKACQPLGAMLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...
4,1207_alt3,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,HTKEEVQEVAEWTNTEEYKEKNFARKALVINPAKACQPLGALLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...
...,...,...,...,...
2684,Nif_archaeon_BMS3Bbin15,MLLKCDKTIPERKKHIVIKGENGCGGDSSGCEIACNVPTTPGDMTE...,MSIVTKQNRAVAINPTRSCAPIGAMLANYGVHGALTINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAASLARIGKKIMVVGCDPKADCTRL...
2685,Nif_Candidatus_Viridilinea_mediisalina,MKLKCNATLPDRALHIALKTSEGGCRRGDGTDCFIASNSATTPGDM...,MSCVTTQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...
2686,Nif_Chloroflexales_bacterium_ZM16-3,MELKSSTTIPERAQHIALKVEGGKCQRGDGAGCAIVSNSATTPGDM...,MSCVTTQDRAVSINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQIAFYGKGGIGKSTTQQNTAAALASMGNKIMVVGCDPKADCTRL...
2687,Nif_Oscillochloris_trichoides,MQFKCNETLPERGTHIALKVAGGGCQRGDGTSCGIVSNSATTPGDM...,MSCVTLQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAAFASMGNKLMVVGCDPKADCTRL...


In [4]:
def place_anc_tag(x):
    """Prefix ``'Anc_'`` to ids whose first ``_``-separated field is an integer.

    Parameters
    ----------
    x : str
        Record identifier, e.g. ``'1207_alt4'``.

    Returns
    -------
    ``'Anc_' + x`` when the text before the first underscore parses with
    ``int``; otherwise ``x`` itself, unchanged. Values that cannot be split or
    parsed at all (for example ``None``) are also returned unchanged.
    """
    try:
        int(x.split('_')[0])
        return 'Anc_' + x
    except (AttributeError, TypeError, ValueError):
        # Only "this is not a numeric-prefixed string" is an expected failure.
        # A bare except would also swallow KeyboardInterrupt and SystemExit.
        return x

# Tag numeric-prefixed ids with 'Anc_'; ids that are already tagged pass
# through place_anc_tag unchanged, so re-running this cell is harmless.
aln_DKH['id'] = aln_DKH['id'].map(place_anc_tag)

In [5]:
# Build the ':'-joined multi-chain strings with vectorized column
# concatenation instead of a Python-level row-wise apply(axis=1): same values,
# less overhead, and it also works when the frame has no rows.
# DDKK = D:K:D:K, HH = H:H.
aln_DKH['DDKK'] = (
    aln_DKH['seq_D'] + ':' + aln_DKH['seq_K'] + ':' + aln_DKH['seq_D'] + ':' + aln_DKH['seq_K']
)
aln_DKH['HH'] = aln_DKH['seq_H'] + ':' + aln_DKH['seq_H']
# Spot-check one row (index label 15).
aln_DKH.loc[15, 'HH']

'MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRLILHAKAQATVMDKVRELGTVEDLELEDVLKRGYGDVKCVESGGPEPGVGCAGRGVITAINFLEEEGAYTPDLDYVFYDVLGDVVCGGFAMPIRENKAQEIYIVVSGEMMAMYAANNICKGIVKYASSGSVRLAGLICNSRNTDREADLIEALAKRLGTQMIHFVPRDNQVQRAELRRMTVIEYSPEHKQAEEYRQLAQKIADNKMFVVPTPLEMDELEDLLMEFGIMEAEDESIVGKAENA:MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRLILHAKAQATVMDKVRELGTVEDLELEDVLKRGYGDVKCVESGGPEPGVGCAGRGVITAINFLEEEGAYTPDLDYVFYDVLGDVVCGGFAMPIRENKAQEIYIVVSGEMMAMYAANNICKGIVKYASSGSVRLAGLICNSRNTDREADLIEALAKRLGTQMIHFVPRDNQVQRAELRRMTVIEYSPEHKQAEEYRQLAQKIADNKMFVVPTPLEMDELEDLLMEFGIMEAEDESIVGKAENA'

In [6]:
# 'type' is the part of each id before its first underscore.
aln_DKH['type'] = aln_DKH['id'].map(lambda ident: ident.split('_')[0])
aln_DKH['type'].unique()  # result is discarded: only the cell's last expression is displayed
# Keep the rows tagged 'Anc' as an independent copy.
aln_DKH_anc = aln_DKH[aln_DKH['type'] == 'Anc'].copy()
# 'parent' is the first two underscore-separated fields of the id, e.g. 'Anc_1207'.
aln_DKH_anc['parent'] = aln_DKH_anc['id'].map(lambda ident: '_'.join(ident.split('_')[:2]))
aln_DKH_anc['parent']

0       Anc_1207
1       Anc_1207
2       Anc_1207
3       Anc_1207
4       Anc_1207
          ...   
2299    Anc_1534
2300    Anc_1534
2301    Anc_1525
2302     Anc_821
2303     Anc_821
Name: parent, Length: 2304, dtype: object

In [7]:
# Sequence lengths of the D and K chains.
aln_DKH_anc['len_D'] = aln_DKH_anc['seq_D'].map(len)
aln_DKH_anc['len_K'] = aln_DKH_anc['seq_K'].map(len)

# D and K joined with ':' (vectorized; no row-wise apply needed).
aln_DKH_anc['seq_DK'] = aln_DKH_anc['seq_D'] + ':' + aln_DKH_anc['seq_K']
# 'variant' is the last underscore-separated field of the id (e.g. 'alt4', 'altall', 'map').
aln_DKH_anc['variant'] = aln_DKH_anc['id'].map(lambda ident: ident.split('_')[-1])
# Drop the 'map' rows. The explicit .copy() makes the result an independent
# frame, so later cells that add columns to aln_DKH_anc (e.g. 'len_H') do not
# trigger SettingWithCopyWarning on a filtered selection.
aln_DKH_anc = aln_DKH_anc.query('variant != "map"').copy()
aln_DKH_anc

Unnamed: 0,id,seq_D,seq_K,seq_H,DDKK,HH,type,parent,len_D,len_K,seq_DK,variant
0,Anc_1207_alt4,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,ASKEEVEKVLEWTKTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,alt4
1,Anc_1207_alt5,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,ASAEEVQKVKDWTNTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,alt5
3,Anc_1207_alt2,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,HTPEEVERVKDWTNTEEYKEKNFARKALVINPAKACQPLGAMLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,alt2
4,Anc_1207_alt3,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,HTKEEVQEVAEWTNTEEYKEKNFARKALVINPAKACQPLGALLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,alt3
5,Anc_1210_alt3,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MSEASACTETTPEEVEQVAEWINTEEYKEKNFAREALVINPAKACQ...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1210,481,484,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,alt3
...,...,...,...,...,...,...,...,...,...,...,...,...
2298,Anc_1534_alt5,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MSAVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,alt5
2300,Anc_1534_alt2,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MSIVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,alt2
2301,Anc_1525_alt3,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,LNQTSMSAITKKRSVTINPAKTCQPIGAMYACLGVHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,Anc,Anc_1525,456,452,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,alt3
2302,Anc_821_alt4,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,LDHTSVETITNRKSLVINPAKTCQPIGAMYAALGIHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,Anc,Anc_821,462,455,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,alt4


In [8]:
def write_fasta(x, path):
    """Write ``x.sequence`` (with every ``':'`` removed) to ``<path><x.id>.fasta``.

    ``path`` is used as a plain string prefix, so it must already end with a
    path separator. The record header is the fixed line ``>101<TAB>102``;
    ``x.id`` only determines the file name.

    NOTE: a second ``write_fasta`` with a different header is defined later in
    this notebook and shadows this one once its cell has been run.
    """
    destination = ''.join((path, x.id, '.fasta'))
    with open(destination, 'w') as handle:
        record = ''.join(('>101\t102\n', x.sequence.replace(':', ''), '\n'))
        handle.write(record)


In [14]:
# Write one '<id>.fasta' file per row into ./re-alignment/, using the
# ':'-joined D/K string as the sequence. rename() already returns a new frame,
# so aln_DKH_anc itself is left untouched.
aln_DKH_anc.rename(columns={'seq_DK': 'sequence'}).apply(
    lambda row: write_fasta(row, './re-alignment/'),
    axis='columns',
)

0       None
1       None
2       None
3       None
4       None
        ... 
2299    None
2300    None
2301    None
2302    None
2303    None
Length: 2304, dtype: object

    ls *_altall.fasta | sed "s/_altall.fasta//g" | xargs -P 4 -I % mafft --retree 1 --keeplength --addfragments  %_map.reference.fasta %_altall.fasta >   %_altall.realigned.fasta

## Processing template files

We need to remove the first two lines of every A3M file that we will use as a template.

In [24]:
# Display aln_DKH_anc for inspection; nothing is modified in this cell.
aln_DKH_anc

Unnamed: 0,id,seq_D,seq_K,seq_H,DDKK,HH,type,parent,len_D,len_K,seq_DK,variant
0,Anc_1207_alt4,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,ASKEEVEKVLEWTKTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,alt4
1,Anc_1207_alt5,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,ASAEEVQKVKDWTNTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,alt5
3,Anc_1207_alt2,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,HTPEEVERVKDWTNTEEYKEKNFARKALVINPAKACQPLGAMLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,alt2
4,Anc_1207_alt3,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,HTKEEVQEVAEWTNTEEYKEKNFARKALVINPAKACQPLGALLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,alt3
5,Anc_1210_alt3,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MSEASACTETTPEEVEQVAEWINTEEYKEKNFAREALVINPAKACQ...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1210,481,484,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,alt3
...,...,...,...,...,...,...,...,...,...,...,...,...
2298,Anc_1534_alt5,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MSAVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,alt5
2300,Anc_1534_alt2,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MSIVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,alt2
2301,Anc_1525_alt3,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,LNQTSMSAITKKRSVTINPAKTCQPIGAMYACLGVHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,Anc,Anc_1525,456,452,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,alt3
2302,Anc_821_alt4,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,LDHTSVETITNRKSLVINPAKTCQPIGAMYAALGIHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,Anc,Anc_821,462,455,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,alt4


In [26]:
# Show only the rows whose variant is 'altall'.
aln_DKH_anc[aln_DKH_anc['variant'] == 'altall']

Unnamed: 0,id,seq_D,seq_K,seq_H,DDKK,HH,type,parent,len_D,len_K,seq_DK,variant
6,Anc_1211_altall,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,MSETCAIKKVTENTPEEVERVKEWINTEEYKEKNFAREALVINPAH...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1211,486,487,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,altall
7,Anc_1212_altall,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,MSEACALVKKVTEHTPEEVERVEEWINTEEYKEKNFAREALVINPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1212,486,488,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,altall
12,Anc_1214_altall,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,MSAEAAVAKPVTEHTPEEIERVEEWINTEEYKEKNFAREALVVNPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1214,480,488,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,altall
17,Anc_1215_altall,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,MSAEAAVKPVTEHTPEEIERVEEWINTEEYKEKNFAREALVVNPAH...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1215,480,487,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,altall
25,Anc_1213_altall,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,MSEACALVKPVTEHTPEEVERVEEWINSEEYKEKNFAREALVINPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1213,482,488,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,altall
...,...,...,...,...,...,...,...,...,...,...,...,...
2276,Anc_1533_altall,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,MSVVAKQKRAVAINPARLCAPIGAMMANMGVHGAISIVHGSQGCAT...,MRQIAFYGKGGIGKSTTQQNTAAALARLGKKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,MRQIAFYGKGGIGKSTTQQNTAAALARLGKKIMVVGCDPKADCTRL...,Anc,Anc_1533,466,463,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,altall
2277,Anc_1534_altall,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,MSCVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGKKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGKKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,altall
2279,Anc_1535_altall,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,MSCVTMQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,Anc,Anc_1535,475,473,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,altall
2289,Anc_1537_altall,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,MSCVTTQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,Anc,Anc_1537,479,473,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,altall


In [9]:
def add_header(x):
    """Prepend a length header to ``re-alignment/<x.id>.realigned.fasta``.

    The header is a single line of the form ``#<len_D>,<len_K><TAB>2,2`` built
    from ``x.len_D`` and ``x.len_K``; the rest of the file is kept as is.
    (Presumably this is the header the downstream A3M consumer expects —
    TODO confirm.)

    The operation is idempotent: if the file already starts with exactly this
    header it is left unchanged, so re-running the cell does not stack
    duplicate header lines.

    Parameters
    ----------
    x : row-like object exposing ``id``, ``len_D`` and ``len_K``.

    Raises
    ------
    FileNotFoundError
        If the realigned file for ``x.id`` does not exist.

    NOTE: ``add_header`` is defined again further down in this notebook for
    other file locations; the most recently executed definition wins.
    """
    target = 're-alignment/' + x.id + '.realigned.fasta'
    print(f'adding header to {x.id}')
    with open(target) as f:
        content = f.read()
    header = '#{0},{1}\t2,2\n'.format(x.len_D, x.len_K)
    if content.startswith(header):
        # Already processed by an earlier run of this cell.
        print(f'header already present in {x.id}; file left unchanged')
        return
    with open(target, 'w') as f:
        f.write(header)
        f.write(content)

In [30]:
# Add the length header to the realigned file of every 'altall' row.
aln_DKH_anc[aln_DKH_anc['variant'] == 'altall'].apply(add_header, axis='columns')

adding header to Anc_1211_altall
adding header to Anc_1212_altall
adding header to Anc_1214_altall
adding header to Anc_1215_altall
adding header to Anc_1213_altall
adding header to Anc_1217_altall
adding header to Anc_1218_altall
adding header to Anc_1216_altall
adding header to Anc_1210_altall
adding header to Anc_1220_altall
adding header to Anc_1221_altall
adding header to Anc_1222_altall
adding header to Anc_1219_altall
adding header to Anc_1209_altall
adding header to Anc_1223_altall
adding header to Anc_1226_altall
adding header to Anc_1228_altall
adding header to Anc_1229_altall
adding header to Anc_1227_altall
adding header to Anc_1225_altall
adding header to Anc_1224_altall
adding header to Anc_1239_altall
adding header to Anc_1240_altall
adding header to Anc_1243_altall
adding header to Anc_1244_altall
adding header to Anc_1245_altall
adding header to Anc_1242_altall
adding header to Anc_1241_altall
adding header to Anc_1247_altall
adding header to Anc_1246_altall
adding hea

6       None
7       None
12      None
17      None
25      None
        ... 
2276    None
2277    None
2279    None
2289    None
2290    None
Length: 384, dtype: object

In [10]:
def add_header(x):
    """Prepend a length header to ``re-alignment/<x.id>.a3m``.

    The header is a single line of the form ``#<len_D>,<len_K><TAB>2,2`` built
    from ``x.len_D`` and ``x.len_K``; the rest of the file is kept as is.
    (Presumably this is the header the downstream A3M consumer expects —
    TODO confirm.)

    The operation is idempotent: if the file already starts with exactly this
    header it is left unchanged, so re-running the cell does not stack
    duplicate header lines.

    Parameters
    ----------
    x : row-like object exposing ``id``, ``len_D`` and ``len_K``.

    Raises
    ------
    FileNotFoundError
        If the ``.a3m`` file for ``x.id`` does not exist.

    NOTE: this redefines the earlier ``add_header`` (which targets
    ``.realigned.fasta`` files); the most recently executed definition wins.
    """
    target = 're-alignment/' + x.id + '.a3m'
    print(f'adding header to {x.id}')
    with open(target) as f:
        content = f.read()
    header = '#{0},{1}\t2,2\n'.format(x.len_D, x.len_K)
    if content.startswith(header):
        # Already processed by an earlier run of this cell.
        print(f'header already present in {x.id}; file left unchanged')
        return
    with open(target, 'w') as f:
        f.write(header)
        f.write(content)

In [11]:
# Add the length header to the .a3m file of every row that is not 'altall'.
aln_DKH_anc[aln_DKH_anc['variant'] != 'altall'].apply(add_header, axis='columns')

adding header to Anc_1207_alt4
adding header to Anc_1207_alt5
adding header to Anc_1207_alt2
adding header to Anc_1207_alt3
adding header to Anc_1210_alt3
adding header to Anc_1213_alt5
adding header to Anc_1213_alt3
adding header to Anc_1213_alt2
adding header to Anc_1214_alt5
adding header to Anc_1214_alt3
adding header to Anc_1215_alt2
adding header to Anc_1215_alt3
adding header to Anc_1215_alt4
adding header to Anc_1215_alt5
adding header to Anc_1214_alt4
adding header to Anc_1214_alt2
adding header to Anc_1213_alt4
adding header to Anc_1216_alt4
adding header to Anc_1216_alt5
adding header to Anc_1216_alt2
adding header to Anc_1216_alt3
adding header to Anc_1217_alt3
adding header to Anc_1217_alt2
adding header to Anc_1217_alt4
adding header to Anc_1217_alt5
adding header to Anc_1218_alt2
adding header to Anc_1218_alt3
adding header to Anc_1218_alt5
adding header to Anc_1218_alt4
adding header to Anc_1212_alt3
adding header to Anc_1212_alt5
adding header to Anc_1212_alt2
adding h

0       None
1       None
3       None
4       None
5       None
        ... 
2298    None
2300    None
2301    None
2302    None
2303    None
Length: 1536, dtype: object

## Re-alignment, HH

In [8]:
# Display aln_DKH_anc for inspection; nothing is modified in this cell.
aln_DKH_anc

Unnamed: 0,id,seq_D,seq_K,seq_H,DDKK,HH,type,parent,len_D,len_K,seq_DK,variant
0,Anc_1207_alt4,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,ASKEEVEKVLEWTKTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKIMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSKKEEKEELIEEILDVYPEKARKNREKHIAVNDPDSGQCAVKSNV...,alt4
1,Anc_1207_alt5,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,ASAEEVQKVKDWTNTEEYKEKNFKRKALVINPAKACQPLGAVLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSENEERKEIIEEVLEVYPEKARKNRKKHLAVNDPDAASCAVKSNV...,alt5
3,Anc_1207_alt2,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,HTPEEVERVKDWTNTEEYKEKNFARKALVINPAKACQPLGAMLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSEDEQSKKLVEEVLNVYPEKARKNRAKHVAVNDPDAGSCVVKSNV...,alt2
4,Anc_1207_alt3,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,HTKEEVQEVAEWTNTEEYKEKNFARKALVINPAKACQPLGALLAAL...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1207,475,476,MSTKEQTQKIVEEVLEIYPEKARKNRRKHLAVNDPGANSCSVKSNV...,alt3
5,Anc_1210_alt3,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MSEASACTETTPEEVEQVAEWINTEEYKEKNFAREALVINPAKACQ...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,MRQIAIYGKGGIGKSTTTQNTVAALAEMGKKVMIVGCDPKADSTRL...,Anc,Anc_1210,481,484,MSEKEDTTKETTQKLIEEVLEIYPEKARKNRAKHLAVNDPASSSCA...,alt3
...,...,...,...,...,...,...,...,...,...,...,...,...
2298,Anc_1534_alt5,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MSAVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALATMGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGKGDGTGCEIACNVPTTPGDMT...,alt5
2300,Anc_1534_alt2,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MSIVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGNKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGENGCGRGDGSGCEIACNVPTTPGDMT...,alt2
2301,Anc_1525_alt3,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,LNQTSMSAITKKRSVTINPAKTCQPIGAMYACLGVHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,MRQIAIYGKGGIGKSTTTQNTAAALACMMGKKVMIVGCDPKADSTR...,Anc,Anc_1525,456,452,MPLKLLECDKTIPEREKHIYIKDPGEPVLPMCNVKTTPGDMTERGC...,alt3
2302,Anc_821_alt4,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,LDHTSVETITNRKSLVINPAKTCQPIGAMYAALGIHGAIPLVHGSQ...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,MRQIAIYGKGGIGKSTTTQNTTAALASMGKKIMIVGCDPKADSTRL...,Anc,Anc_821,462,455,EMPTTILEPDKARSERTKHIYIKDDAEPIIECNVKTMPGIMTERGC...,alt4


In [24]:
# Show only the rows whose variant is 'altall'.
aln_DKH_anc[aln_DKH_anc['variant'] == 'altall']

Unnamed: 0,id,seq_D,seq_K,seq_H,DDKK,HH,type,parent,len_D,len_K,seq_DK,variant
6,Anc_1211_altall,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,MSETCAIKKVTENTPEEVERVKEWINTEEYKEKNFAREALVINPAH...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1211,486,487,MSEKETEVKPEVDGISKETTQKLIEETLEVYPEKARKKRAPHLAAN...,altall
7,Anc_1212_altall,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,MSEACALVKKVTEHTPEEVERVEEWINTEEYKEKNFAREALVINPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1212,486,488,MSDKEPEIKVEIDGITKERTQKLIEETLEVYPEKARKKRAPHLAAN...,altall
12,Anc_1214_altall,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,MSAEAAVAKPVTEHTPEEIERVEEWINTEEYKEKNFAREALVVNPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1214,480,488,MSERKPVKGITKERTEKLIEETLAEMPEKAQKKRAPHLAANDPSAS...,altall
17,Anc_1215_altall,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,MSAEAAVKPVTEHTPEEIERVEEWINTEEYKEKNFAREALVVNPAH...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1215,480,487,MSERKPVKGVTKERTEKLIEETLAEMPEKAQKKRAPHLGANDPSAS...,altall
25,Anc_1213_altall,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,MSEACALVKPVTEHTPEEVERVEEWINSEEYKEKNFAREALVINPA...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,MRQIAIYGKGGIGKSTTTQNTVAGLASLGKKVMIVGCDPKADSTRL...,Anc,Anc_1213,482,488,MSEKEIKEIDGITKEKTQKLIEETLELYPEKARKKRAPHLAANDPS...,altall
...,...,...,...,...,...,...,...,...,...,...,...,...
2276,Anc_1533_altall,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,MSVVAKQKRAVAINPARLCAPIGAMMANMGVHGAISIVHGSQGCAT...,MRQIAFYGKGGIGKSTTQQNTAAALARLGKKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,MRQIAFYGKGGIGKSTTQQNTAAALARLGKKIMVVGCDPKADCTRL...,Anc,Anc_1533,466,463,MLLKCDKTIPERKKHIVIKGEKGCGRGEGTGCEIACNVPTTPGDMT...,altall
2277,Anc_1534_altall,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,MSCVTKQNRAVAINPTRSCAPIGAMLANYGVHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGKKIMVVGCDPKADCTRL...,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,MRQVAFYGKGGIGKSTTQQNTAAALARIGKKIMVVGCDPKADCTRL...,Anc,Anc_1534,469,463,MLLKCDKTIPERKKHIVIKGEDGCGKGDGTGCEIACNVPTTPGDMT...,altall
2279,Anc_1535_altall,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,MSCVTMQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,Anc,Anc_1535,475,473,MQLKCNETLPERAQHIALKGPGGKCQRGDGTGCAIASNVATTPGDM...,altall
2289,Anc_1537_altall,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,MSCVTTQDRAVAINPTRSCAPIGAMLANYGIHGAITINHGSQGCAT...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,MRQVAFYGKGGIGKSTTQQNTAAALASMGNKLMVVGCDPKADCTRL...,Anc,Anc_1537,479,473,MQLKCNATLPERAQHIALKVSGGGCQRGDGTSCAIASNSATTPGDM...,altall


In [9]:
def write_fasta(x, path):
    """Write ``x.sequence`` verbatim to ``<path><x.id>.fasta`` under the header ``>101``.

    ``path`` is used as a plain string prefix, so it must already end with a
    path separator. Unlike the earlier ``write_fasta`` in this notebook (which
    this definition replaces), ``':'`` characters are not stripped and the
    header has a single field.
    """
    destination = ''.join((path, x.id, '.fasta'))
    with open(destination, 'w') as handle:
        handle.write(f'>101\n{x.sequence}\n')

In [26]:


# Write the H sequence of every 'altall' row to ./re-alignment/hh-ancestral/<id>.fasta.
(
    aln_DKH_anc[aln_DKH_anc['variant'] == 'altall']
    .rename(columns={'seq_H': 'sequence'})
    .apply(lambda row: write_fasta(row, './re-alignment/hh-ancestral/'), axis='columns')
)


6       None
7       None
12      None
17      None
25      None
        ... 
2276    None
2277    None
2279    None
2289    None
2290    None
Length: 384, dtype: object

In [11]:
# Select every row whose variant is not "altall".
# NOTE(review): the output saved under this cell is an array of variant labels,
# which this expression does not produce (it evaluates to a DataFrame). The
# saved output appears to be stale; re-run the cell to refresh it.
aln_DKH_anc.query('variant != "altall"')

array(['alt4', 'alt5', 'alt2', 'alt3'], dtype=object)

In [13]:
# Write the H sequence of every non-'altall' row to ./re-alignment/hh-ancestral/<id>.fasta.
(
    aln_DKH_anc[aln_DKH_anc['variant'] != 'altall']
    .rename(columns={'seq_H': 'sequence'})
    .apply(lambda row: write_fasta(row, './re-alignment/hh-ancestral/'), axis='columns')
)

0       None
1       None
3       None
4       None
5       None
        ... 
2298    None
2300    None
2301    None
2302    None
2303    None
Length: 1536, dtype: object

In [14]:
def add_header(x):
    """Prepend a length header to ``re-alignment/hh-ancestral/<x.id>.realigned.fasta``.

    The header is a single line of the form ``#<len_H><TAB>2`` built from
    ``x.len_H``; the rest of the file is kept as is. (Presumably this is the
    header the downstream A3M consumer expects — TODO confirm.)

    A missing file is reported with a printed message and skipped. The
    operation is idempotent: if the file already starts with exactly this
    header it is left unchanged, so re-running the cell does not stack
    duplicate header lines.

    Parameters
    ----------
    x : row-like object exposing ``id`` and ``len_H``.

    Returns
    -------
    None

    NOTE: this redefines the earlier ``add_header`` functions of this
    notebook; the most recently executed definition wins.
    """
    target = 're-alignment/hh-ancestral/' + x.id + '.realigned.fasta'
    print(f'adding header to {x.id}')
    try:
        with open(target) as f:
            content = f.read()
    except FileNotFoundError:
        print(f"unable to find {x.id}")
        return
    header = '#{0}\t2\n'.format(x.len_H)
    if content.startswith(header):
        # Already processed by an earlier run of this cell.
        print(f'header already present in {x.id}; file left unchanged')
        return
    with open(target, 'w') as f:
        f.write(header)
        f.write(content)

In [28]:
# Length of each H sequence, read by add_header as x.len_H.
aln_DKH_anc['len_H'] = aln_DKH_anc['seq_H'].map(len)
# Add the header to the realigned H file of every 'altall' row.
aln_DKH_anc[aln_DKH_anc['variant'] == 'altall'].apply(add_header, axis='columns')

adding header to Anc_1211_altall
adding header to Anc_1212_altall
adding header to Anc_1214_altall
adding header to Anc_1215_altall
adding header to Anc_1213_altall
adding header to Anc_1217_altall
adding header to Anc_1218_altall
adding header to Anc_1216_altall
adding header to Anc_1210_altall
adding header to Anc_1220_altall
adding header to Anc_1221_altall
adding header to Anc_1222_altall
adding header to Anc_1219_altall
adding header to Anc_1209_altall
adding header to Anc_1223_altall
adding header to Anc_1226_altall
adding header to Anc_1228_altall
adding header to Anc_1229_altall
adding header to Anc_1227_altall
adding header to Anc_1225_altall
adding header to Anc_1224_altall
adding header to Anc_1239_altall
adding header to Anc_1240_altall
adding header to Anc_1243_altall
adding header to Anc_1244_altall
adding header to Anc_1245_altall
adding header to Anc_1242_altall
adding header to Anc_1241_altall
adding header to Anc_1247_altall
adding header to Anc_1246_altall
adding hea

6       None
7       None
12      None
17      None
25      None
        ... 
2276    None
2277    None
2279    None
2289    None
2290    None
Length: 384, dtype: object

In [15]:
# Length of each H sequence, read by add_header as x.len_H.
aln_DKH_anc['len_H'] = aln_DKH_anc['seq_H'].map(len)
# Add the header to the realigned H file of every row that is not 'altall'.
aln_DKH_anc[aln_DKH_anc['variant'] != 'altall'].apply(add_header, axis='columns')

adding header to Anc_1207_alt4
adding header to Anc_1207_alt5
adding header to Anc_1207_alt2
adding header to Anc_1207_alt3
adding header to Anc_1210_alt3
adding header to Anc_1213_alt5
adding header to Anc_1213_alt3
adding header to Anc_1213_alt2
adding header to Anc_1214_alt5
adding header to Anc_1214_alt3
adding header to Anc_1215_alt2
adding header to Anc_1215_alt3
adding header to Anc_1215_alt4
adding header to Anc_1215_alt5
adding header to Anc_1214_alt4
adding header to Anc_1214_alt2
adding header to Anc_1213_alt4
adding header to Anc_1216_alt4
adding header to Anc_1216_alt5
adding header to Anc_1216_alt2
adding header to Anc_1216_alt3
adding header to Anc_1217_alt3
adding header to Anc_1217_alt2
adding header to Anc_1217_alt4
adding header to Anc_1217_alt5
adding header to Anc_1218_alt2
adding header to Anc_1218_alt3
adding header to Anc_1218_alt5
adding header to Anc_1218_alt4
adding header to Anc_1212_alt3
adding header to Anc_1212_alt5
adding header to Anc_1212_alt2
adding h

0       None
1       None
3       None
4       None
5       None
        ... 
2298    None
2300    None
2301    None
2302    None
2303    None
Length: 1536, dtype: object