In [None]:
import numpy as np
import pandas as pd

from pyliftover import LiftOver

## Load data

In [None]:
# Read the tab-separated TAD table and preview its first rows.
tads_path = 'data/tads_hESC_hg19_with_ids.txt'
df = pd.read_csv(tads_path, sep='\t')
df.head()

## Convert hg19->hg38

In [None]:
# Coordinate converter from the hg19 assembly to hg38; used by convert_pos() below.
# NOTE(review): pyliftover presumably needs the hg19->hg38 chain file (downloaded
# or cached) when this object is built — confirm before running offline.
lo = LiftOver('hg19', 'hg38')

In [None]:
def convert_pos(chr_, pos):
    """Lift a single position over with the module-level converter ``lo``.

    Only the converted position is returned; the chromosome, strand and score
    reported by the converter are discarded.

    Parameters
    ----------
    chr_ : str
        Chromosome name, passed unchanged to ``lo.convert_coordinate``
        (e.g. ``'chr6'``).
    pos : int
        Position on ``chr_`` in the source assembly.
        NOTE(review): pyliftover appears to expect 0-based positions —
        confirm the input table uses the same convention.

    Returns
    -------
    int or float
        The converted position, or ``np.nan`` when there is no conversion:
        either the converter returned an empty list, or it returned ``None``
        (which pyliftover does for a chromosome it does not know).

    Raises
    ------
    AssertionError
        If the position maps to more than one target location.
    """
    res = lo.convert_coordinate(chr_, pos)

    # `not res` covers both "no hit" ([]) and "unknown chromosome" (None);
    # calling len() on None would raise TypeError instead of yielding NaN.
    if not res:
        return np.nan

    # Ambiguous (multi-target) conversions are treated as an error.
    assert len(res) == 1, res
    _r_chr, r_pos, _r_strand, _r_score = res[0]
    return r_pos

In [None]:
# Case of no conversion. Only a cell's last expression is rendered, so the row
# is printed explicitly; as a bare expression its value was silently discarded.
print(df.loc[1226])  # presumably the TAD containing the position below — confirm
convert_pos('chr6', 61880166)

In [None]:
def _conv(row):
    """Convert one TAD row to the target assembly.

    Both boundaries are lifted over independently with ``convert_pos`` and
    then re-ordered so that ``tad_start <= tad_stop`` (the conversion can
    swap their order).

    Parameters
    ----------
    row : pandas.Series
        A row providing ``'#tad_id'``, ``'chrname'``, ``'tad_start'`` and
        ``'tad_stop'``.

    Returns
    -------
    pandas.Series
        Same four keys. ``'#tad_id'`` and ``'chrname'`` are copied from the
        input; ``'tad_start'`` / ``'tad_stop'`` hold the converted, ordered
        boundaries, or NaN for *both* if either boundary has no conversion.

    Notes
    -----
    NOTE(review): ``chrname`` is copied from the input row; whether a
    boundary can land on a different chromosome after conversion is not
    checked here — confirm this is acceptable.
    """
    tad_start = convert_pos(row['chrname'], row['tad_start'])
    tad_stop = convert_pos(row['chrname'], row['tad_stop'])

    if pd.isna(tad_start) or pd.isna(tad_stop):
        # Built-in min()/max() are order-dependent with NaN: min(x, nan) and
        # max(x, nan) both return x, which would turn a half-converted TAD
        # into a zero-length interval instead of a missing one.
        new_start = new_stop = np.nan
    else:
        new_start, new_stop = sorted((tad_start, tad_stop))

    return pd.Series({
        '#tad_id': row['#tad_id'],
        'chrname': row['chrname'],
        'tad_start': new_start,
        'tad_stop': new_stop
    })

# Apply the per-row conversion, discard rows containing NaN, then cast the id
# and both boundaries to integers in a single astype call.
int_columns = ('#tad_id', 'tad_start', 'tad_stop')
df_conv = (
    df.apply(_conv, axis=1)
    .dropna()
    .astype({column: int for column in int_columns})
)

df_conv.head()

### Check special cases

In [None]:
# Two positions on chr1 whose relative order is switched by the conversion.
for hg19_pos in (144848643, 145048643):
    print(convert_pos('chr1', hg19_pos))

In [None]:
# TADs whose converted start and stop coincide (zero length after conversion).
starts, stops = df_conv['tad_start'], df_conv['tad_stop']
tad_len = stops - starts
empty_tads = df_conv.loc[tad_len == 0]

print(empty_tads.shape)
empty_tads.head()

## Save result

In [None]:
# Table shapes before and after conversion, printed in that order.
for frame in (df, df_conv):
    print(frame.shape)

In [None]:
# Write the converted table as tab-separated text, without the index column.
df_conv.to_csv(
    path_or_buf='results/tads_hESC_hg38.tsv',
    sep='\t',
    index=False,
)