In [198]:
import pytest
import pandas as pd
import numpy as np
import random

In [199]:
# Load the two tab-separated CNV tables and tag every row with its sample number.
# NOTE(review): both paths are absolute and machine-specific — confirm they exist
# on the machine running this notebook.
file1 = pd.read_csv(
    '/Users/avapapetti/Desktop/CNV_Files/SL88823_20180802.cnv.csv', sep="\t"
).assign(**{'Sample#': 1})
file2 = pd.read_csv(
    '/Users/avapapetti/Desktop/CNV_Files/SL88824_20180802.cnv.csv', sep="\t"
).assign(**{'Sample#': 2})

In [200]:
def test_cnv_drop():
    """Check that dropping file1 rows whose Chrom is 'chrM' removes that label.

    Raises:
        AssertionError: if 'chrM' is still among the unique Chrom values
            after the drop.
    """
    chrm_index = file1[file1.Chrom == 'chrM'].index
    remaining_chroms = file1.drop(chrm_index).Chrom.unique()
    assert 'chrM' not in remaining_chroms

In [201]:
# Run the check now; a failing assert inside it raises AssertionError in this cell.
test_cnv_drop()

In [202]:
def test_cnv_filtering():
    """Check that one-pass and step-by-step filtering of file2 give the same rows.

    Rows are kept when |Start - Stop| is at least the minimum length and
    P_Value is at least the threshold. The filter is applied once with a
    single combined mask and once in two successive steps via a temporary
    CNV_Length column; both results must be equal.

    Raises:
        AssertionError: if the stepwise result violates either threshold or
            differs from the one-pass result.
    """
    min_length = 1000
    min_p_value = .90

    # Path 1: a single combined boolean mask.
    one_pass = file2[
        (np.abs(file2.Start - file2.Stop) >= min_length)
        & (file2.P_Value >= min_p_value)
    ]

    # Path 2: materialise the length as a column, then filter in two steps.
    with_length = file2.assign(CNV_Length=np.abs(file2.Start - file2.Stop))
    long_enough = with_length[with_length.CNV_Length >= min_length]
    stepwise = long_enough[long_enough.P_Value >= min_p_value]

    assert stepwise.CNV_Length.min() >= min_length
    assert stepwise.P_Value.min() >= min_p_value
    # Remove the helper column so both frames have the same columns.
    assert one_pass.equals(stepwise.drop(columns=['CNV_Length']))
    

In [203]:
# Run the check now; a failing assert inside it raises AssertionError in this cell.
test_cnv_filtering()

In [204]:
def test_byChrom():
    """Check that grouping file2 by Chrom yields one group per distinct Chrom value.

    Raises:
        AssertionError: if the group count differs from the number of unique
            Chrom values.
    """
    expected_group_count = file2.Chrom.nunique()
    grouped_by_chrom = file2.groupby('Chrom')
    assert grouped_by_chrom.ngroups == expected_group_count

In [205]:
# Run the check now; a failing assert inside it raises AssertionError in this cell.
test_byChrom()

In [206]:
def test_get_group():
    """Check that fetching one Chrom group from file2 returns only that chromosome.

    A row is drawn at random from file1 and its Chrom value is used as the
    key into file2 grouped by Chrom.

    Raises:
        AssertionError: if any row of the fetched group has a different Chrom.
        KeyError: if the sampled chromosome has no rows in file2 (raised by
            ``get_group``).
    """
    # randrange(n) draws from 0..n-1, so len(file1) is the bound that makes
    # every row (including the last) eligible and works for a one-row frame.
    file1_random_cnv = file1.iloc[random.randrange(len(file1))]
    file2_byChrom = file2.groupby('Chrom')
    test_file2_cnvs = file2_byChrom.get_group(file1_random_cnv.Chrom)

    # Check every row of the group rather than only the first unique value.
    assert (test_file2_cnvs.Chrom == file1_random_cnv.Chrom).all()

In [207]:
# Run the check now; a failing assert inside it raises AssertionError in this cell.
test_get_group()

In [208]:
def test_type_match():
    """Check that file2 rows selected by file1's DELETION types carry the same types.

    file1 is restricted to rows whose Type is 'DELETION'; file2 is then
    restricted to rows whose Type appears in that selection. The distinct
    Type values of both selections must be identical. If neither file has a
    'DELETION' row, both sets are empty and the check passes vacuously.

    Raises:
        AssertionError: if the two sets of Type values differ.
    """
    file1_cnv_del = file1[file1.Type == 'DELETION']
    test_file2_cnvs = file2.loc[file2.Type.isin(file1_cnv_del.Type)]

    # Compare as sets: `==` between two unique() arrays is elementwise, and
    # asserting on the resulting array is ambiguous (or an error) when the
    # arrays are empty or differ in length.
    file1_types = set(file1_cnv_del.Type.unique())
    file2_types = set(test_file2_cnvs.Type.unique())
    assert file1_types == file2_types

In [209]:
# Run the DELETION type-match check; the function is named test_type_match
# (there is no test_type, so the old call raised NameError).
test_type_match()

In [210]:
def test_overlap():
    """Check the start/stop proximity filter of file2 against a random file1 row.

    For a randomly drawn file1 row, the absolute differences between its
    Start/Stop and every file2 row's Start/Stop are stored as Start_Overlap
    and Stop_Overlap. Only rows where both differences are within
    ``test_min_overlap`` are kept, and the kept rows are verified to respect
    that bound. Nothing is asserted when no file2 row qualifies.

    NOTE(review): despite its name, ``test_min_overlap`` acts as a maximum
    allowed distance, and Chrom is not compared here — confirm both are
    intended.

    Raises:
        AssertionError: if a kept row exceeds the bound on either column.
    """
    test_min_overlap = 25
    test_file2_cnvs2 = file2.copy()

    # randrange(n) draws from 0..n-1, so len(file1) is the bound that makes
    # every row (including the last) eligible and works for a one-row frame.
    file1_random_cnv = file1.iloc[random.randrange(len(file1))]
    test_file2_cnvs2['Start_Overlap'] = np.abs(file2.Start.subtract(file1_random_cnv.Start))
    test_file2_cnvs2['Stop_Overlap'] = np.abs(file2.Stop.subtract(file1_random_cnv.Stop))
    test_file2_cnvs2 = test_file2_cnvs2.loc[
        (test_file2_cnvs2.Start_Overlap <= test_min_overlap)
        & (test_file2_cnvs2.Stop_Overlap <= test_min_overlap)
    ]

    # max() of an empty column is NaN, which would fail the comparison, so
    # only check the bound when at least one row survived the filter.
    if not test_file2_cnvs2.empty:
        assert test_file2_cnvs2.Start_Overlap.max() <= test_min_overlap
        assert test_file2_cnvs2.Stop_Overlap.max() <= test_min_overlap

In [211]:
# Run the check now; a failing assert inside it raises AssertionError in this cell.
test_overlap()