In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
"""
Hot class 3 queries.

Query 1: pattern-match the word 'hot' in the proband comment, discussion and
         action text fields.
Query 2: find all variants classified as class 3, and the subset of those
         which went for Sanger validation.

DB copy: 2019-09-09
"""

# Reports that both queries below iterate over, restricted to the
# 'raredisease' sample type.
# NOTE(review): presumably this custom manager method returns only the most
# recent report per case -- confirm against the manager implementation.
# NOTE(review): GELInterpretationReport is not imported anywhere in this
# notebook; it is presumably injected by the kernel environment (e.g. a
# Django shell_plus notebook) -- confirm before running on a fresh kernel.
gelirs = GELInterpretationReport.objects.latest_cases_by_sample_type('raredisease')


In [None]:
"""
Find hot class 3 cases, logic 1:

1. All non-blocked cases whose proband comment, discussion or action text
   contains the word 'hot'.
"""
import re

# The whole word 'hot' in any letter case. A word-boundary match is used so
# that 'hot' next to punctuation or a line break ("hot,", "(hot)", "hot\n")
# is found, while words that merely contain it ("hotspot", "photo") are not.
HOT_WORD_REGEX = re.compile(r'\bhot\b', re.IGNORECASE)


def mentions_hot(*text_fields):
    """Return True if any of the given text fields contains the word 'hot'.

    Each field is searched as real text. Searching ``str()`` of a tuple of
    fields instead would search the ``repr`` of every string, in which
    newlines and tabs are escaped to literal ``\\n`` / ``\\t`` and so hide a
    'hot' that sits at the start or end of a line.

    Args:
        *text_fields: free-text values to search; None and empty values are
            skipped.

    Returns:
        bool: True when at least one field matches HOT_WORD_REGEX.
    """
    return any(
        HOT_WORD_REGEX.search(str(field)) for field in text_fields if field
    )


# (gel_id, ir_family_id) pairs for every case that mentions 'hot'.
class_3_regex_cases = []

for gelir in gelirs:
    if gelir.status == 'blocked':
        continue

    try:
        proband = gelir.ir_family.participant_family.proband
    except Proband.DoesNotExist:
        # Presumably the family has no linked proband, so there is no text
        # to search; skip the case rather than abort the whole run.
        continue

    # Hot class 3 hunting, logic 1
    if mentions_hot(proband.comment, proband.discussion, proband.action):
        class_3_regex_cases.append(
            (proband.gel_id, gelir.ir_family.ir_family_id)
        )

# Sanity check
print(len(class_3_regex_cases))


In [None]:
# Output to xlsx: a header row followed by one row per (gel_id, cip-id) pair
# collected in class_3_regex_cases.
import xlsxwriter

header = ("gel_id", "cip-id")

# The context manager closes the workbook when the block exits, including
# when a write raises part-way through, so the handle is never left open.
with xlsxwriter.Workbook("hot_class_3_cases_20190909.xlsx") as workbook:
    worksheet = workbook.add_worksheet('data')
    worksheet.write_row(0, 0, header)

    # Data rows start at row 1, directly below the header.
    for row, report in enumerate(class_3_regex_cases, start=1):
        worksheet.write_row(row, 0, report)

In [None]:
'''
Find hot class 3 cases, logic 2:

2. All class 3 variants, and the subset of those that have gone to Sanger
   confirmation.
'''
# Report statuses that are left out of this query.
EXCLUDED_REPORT_STATUSES = ('blocked', 'unknown')

# validation_status codes this query treats as "not in the validation process".
# NOTE(review): presumably 'U' and 'N' are the unknown / not-required choices
# on ProbandVariant -- confirm against the model's choices.
NOT_IN_VALIDATION_STATUSES = ('U', 'N')

# One 11-field row per class 3 variant (matches the xlsx export header).
class_3_variants_all = []
# One 4-field row per class 3 variant that is in the validation process.
class_3_variants_validated = []

for gelir in gelirs:
    if gelir.status in EXCLUDED_REPORT_STATUSES:
        continue

    try:
        cip_id = gelir.ir_family.ir_family_id

        for pv in ProbandVariant.objects.filter(interpretation_report=gelir):
            # A variant only has a RareDiseaseReport once it has gone to MDT;
            # variants without one are skipped.
            try:
                raredisease_report = RareDiseaseReport.objects.get(proband_variant=pv)
            except RareDiseaseReport.DoesNotExist:
                continue

            if raredisease_report.classification != '3':
                continue

            # Transcript details are best-effort: a variant without usable
            # transcript data still gets a row, with blank hgvs_c and gene.
            # Exception (not a bare except) is caught so that
            # KeyboardInterrupt / SystemExit can still stop the cell.
            try:
                ptv = pv.get_transcript_variant()
                hgvs_c = ptv.hgvs_c
                gene = str(ptv.transcript.gene)
            except Exception:
                hgvs_c = ""
                gene = ""

            classification = raredisease_report.classification
            validation_display = pv.get_validation_status_display()

            class_3_variants_all.append((
                cip_id,
                pv.variant_id,
                str(pv.variant.genome_assembly),
                pv.variant.chromosome,
                pv.variant.position,
                pv.variant.reference,
                pv.variant.alternate,
                hgvs_c,
                gene,
                classification,
                validation_display,
            ))

            # If variant is in validation process
            if pv.validation_status not in NOT_IN_VALIDATION_STATUSES:
                class_3_variants_validated.append(
                    (cip_id, pv.variant_id, classification, validation_display)
                )

    except Proband.DoesNotExist:
        # Kept from the original: a missing proband skips the rest of this
        # report instead of aborting the whole run.
        pass

# sanity check
print("All class 3 variants={}".format(len(class_3_variants_all)))
print("Class 3 variants sent for validation={}".format(len(class_3_variants_validated)))

In [None]:
# Output to xlsx: a header row followed by one row per class 3 variant
# collected in class_3_variants_all.
import xlsxwriter

header = ("cip_id", "variant_id", "build", "chrom", "pos", "ref", "alt", "hgvs_c", "gene", "variant_classification", "variant_status")

# The context manager closes the workbook when the block exits, including
# when a write raises part-way through, so the handle is never left open.
with xlsxwriter.Workbook("class_3_variants_all_20190909.xlsx") as workbook:
    worksheet = workbook.add_worksheet('data')
    worksheet.write_row(0, 0, header)

    # Data rows start at row 1, directly below the header.
    for row, report in enumerate(class_3_variants_all, start=1):
        worksheet.write_row(row, 0, report)