# Images for article on Atypia

In [212]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly import graph_objs as go
from plotly.graph_objs import *
from datetime import date,timedelta
from be.controllers.roman import make_roman
from be.controllers.counters import count_by_result,count_cases,count_categories,count_result_by_category
import os


## Notes
In the file from Dec 2023 there are some changes in the names of the columns 

"Bethesda Cathegory"->"BethesdacCathegory"

"MOLECULAR " --> "MOLECULAR"
"GENE MUTATED" --> "GENEMUTATED"

In [219]:
if not os.path.exists("images"):
    os.mkdir("images")

## First Graph 1

Load the data and set the necessary filters

In [220]:
datapath="data/AUS-PCR_Dec_2023.csv"
df=pd.read_csv(datapath,encoding='ISO-8859-1')# may need to remove the encoding spec 
## Renaming the column "Bethesda Cathegory" 
df.rename(columns={'BethesdaCathegory': 'Bethesda Cathegory','MOLECULAR':'MOLECULAR ','GENEMUTATED':'GENE MUTATED'}, inplace=True)

for col in ["ACCESS_DATE","SIGN_DATE"]:
    df[col]=df[col].apply(lambda z:pd.Timestamp(z))
################################################RENAME PATHOLOGISTS
#df["CYTOPATHOLOGIST"]=df["CYTOPATHOLOGIST"].apply(lambda z: "Pathologist " + str(int(z)) if(str(z) != 'nan') else z)
###################################### LIST OF PATHOLOGISTS

pathologists=df["CYTOPATHOLOGIST"].tolist()
pathologists=[x for x in pathologists if str(x) !="nan"]
pathologists=list(map(lambda z:int(z),pathologists))
pathologists=list(set(pathologists))


### In this line we set a treshold for filtering out pathologists
# pathologists=[x for x in pathologists if df[df["CYTOPATHOLOGIST"]==x].shape[0]>=200]

#pathologists=sorted(pathologists,key=lambda z: eval(z[11:]))
pathologists=[x for x in pathologists if df[df["CYTOPATHOLOGIST"]==x].shape[0]>=5]
df["CYTOPATHOLOGIST"]=df["CYTOPATHOLOGIST"].apply(lambda z: "Pathologist " + str(int(z)) if(str(z) != 'nan') else z)

pathologists=["Pathologist " + str(z) for z in pathologists]
df=df[df["CYTOPATHOLOGIST"].isin(pathologists)]


In [221]:
df.head()

Unnamed: 0,YEAR,SURGICAL_NUMBER,ACCESS_DATE,SIGN_DATE,TAT,SEX,AGE,SITE,RIGHT,LEFT,...,ROM,GENE MUTATED,MUTATION,VAF,FUSIONS,CNV,GEP,PTH,C-CELLS,Medullary
0,2018,MIA18-6387,2018-07-31,2018-06-08,6,Female,59,Thyroid,1.0,0.0,...,70.0,0,0,0,TRIM24/BRAF,0,0,0,0.0,0.0
1,2019,MIA19-3112,2019-04-22,2019-04-24,2,Male,74,Thyroid,1.0,0.0,...,80.0,0,0,0,TRA2A/THADAandIGF2BP3,0,1,0,0.0,0.0
2,2020,MIA20-3219,2020-09-06,2020-06-15,6,Male,54,Thyroid,0.0,1.0,...,60.0,0,0,0,TRA2A/THADA,0,1,0,0.0,0.0
3,2022,MIA22-3033,2022-02-05,2022-05-05,3,Female,69,Thyroid,1.0,0.0,...,70.0,0,0,0,TRA2A/THADA,0,0,0,0.0,0.0
4,2022,MIA22-3436,2022-05-18,2022-05-23,5,Male,51,Thyroid,0.0,1.0,...,70.0,0,0,0,TRA2A/THADA,0,0,0,0.0,0.0


### Here we set the names that should be used for the pathologists:



In [222]:
naming_dict=dict()
naming_dict["Pathologist 2"]="SP1"
naming_dict["Pathologist 5"]="JP1"
naming_dict["Pathologist 1"]="JP2"
naming_dict["Pathologist 3"]="JP3"
naming_dict["Pathologist 4"]="JP4"
naming_dict["Pathologist 6"]="Pathologist 6"
naming_dict["Pathologist 7"]="SP2"
naming_dict["Pathologist 8"]="SP3"
naming_dict["Pathologist 9"]="SP4"
naming_dict["Pathologist 10"]="JP5"
naming_dict["Pathologist 11"]="Pathologists 11"
naming_dict["Pathologist 12"]="JP6"
naming_dict["All pathologists"]="Overall"


In [223]:
df["PATHOLOGIST"]=df["CYTOPATHOLOGIST"].apply(lambda z: naming_dict[z])

In [224]:
df.head()

Unnamed: 0,YEAR,SURGICAL_NUMBER,ACCESS_DATE,SIGN_DATE,TAT,SEX,AGE,SITE,RIGHT,LEFT,...,GENE MUTATED,MUTATION,VAF,FUSIONS,CNV,GEP,PTH,C-CELLS,Medullary,PATHOLOGIST
0,2018,MIA18-6387,2018-07-31,2018-06-08,6,Female,59,Thyroid,1.0,0.0,...,0,0,0,TRIM24/BRAF,0,0,0,0.0,0.0,SP1
1,2019,MIA19-3112,2019-04-22,2019-04-24,2,Male,74,Thyroid,1.0,0.0,...,0,0,0,TRA2A/THADAandIGF2BP3,0,1,0,0.0,0.0,JP1
2,2020,MIA20-3219,2020-09-06,2020-06-15,6,Male,54,Thyroid,0.0,1.0,...,0,0,0,TRA2A/THADA,0,1,0,0.0,0.0,SP4
3,2022,MIA22-3033,2022-02-05,2022-05-05,3,Female,69,Thyroid,1.0,0.0,...,0,0,0,TRA2A/THADA,0,0,0,0.0,0.0,JP1
4,2022,MIA22-3436,2022-05-18,2022-05-23,5,Male,51,Thyroid,0.0,1.0,...,0,0,0,TRA2A/THADA,0,0,0,0.0,0.0,SP2


## Category distribution by pathologists

### here we apply the desired filters

In [225]:
############# CATEGORY RATIOS BY PATHOLOGISTS
def count_data(all_paths_df):
    count_data=pd.DataFrame()

    count_data["Pathologists"]=[naming_dict[pathologist] for pathologist in pathologists]
    for i in range(1,7):
        count_data[make_roman(i)]=[count_categories(all_paths_df,pathologist,i) for pathologist in pathologists]
    count_data["Cases"]=[count_cases(all_paths_df,pathologist) for pathologist in pathologists]
    count_data["Positives"]=[count_by_result(all_paths_df,pathologist,"POSITIVE") for pathologist in pathologists]
    count_data["Currently Negative"]=[count_by_result(all_paths_df,pathologist,"CURRENTLY NEGATIVE") for pathologist in pathologists]
    count_data["Positives or CN"]=count_data["Positives"]+count_data["Currently Negative"]
    count_data["positive_rate"]=count_data["Positives or CN"]/count_data["Cases"]
    count_data["positive_rate"]=count_data["positive_rate"].apply(lambda z:round(z,3))
    for i in range(1,7):
        count_data["TBS "+make_roman(i)]=count_data[make_roman(i)]/count_data["Cases"]
        count_data["TBS "+make_roman(i)]=count_data["TBS "+make_roman(i)].apply(lambda z:round(z,3))
    # count_data["Pathologists"]=pathologists

    count_data["TBS III Positives"]=[count_result_by_category(df,pathologist,3,"POSITIVE") for pathologist in pathologists]
    count_data["TBS III + Rate"]=round(count_data["TBS III Positives"]/count_data["III"],3)
    new_row=dict()
    new_row["Pathologists"]=["Overall"]
    new_row["Cases"]=[count_data["Cases"].sum()]
    for i in range(1,7):
        new_row[make_roman(i)]=[count_data[make_roman(i)].sum()]
    for i in range(1,7):
        new_row["ratio category "+make_roman(i)]=[new_row[make_roman(i)][0]/new_row["Cases"][0]]
    new_row["Positives"]=[count_data["Positives"].sum()]
    new_row["positive_rate"]=[count_data["Positives"].sum()/count_data["Cases"].sum()]
    new_row["TBS III Positives"]=[count_data["TBS III Positives"].sum()]
    new_row["TBS III + Rate"]=[round(count_data["TBS III Positives"].sum()/count_data["III"].sum(),3)]
    for i in range(1,7):
        new_row["TBS "+ make_roman(i)]=[round(count_data[make_roman(i)].sum()/count_data["Cases"].sum(),3)]

    new_row=pd.DataFrame.from_dict(new_row)
    count_data=pd.concat([count_data,new_row])
    count_data["TBS III Call Rate"]=count_data["TBS III"]
    return count_data

In [226]:
data=count_data(df)

In [227]:
data.head(11)

Unnamed: 0,Pathologists,I,II,III,IV,V,VI,Cases,Positives,Currently Negative,...,TBS VI,TBS III Positives,TBS III + Rate,ratio category I,ratio category II,ratio category III,ratio category IV,ratio category V,ratio category VI,TBS III Call Rate
0,JP2,6,44,17,2,0,2,71,6,0.0,...,0.028,4,0.235,,,,,,,0.239
1,SP1,109,835,132,14,6,39,1135,42,0.0,...,0.034,34,0.258,,,,,,,0.116
2,JP3,11,90,23,3,0,6,133,3,0.0,...,0.045,2,0.087,,,,,,,0.173
3,JP4,6,51,8,0,1,2,68,0,0.0,...,0.029,0,0.0,,,,,,,0.118
4,JP1,101,707,329,21,7,59,1224,77,0.0,...,0.048,64,0.195,,,,,,,0.269
5,SP2,41,947,118,25,11,46,1188,42,0.0,...,0.039,28,0.237,,,,,,,0.099
6,SP3,80,771,165,7,7,32,1062,45,0.0,...,0.03,43,0.261,,,,,,,0.155
7,SP4,49,561,136,7,7,35,795,45,0.0,...,0.044,41,0.301,,,,,,,0.171
8,JP5,1,28,21,2,0,5,57,3,0.0,...,0.088,3,0.143,,,,,,,0.368
9,JP6,168,1157,368,24,12,73,1802,110,0.0,...,0.041,95,0.258,,,,,,,0.204


In [228]:
path=sorted([naming_dict[z] for z in pathologists])+["Overall"]

In [229]:
good=[]
for person in path:
    for cat in ["TBS "+make_roman(i)  for i in range(1,7)]:
        new=dict()
        new["Pathologist"]=person
        new["Category"]=cat
        new["Rate"]=list(data[data["Pathologists"]==person][cat])[0]
        good.append(new)

In [230]:
good=pd.DataFrame.from_records(good)


In [231]:
def text_column(cat,value):
    ans=""
    if cat=="TBS III":
        ans=str(round(value*100,1))+"%"
    return ans
    

In [232]:
good['letters'] = good.apply(lambda x: text_column(x.Category, x.Rate), axis=1)

In [233]:
good

Unnamed: 0,Pathologist,Category,Rate,letters
0,JP1,TBS I,0.083,
1,JP1,TBS II,0.578,
2,JP1,TBS III,0.269,26.9%
3,JP1,TBS IV,0.017,
4,JP1,TBS V,0.006,
...,...,...,...,...
61,Overall,TBS II,0.689,
62,Overall,TBS III,0.175,17.5%
63,Overall,TBS IV,0.014,
64,Overall,TBS V,0.007,


In [235]:

def make_category_distribution_path():
    fig = px.bar(good, x="Pathologist", y="Rate", color="Category",text="letters",color_discrete_sequence=px.colors.qualitative.Plotly)
    fig.update_layout(
                autosize=True,
                margin=dict(
                    l=0,
                    r=0,
                    b=0,
                    t=40,
                    pad=0
                ),
                # template="plotly_dark",
                title={
                "text":"Category Distribution by Pathologist",
                'y':0.98,
                'x':0.46,
                'xanchor': 'center',
                'yanchor': 'top'
                },
                legend_title="",
                xaxis_title="Pathologist",
                #yaxis_title=yaxislabel   # legend_traceorder="reversed",

            )
    fig.update_layout({
    'plot_bgcolor': "white",
    'paper_bgcolor':"white",
    })

    return fig

In [236]:
fig=make_category_distribution_path()

In [237]:
fig.show()

In [238]:
fig.write_image("images/images_Dec_2023/graph1.png")

In [239]:
### Here we order the dataframe "data"
newdata=pd.DataFrame()
for person in path:

    new=data[data["Pathologists"]==person]
    newdata=pd.concat([newdata,new])
data=newdata

## Graph 2

In [240]:
years=range(2018,2023)

In [241]:
def make_gap_minder_data(dataframe):
    gap_minder_data=pd.DataFrame()
    for year in years:
        for pathologist in pathologists:
            new_row=dict()
            size=dataframe[(dataframe["CYTOPATHOLOGIST"]==pathologist)&(dataframe["YEAR"]==year)].shape[0]

            calls_CatIII=dataframe[(dataframe["CYTOPATHOLOGIST"]==pathologist)&(dataframe["YEAR"]
            ==year)&(dataframe["Bethesda Cathegory"]==3)].shape[0]
            if calls_CatIII>0:
                call_rate_CatIII=calls_CatIII/size

                positive_rate_CatIII=dataframe[(dataframe["CYTOPATHOLOGIST"]==pathologist)&(dataframe["YEAR"]
                ==year)&(dataframe["Bethesda Cathegory"]==3)&(dataframe["RESULT"]=="POSITIVE")].shape[0]/calls_CatIII
                new_row["Pathologist"]=[pathologist]
                new_row["Year"]=[year]
                new_row["Case Count"]=[size]
                new_row["Call rate category III"]=[call_rate_CatIII]
                new_row["Positive rate category III"]=[positive_rate_CatIII]
                new_row=pd.DataFrame.from_dict(new_row)
                gap_minder_data=pd.concat([gap_minder_data,new_row],ignore_index=True)
    for year in years:
        new_row=dict()
        size=dataframe[(dataframe["YEAR"]==year)].shape[0]

        calls_CatIII=dataframe[(dataframe["YEAR"]
        ==year)&(dataframe["Bethesda Cathegory"]==3)].shape[0]
        if calls_CatIII>0:
            call_rate_CatIII=calls_CatIII/size

            positive_rate_CatIII=dataframe[(dataframe["YEAR"]
            ==year)&(dataframe["Bethesda Cathegory"]==3)&(dataframe["RESULT"]=="POSITIVE")].shape[0]/calls_CatIII
            new_row["Pathologist"]=["All pathologists"]
            new_row["Year"]=[year]
            new_row["Case Count"]=[size]
            new_row["Call rate category III"]=[call_rate_CatIII]
            new_row["Positive rate category III"]=[positive_rate_CatIII]
            new_row=pd.DataFrame.from_dict(new_row)
            gap_minder_data=pd.concat([gap_minder_data,new_row])
    return gap_minder_data

In [242]:
gap_minder_data=make_gap_minder_data(df)

In [243]:
time_df=pd.DataFrame()
for pato in pathologists+["All pathologists"]:
    time_path=pd.DataFrame()
    time_path["Year"]=list(gap_minder_data[gap_minder_data["Pathologist"]==pato]["Year"].unique())
    time_path["Cat III Call Rate"]=list(gap_minder_data[gap_minder_data["Pathologist"]==pato]["Call rate category III"])
    time_path["Pathologist"]=[pato for i in range(time_path["Cat III Call Rate"].shape[0]) ]
    time_df=pd.concat([time_df,time_path])

In [244]:
path

['JP1',
 'JP2',
 'JP3',
 'JP4',
 'JP5',
 'JP6',
 'SP1',
 'SP2',
 'SP3',
 'SP4',
 'Overall']

In [245]:
time_df["Pathologist"]=time_df["Pathologist"].apply(lambda z: naming_dict[z])

In [246]:
def make_CatIII_Call_Rate_time(
        dataframe):
    """Create a plot given dictionary  whose keys are the six Bethesda Categories and values are dataframes of count by year."""


        # graph.add_trace(go.Scatter(x=possibilities[possibility]["x"], y= possibilities[possibility]["y"],mode="markers+lines", name=possibility, showlegend=True, visible=True,fill=None,line=dict(color=paleta[count%N])))
    graph=px.line(dataframe,x="Year", y="Cat III Call Rate",color="Pathologist",markers=True, color_discrete_sequence=px.colors.qualitative.T10)
    # graph=px.line(x=[2,4],y=[3,4])

    graph.update_layout(
                autosize=True,
                margin=dict(
                    l=0,
                    r=0,
                    b=0,
                    t=40,
                    pad=0
                ),
                #template="plotly_dark",
                title={
                "text":"AUS Rate Over Time",
                'y':0.98,
                'x':0.46,
                'xanchor': 'center',
                'yanchor': 'top'
                },
                # legend_title="Category",
                xaxis_title="Year",
                yaxis_title="AUS Rate",

            )
    return graph

In [247]:
### Here we order the dataframe "data"
newtime=pd.DataFrame()
for person in path:
    new=time_df[time_df["Pathologist"]==person]
    newtime=pd.concat([newtime,new])
time_df=newtime

In [248]:
fig=make_CatIII_Call_Rate_time(time_df)

In [249]:
fig.show()

In [250]:
fig.write_image("images/images_Dec_2023/graph2.png")

## Graph 3


In [251]:
def make_pie(dataframe,names,values,title):
    fig=px.pie(dataframe, values=values, names=names,
             color_discrete_sequence=px.colors.qualitative.T10


    )

    fig.update_layout(
        autosize=True,
        margin=dict(
            l=0,
            r=0,
            b=0,
            t=40,
            pad=0
        ),
        #template="plotly_dark",
        title={
        "text":title,
        'y':0.98,
        'x':0.50,
        'xanchor': 'center',
        'yanchor': 'top'
        },

    )
    fig.update_traces(sort=False)

    return fig

In [252]:
def molecular_by_result_graph(dataframe):
        count_results=dataframe["RESULT"].value_counts().to_dict()
        pie_data=pd.DataFrame()
        pie_data["Result"]=list(count_results.keys())
        pie_data["Count"]=list(count_results.values())
        return make_pie(pie_data,"Result","Count","Molecular Tests By Result")

In [253]:
pie_df=df[~df["MOLECULAR "].str.contains("AFIRMA")]

In [254]:
pie_df["MOLECULAR "].unique()

array(['THYROSEQ', 'NO', 'CANCELLED'], dtype=object)

In [255]:
fig=molecular_by_result_graph(pie_df)

In [256]:
fig.show()

In [257]:
fig.write_image("images/images_Dec_2023/graph3.png")

### Graph 4

In [258]:
def linear_approx(X,Y):
    A = np.vstack([X, np.ones(len(X))]).T
    m, c = np.linalg.lstsq(A, Y, rcond=None)[0]
    return m,c

In [259]:
def scat_callrate_vs_positive(dataframe,title,trend=False):
    scat=px.scatter(dataframe,x='TBS '+make_roman(3)+ " Call Rate",
                    y='TBS III + Rate',
                    color='Pathologists',
                    # trendline="ols",
                    # trendline_scope="overall",
                    size='Cases',
                    hover_data=['Pathologists'],color_discrete_sequence=px.colors.qualitative.Plotly,
                    )
    if trend==True:
        X=list(dataframe["TBS III Call Rate"])[:-1]
        Y=list(dataframe["TBS III + Rate"])[:-1]
        m,c=linear_approx(X,Y)
        t = np.linspace(0.1, 0.3)
        #fig = px.scatter(x=t, y=t**2, labels={'x':'t', 'y':'cos(t)'})
        scat.add_scatter(x=t, y=m*t+c,showlegend=False)
    scat.update_layout(
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=40,
                pad=0
            ),
            # template="plotly_dark",
            title={
            "text":title,
            'y':0.98,
            'x':0.46,
            'xanchor': 'center',
            'yanchor': 'top'
            },
            legend_title="",
            xaxis_title="AUS Rate",
            yaxis_title="PCR"
            # legend_traceorder="reversed",

        )
    #scat.data=scat.data[:14]
    #scat.update_layout(yaxis_range=[0.1,0.35])

    # scat.update_layout({
    #  'plot_bgcolor': "lightgray",
    #  'paper_bgcolor':"white",
    
    #  })
    return scat

In [260]:
fig=scat_callrate_vs_positive(data,"AUS Rate vs PCR Overall Period",trend=True)

In [261]:
fig.show()

In [262]:
fig.write_image("images/images_Dec_2023/graph4.png")

In [263]:
yeardata=dict()
for year in years:
   yeardata[year]=count_data(df[df["YEAR"]==year])

In [264]:
for i in range(2018,2023):
    fig=scat_callrate_vs_positive(yeardata[i],"AUS Rate vs PCR "+str(i))
    fig.write_image("images/AUSvsPCR"+str(i)+".png")

In [265]:
def scat_callrate_vs_positive_by_year(dataframe,title):
    scat=px.scatter(dataframe,x='TBS '+make_roman(3)+ " Call Rate",
                    y='TBS III + Rate',
                    color='Year',
                    size='Cases',
                    hover_data=['Pathologists'],color_discrete_sequence=px.colors.qualitative.Plotly
                    )
    scat.update_layout(
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=40,
                pad=0
            ),
            # template="plotly_dark",
            title={
            "text":title,
            'y':0.98,
            'x':0.46,
            'xanchor': 'center',
            'yanchor': 'top'
            },
            legend_title="",
            xaxis_title="AUS Rate",
            yaxis_title="PCR"
            # legend_traceorder="reversed",

        )
    #scat.update_layout(yaxis_range=[0.1,0.35])

    # scat.update_layout({
    #  'plot_bgcolor': "lightgray",
    #  'paper_bgcolor':"white",
    
    #  })
    return scat

In [266]:
data_with_time=pd.DataFrame()
for item in list(yeardata.keys()):
    new=yeardata[item]
    new["Year"]=str(item)
    data_with_time=pd.concat([data_with_time,new])


In [267]:
data_with_time[data_with_time["Pathologists"]==path[1]]

Unnamed: 0,Pathologists,I,II,III,IV,V,VI,Cases,Positives,Currently Negative,...,TBS III Positives,TBS III + Rate,ratio category I,ratio category II,ratio category III,ratio category IV,ratio category V,ratio category VI,TBS III Call Rate,Year
0,JP2,6,42,17,2,0,2,69,6,0.0,...,4,0.235,,,,,,,0.246,2018
0,JP2,0,2,0,0,0,0,2,0,0.0,...,4,inf,,,,,,,0.0,2019
0,JP2,0,0,0,0,0,0,0,0,0.0,...,4,inf,,,,,,,,2020
0,JP2,0,0,0,0,0,0,0,0,0.0,...,4,inf,,,,,,,,2021
0,JP2,0,0,0,0,0,0,0,0,0.0,...,4,inf,,,,,,,,2022


In [268]:
def AUS_call_rate_vs_PCR_by_Path_and_year(n):
    return scat_callrate_vs_positive_by_year(data_with_time[data_with_time["Pathologists"]==path[n]],"AUS Rate vs PCR in Time for "+ path[n])

In [269]:
AUS_call_rate_vs_PCR_by_Path_and_year(9)

In [270]:
for i in range(10):
    fig=AUS_call_rate_vs_PCR_by_Path_and_year(i)
    fig.write_image("images/images_Dec_2023/AUSvsPCRinTimefor"+path[i]+".png")

## Graph 5

In [271]:
def make_gene(data_frame,labels_column,values_column,title):
    fig = px.bar( data_frame,x=labels_column, y=values_column, title="Wide-Form Input",color=labels_column,
     color_discrete_sequence=px.colors.qualitative.T10
    )
    fig.update_xaxes(visible=False, showticklabels=False)
    fig.update_layout(
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=40,
                pad=0
            ),
            #template="plotly_dark",
            title={
            "text":title,
            'y':0.98,
            'x':0.46,
            'xanchor': 'center',
            'yanchor': 'top'
            },
            legend_title="",
            xaxis_title=None,

            # legend_traceorder="reversed",

        )

    return fig

In [272]:
def gene_graph(dataframe,title):
        labels_bar=list(dataframe["GENE MUTATED"].unique())
        labels_bar=[x for x in labels_bar if str(x) not in ["?","nan","0"]]
        def count_instances(x):
            return dataframe[dataframe["GENE MUTATED"]==x].shape[0]

        labels_bar=sorted(labels_bar,key=lambda z:count_instances(z),reverse=True)[:15]
        values_bar=[count_instances(x) for x in labels_bar]
        bar_data=pd.DataFrame()
        bar_data["Count"]=values_bar
        bar_data["GENE MUTATED"]=labels_bar
        bar_data["GENE MUTATED"]=bar_data["GENE MUTATED"].astype(str)






        return make_gene(bar_data,"GENE MUTATED","Count",title)

In [274]:
fig=gene_graph(df,"Gene Mutated Count Molecular Tests")

In [275]:
fig.show()

In [280]:
fig.write_image("images/images_Dec_2023/graph5.png")

## Graph 6

In [290]:
df_currently_negative=df[df["RESULT"]=="CURRENTLYNEGATIVE"]

In [291]:
fig=gene_graph(df_currently_negative,"Gene Mutate Count Currently Negative")

In [292]:
fig.show()

In [293]:
fig.write_image("images/images_Dec_2023/graph6.png")