In [208]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly import graph_objs as go
from plotly.graph_objs import *
from datetime import date,timedelta
from be.controllers.roman import make_roman
from be.controllers.counters import count_by_result,count_cases,count_categories,count_result_by_category

Load the data and set the necessary filters

In [224]:
df=pd.read_csv("data/USCAP_Large.csv")
for col in ["ACCESS_DATE","SIGN_DATE"]:
    df[col]=df[col].apply(lambda z:pd.Timestamp(z))
################################################RENAME PATHOLOGISTS
df["CYTOPATHOLOGIST"]=df["CYTOPATHOLOGIST"].apply(lambda z: "Pathologist " + str(int(z)) if(str(z) != 'nan') else z)
###################################### LIST OF PATHOLOGISTS

pathologists=df["CYTOPATHOLOGIST"].tolist()
pathologists=[x for x in pathologists if str(x) !="nan"]
#pathologists=list(map(lambda z:int(z),pathologists))
pathologists=list(set(pathologists))


### In this line we set a treshold for filtering out pathologists
# pathologists=[x for x in pathologists if df[df["CYTOPATHOLOGIST"]==x].shape[0]>=200]

pathologists=sorted(pathologists,key=lambda z: eval(z[11:]))
df=df[df["CYTOPATHOLOGIST"].isin(pathologists)]


first_day=min(df["SIGN_DATE"].to_list())
last_day=max(df["SIGN_DATE"].to_list())
years=list(df["YEAR"].unique())


Default_time_ranges=dict()

Default_time_ranges["Historical"]=[first_day,date.today()]
Default_time_ranges["2022"]=[date(2022,1,1),date(2022,12,31)]
Default_time_ranges["2021"]=[date(2021,1,1),date(2021,12,31)]
Default_time_ranges["2020"]=[date(2020,1,1),date(2020,12,31)]
Default_time_ranges["2019"]=[date(2019,1,1),date(2019,12,31)]
Default_time_ranges["2018"]=[date(2018,1,1),date(2018,12,31)]

### Here we set the names that should be used for the pathologists:



In [225]:
naming_dict=dict()
naming_dict["Pathologist 2"]="SP1"
naming_dict["Pathologist 5"]="SP2"
naming_dict["Pathologist 1"]="JP1"
naming_dict["Pathologist 3"]="JP2"
naming_dict["Pathologist 4"]="JP3"
naming_dict["Pathologist 6"]="JP4"
naming_dict["Pathologist 7"]="JP5"
naming_dict["Pathologist 8"]="JP6"
naming_dict["Pathologist 9"]="JP7"
naming_dict["Pathologist 10"]="JP8"
naming_dict["Pathologist 11"]="JP9"
naming_dict["Pathologist 12"]="JP10"



In [226]:
df["PATHOLOGIST"]=df["CYTOPATHOLOGIST"].apply(lambda z: naming_dict[z])

## Category distribution by pathologists

In [227]:
pathologists=["Pathologist 2","Pathologist 5"]+["Pathologist "+str(i) for i in range(1,13) if i not in [2,5]]

# here we apply the desired filters

In [206]:
#df=df[df["MOLECULAR "].isin(["THYROSEQ","THYROSEQ "])]

In [228]:
### In this line we set a treshold for filtering out pathologists
pathologists=[x for x in pathologists if df[df["CYTOPATHOLOGIST"]==x].shape[0]>=5]


In [347]:
def make_stacked_bar(data_frame,labels_column,values_column,title,yaxislabel):
    feg = px.bar( data_frame,x=labels_column, y=values_column, title="Wide-Form Input",
    color_discrete_sequence=px.colors.qualitative.Vivid,text=values_column
    # color_discrete_sequence=["red", "green", "blue", "goldenrod", "magenta","orange"]
    )
    feg.update_layout(
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=40,
                pad=0
            ),
            # template="plotly_dark",
            title={
            "text":title,
            'y':0.98,
            'x':0.46,
            'xanchor': 'center',
            'yanchor': 'top'
            },
            legend_title="",
            xaxis_title=None,
            yaxis_title=yaxislabel   # legend_traceorder="reversed",

        )
    feg.update_layout({
'plot_bgcolor': "white",
'paper_bgcolor':"white",
})
    feg.for_each_trace(lambda t: t.update(text = []) if t.name not in ['T1'] else ())

    return feg

In [256]:
############# CATEGORY RATIOS BY PATHOLOGISTS
def count_data(all_paths_df):
    count_data=pd.DataFrame()

    count_data["Pathologists"]=[naming_dict[pathologist] for pathologist in pathologists]
    for i in range(1,7):
        count_data[make_roman(i)]=[count_categories(all_paths_df,pathologist,i) for pathologist in pathologists]
    count_data["Cases"]=[count_cases(all_paths_df,pathologist) for pathologist in pathologists]
    count_data["Positives"]=[count_by_result(all_paths_df,pathologist,"POSITIVE") for pathologist in pathologists]
    count_data["Currently Negative"]=[count_by_result(all_paths_df,pathologist,"CURRENTLY NEGATIVE") for pathologist in pathologists]
    count_data["Positives or CN"]=count_data["Positives"]+count_data["Currently Negative"]
    count_data["positive_rate"]=count_data["Positives or CN"]/count_data["Cases"]
    count_data["positive_rate"]=count_data["positive_rate"].apply(lambda z:round(z,3))
    for i in range(1,7):
        count_data["TBS "+make_roman(i)]=count_data[make_roman(i)]/count_data["Cases"]
        count_data["TBS "+make_roman(i)]=count_data["TBS "+make_roman(i)].apply(lambda z:round(z,3))
    # count_data["Pathologists"]=pathologists

    count_data["TBS III Positives"]=[count_result_by_category(df,pathologist,3,"POSITIVE") for pathologist in pathologists]
    count_data["TBS III + Rate"]=round(count_data["TBS III Positives"]/count_data["III"],3)
    new_row=dict()
    new_row["Pathologists"]=["Overall"]
    new_row["Cases"]=[count_data["Cases"].sum()]
    for i in range(1,7):
        new_row[make_roman(i)]=[count_data[make_roman(i)].sum()]
    for i in range(1,7):
        new_row["ratio category "+make_roman(i)]=[new_row[make_roman(i)][0]/new_row["Cases"][0]]
    new_row["Positives"]=[count_data["Positives"].sum()]
    new_row["positive_rate"]=[count_data["Positives"].sum()/count_data["Cases"].sum()]
    new_row["TBS III Positives"]=[count_data["TBS III Positives"].sum()]
    new_row["TBS III + Rate"]=[round(count_data["TBS III Positives"].sum()/count_data["III"].sum(),3)]
    for i in range(1,7):
        new_row["TBS "+ make_roman(i)]=[round(count_data[make_roman(i)].sum()/count_data["Cases"].sum(),3)]

    new_row=pd.DataFrame.from_dict(new_row)
    count_data=pd.concat([count_data,new_row])
    count_data["TBS III Call Rate"]=count_data["TBS III"]
    return count_data

In [257]:
data=count_data(df)

In [258]:
data.columns

Index(['Pathologists', 'I', 'II', 'III', 'IV', 'V', 'VI', 'Cases', 'Positives',
       'Currently Negative', 'Positives or CN', 'positive_rate', 'TBS I',
       'TBS II', 'TBS III', 'TBS IV', 'TBS V', 'TBS VI', 'TBS III Positives',
       'TBS III + Rate', 'ratio category I', 'ratio category II',
       'ratio category III', 'ratio category IV', 'ratio category V',
       'ratio category VI', 'TBS III Call Rate'],
      dtype='object')

In [259]:
data.head()

Unnamed: 0,Pathologists,I,II,III,IV,V,VI,Cases,Positives,Currently Negative,...,TBS VI,TBS III Positives,TBS III + Rate,ratio category I,ratio category II,ratio category III,ratio category IV,ratio category V,ratio category VI,TBS III Call Rate
0,SP1,109,835,132,14,6,39,1135,42,21.0,...,0.034,34,0.258,,,,,,,0.116
1,SP2,101,707,329,21,7,59,1224,77,43.0,...,0.048,64,0.195,,,,,,,0.269
2,JP1,6,44,17,2,0,2,71,6,3.0,...,0.028,4,0.235,,,,,,,0.239
3,JP2,11,90,23,3,0,6,133,3,1.0,...,0.045,2,0.087,,,,,,,0.173
4,JP3,6,51,8,0,1,2,68,0,0.0,...,0.029,0,0.0,,,,,,,0.118


In [348]:
def compare_ratios(dataframe):
        return make_stacked_bar(dataframe,"Pathologists",["TBS "+make_roman(i)  for i in range(1,7)], "Category Distribution By Pathologist","Rate")

In [261]:
counting_data=count_data(df)

In [262]:
counting_data.head()

Unnamed: 0,Pathologists,I,II,III,IV,V,VI,Cases,Positives,Currently Negative,...,TBS VI,TBS III Positives,TBS III + Rate,ratio category I,ratio category II,ratio category III,ratio category IV,ratio category V,ratio category VI,TBS III Call Rate
0,SP1,109,835,132,14,6,39,1135,42,21.0,...,0.034,34,0.258,,,,,,,0.116
1,SP2,101,707,329,21,7,59,1224,77,43.0,...,0.048,64,0.195,,,,,,,0.269
2,JP1,6,44,17,2,0,2,71,6,3.0,...,0.028,4,0.235,,,,,,,0.239
3,JP2,11,90,23,3,0,6,133,3,1.0,...,0.045,2,0.087,,,,,,,0.173
4,JP3,6,51,8,0,1,2,68,0,0.0,...,0.029,0,0.0,,,,,,,0.118


In [249]:
counting_data.columns

Index(['Pathologists', 'I', 'II', 'III', 'IV', 'V', 'VI', 'Cases', 'Positives',
       'Currently Negative', 'Positives or CN', 'positive_rate', 'TBS I',
       'TBS II', 'TBS III', 'TBS IV', 'TBS V', 'TBS VI', 'TBS III Positives',
       'TBS III + Rate', 'ratio category I', 'ratio category II',
       'ratio category III', 'ratio category IV', 'ratio category V',
       'ratio category VI', 'TBS III Call Rate'],
      dtype='object')

In [273]:
counting_data.columns

Index(['Pathologists', 'I', 'II', 'III', 'IV', 'V', 'VI', 'Cases', 'Positives',
       'Currently Negative', 'Positives or CN', 'positive_rate', 'TBS I',
       'TBS II', 'TBS III', 'TBS IV', 'TBS V', 'TBS VI', 'TBS III Positives',
       'TBS III + Rate', 'ratio category I', 'ratio category II',
       'ratio category III', 'ratio category IV', 'ratio category V',
       'ratio category VI', 'TBS III Call Rate'],
      dtype='object')

In [349]:
figura=compare_ratios(counting_data)

ValueError: All arguments should have the same length. The length of argument `text` is 6, whereas the length of  previously-processed arguments ['Pathologists'] is 11

In [343]:
counting_data.shape

(11, 27)

In [345]:
figura

In [311]:


# Create example data
data = pd.DataFrame({
    'category': ['A', 'B', 'C'],
    'value1': [10, 20, 30],
    'value2': [20, 30, 10],
})

# Create stacked bar chart
fig = px.bar(data, x='category', y=['value1', 'value2'], barmode='stack',
             text=[None, ("hola","hello"), None])

# Display chart
fig.show()

In [304]:
import plotly.express as px

data = px.data.gapminder()
fig = px.bar(data_frame=data, x="continent", y="pop", color="continent",
             text=[f"{x:,.0f}" for x in data["pop"]])
fig.show()

In [305]:
data

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.853030,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.100710,AFG,4
3,Afghanistan,Asia,1967,34.020,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,ZWE,716
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716


In [301]:
import plotly.express as px
mydf = px.data.medals_long()

fig = px.bar(mydf, x="medal", y="count", color="nation", text="nation")
fig.show()

In [302]:
mydf

Unnamed: 0,nation,medal,count
0,South Korea,gold,24
1,China,gold,10
2,Canada,gold,9
3,South Korea,silver,13
4,China,silver,15
5,Canada,silver,12
6,South Korea,bronze,11
7,China,bronze,8
8,Canada,bronze,12


In [327]:
df1 = pd.DataFrame()
df1['x'] = ['Product A', 'Product B', 'Product C']
df1['z'] = ['T1','T1','T1']
df1['y'] = [20, 14, 23]

df2 = pd.DataFrame()
df2['x'] = ['Product A', 'Product B', 'Product C']
df2['z'] = ['T2','T2','T2']
df2['y'] = [40, 10, 55]

df = df1.append(df2)

fig = px.bar(df,
            y="x",
            x="y",
            text='y',
            color='z',barmode='stack')
fig.show()


# fig.for_each_trace(lambda t: print(t))
fig.for_each_trace(lambda t: t.update(text = []) if t.name not in ['T2'] else ())
fig.show()


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

