In [131]:
import pandas as pd
import plotly.express as px

df = pd.read_csv("trial.csv")

risk_dict = {}
for patient, group in df.groupby('pmtct_mother'):
    if 'Y' in group.iloc[:, 2:].values:
        risk_dict[patient] = "HIGH RISK"
    else:
        risk_dict[patient] = "STABLE"

risk_df = pd.DataFrame.from_dict(risk_dict, orient='index', columns=['Risk'])
grouped = risk_df['Risk'].value_counts().reset_index().rename(columns={'index': 'Risk', 'Risk': 'Number of patients'})
grouped.loc[len(grouped)] = ['All', len(risk_df)]
grouped = grouped.sort_values('Number of patients', ascending=False)

color_dict = {"All": "blue", "STABLE": "green", "HIGH RISK": "red"}
fig = px.bar(grouped, x="Risk", y="Number of patients", text='Number of patients',
             color="Risk", color_discrete_map=color_dict,
             title="HIGH RISK vs STABLE")
fig.show()







In [132]:
risk_df

Unnamed: 0,Risk
36bbe07c-69da-4b99-ad1a-46dd7cd94340,HIGH RISK
73f55e1b-a749-4631-90b6-a7bf04b91a83,STABLE
cbc64648-b0b7-4004-884d-567ff46e27ac,STABLE


In [113]:
import pandas as pd
import numpy as np
import plotly.express as px
df=pd.read_csv("trial.csv")

dicts = {}
for patient in df['pmtct_mother'].unique():
    a= df[df['pmtct_mother']==patient]
    for i in a.columns[2:]:
        if "Y" in list(a[i].unique()):
            dicts[patient] = "HIGH RISK"
            break
        elif "Y" not in list(a[i].unique()):
            dicts[patient] = "STABLE"

risk_df=pd.DataFrame(dicts.items(),columns=['Number of patients','Risk'])

grouped = risk_df.groupby('Risk')['Number of patients'].nunique().reset_index()
grouped.loc['total'] = grouped.sum(numeric_only=True, axis=0)
grouped['Risk']=grouped['Risk'].replace(np.nan,"All")
grouped=grouped.sort_values('Number of patients',ascending=False)

color_dict = {
    "All": "blue",
    "STABLE": "green",
    "HIGH RISK": "red"
}
fig=px.bar(grouped,x="Risk",y="Number of patients",text='Number of patients',
           color="Risk",color_discrete_map=color_dict,
           title="HIGH RISK vs STABLE")
fig.show()

In [110]:
def show_stability(stability_df):
    dicts = {}
    for patient in stability_df['pmtct_mother'].unique():
        a = stability_df[stability_df['pmtct_mother'] == patient]
        for i in a.columns[2:]:
            if "Y" in list(a[i].unique()):
                dicts[patient] = "HIGH RISK"
                break
            elif "Y" not in list(a[i].unique()):
                dicts[patient] = "STABLE"

    risk_df = pd.DataFrame(dicts.items(), columns=['Number of patients', 'Risk'])

    grouped = risk_df.groupby('Risk')['Number of patients'].nunique().reset_index()
    grouped.loc['total'] = grouped.sum(numeric_only=True, axis=0)
    grouped['Risk'] = grouped['Risk'].replace(np.nan, "All")
    grouped = grouped.sort_values('Number of patients', ascending=False)

    # Define a dictionary for the colors to use in the plot and create the plot using Plotly Express
    color_dict = {"All": "blue", "STABLE": "green", "HIGH RISK": "red"}
    print("GROUPED:::::::::::::::::::::::::::::::::::::::::::::::::::")
    print(grouped)
    fig = px.bar(grouped, x="Risk", y="Number of patients", text='Number of patients',
                 color="Risk", color_discrete_map=color_dict,
                 title="HIGH RISK vs STABLE", height=350)
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ))
    # Set the font size of the x-axis and y-axis labels
    fig.update_layout(
        xaxis=dict(
            tickfont=dict(
                size=10
            ),
            title_font=dict(
                size=10
            )
        ),
        yaxis=dict(
            title_font=dict(
                size=10
            )
        ),
        legend=dict(
            font=dict(
                size=10
            )
        ),
        title=dict(
            # text="My Line Chart",
            font=dict(
                size=12
            )
        )
    )
    fig.show()

In [111]:
show_stability(df)

GROUPED:::::::::::::::::::::::::::::::::::::::::::::::::::
            Risk  Number of patients
0      HIGH RISK                 2.0
total        All                 2.0


In [112]:
risk_df

Unnamed: 0,Number of patients,Risk
0,36bbe07c-69da-4b99-ad1a-46dd7cd94340,HIGH RISK
1,cbc64648-b0b7-4004-884d-567ff46e27ac,HIGH RISK


In [138]:
num_patients_not_in_data_info=7
d={"Not cagetorized":num_patients_not_in_data_info}
not_cat_df=pd.DataFrame.from_dict(d, orient='index', columns=['Risk categorization'])

Unnamed: 0,Risk categorization
Not cagetorized,7


In [114]:
grouped

Unnamed: 0,Risk,Number of patients
0,HIGH RISK,2.0
total,All,2.0


In [118]:
total_patients = len(risk_df)
grouped['Percentage'] = round((grouped['Number of patients'] / total_patients) * 100,1)
grouped

Unnamed: 0,Risk,Number of patients,Percentage
2,All,3,100.0
0,STABLE,2,66.7
1,HIGH RISK,1,33.3


In [129]:
a=grouped[grouped['Risk']=="HIGH RISK"]
a['Percentage']

1    33.3
Name: Percentage, dtype: float64

In [122]:
grouped[grouped['Risk']=="STABLE"]['Percentage'][0]

66.7

In [130]:
df

Unnamed: 0.1,Unnamed: 0,pmtct_mother,baseline_assessment,early_anc,mid_anc,late_gestation,six_weeks_assessment,fourteen_weeks_assessment,six_month_assessment,nine_month_assessment,twelve_month_assessment,eighteen_month_assessment,twenty_four_month_assessment
0,0,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,Y,N,,,,,,,,
1,1,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,N,,,,,,,,,
2,2,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,,,,,,,,,,
3,3,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,,Y,,,,,,,,
4,4,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,,,,,,,,,,
5,5,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,,,,,,,,,,
6,6,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,,Y,,,,,,,,
7,7,36bbe07c-69da-4b99-ad1a-46dd7cd94340,Y,Y,,,,,,,,,
8,8,36bbe07c-69da-4b99-ad1a-46dd7cd94340,,N,,,,,,,,,
9,9,cbc64648-b0b7-4004-884d-567ff46e27ac,N,,,,,,,,,,


Unnamed: 0,0,1
0,Not cagetorized,7


In [139]:
data = {'Risk categorization': ['STABLE', 'HIGH RISK', 'Not categorized', 'Total categorized'],
        'Number of patients': [1.0, 1.0, 6.0, 2.0]}
df = pd.DataFrame(data)
df

Unnamed: 0,Risk categorization,Number of patients
0,STABLE,1.0
1,HIGH RISK,1.0
2,Not categorized,6.0
3,Total categorized,2.0


In [140]:
# calculate the sum of patients in the not categorized and total categorized groups
sum_not_categorized = df.loc[df['Risk categorization'] == 'Not categorized', 'Number of patients'].sum()
sum_total_categorized = df.loc[df['Risk categorization'] == 'Total categorized', 'Number of patients'].sum()
df

Unnamed: 0,Risk categorization,Number of patients
0,STABLE,1.0
1,HIGH RISK,1.0
2,Not categorized,6.0
3,Total categorized,2.0


In [141]:
# create a new dataframe with the total row
total_row = {'Risk categorization': 'Total pts', 'Number of patients': sum_not_categorized + sum_total_categorized}
df_total = pd.DataFrame(total_row, index=[len(df)])
df = pd.concat([df, df_total])
df

Unnamed: 0,Risk categorization,Number of patients
0,STABLE,1.0
1,HIGH RISK,1.0
2,Not categorized,6.0
3,Total categorized,2.0
4,Total pts,8.0
