In [1]:
from os import getcwd
from os.path import join
from pathlib import Path
import sys
sys.path.insert(0, join(Path(getcwd()).parent.absolute(), 'src'))
import fuzzyfication as fz
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the HCV data set; the first CSV column is used as the row index.
data_path = '../data/hcvdat0.csv'
df = pd.read_csv(data_path, index_col=0)
# `Category` holds "<code>=<name>" strings. Split once on '=' with the
# vectorised string accessor: `n=1` caps the result at two columns even if a
# name itself contains '=', so the two-column assignment stays valid.
df[['Category', 'Category_name']] = df['Category'].str.split('=', n=1, expand=True)
# Remove the ALB, CHOL, CHE, CREA and PROT columns.
df = df.drop(columns=['ALB', 'CHOL', 'CHE', 'CREA', 'PROT'])
df.head()

Unnamed: 0,Category,Age,Sex,ALP,ALT,AST,BIL,GGT,Category_name
1,0,32,m,52.5,7.7,22.1,7.5,12.1,Blood Donor
2,0,32,m,70.3,18.0,24.7,3.9,15.6,Blood Donor
3,0,32,m,74.7,36.2,52.6,6.1,33.2,Blood Donor
4,0,32,m,52.0,30.6,22.6,18.9,33.8,Blood Donor
5,0,32,m,74.1,32.6,24.8,9.6,29.9,Blood Donor


In [3]:
# Distinct diagnosis labels present in the data set.
df['Category_name'].unique()

array(['Blood Donor', 'suspect Blood Donor', 'Hepatitis', 'Fibrosis',
       'Cirrhosis'], dtype=object)

In [7]:
def plot_distribution(data, feature, sex_diff, *funcs):
    """Plot per-row function values for ``feature`` over its histogram.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a ``feature`` column and, when ``sex_diff`` is
        true, a ``Sex`` column holding ``'m'`` / ``'f'``.
    feature : str
        Name of the column used for the x axis and for the figure title.
    sex_diff : bool
        When true, males and females are drawn as two separate groups;
        otherwise the whole frame is drawn as a single group.
    *funcs : tuple
        ``(name, fun)`` pairs. ``fun`` is applied to every row via
        ``DataFrame.apply(fun, axis=1)`` and its results become the y
        values of a line trace labelled ``name``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    if sex_diff:
        groups = [
            (sex, data[data.Sex == sex].sort_values(feature))
            for sex in ('m', 'f')
        ]
    else:
        # Sort the frame that was passed in; the previous code read the
        # notebook-level `df` here and silently ignored `data`.
        groups = [('', data.sort_values(feature))]

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # One line trace per (group, function); rows are already sorted by
    # `feature`, so x values are passed in ascending order.
    for label, rows in groups:
        for name, fun in funcs:
            fig.add_trace(
                go.Scatter(
                    x=rows[feature],
                    y=rows.apply(fun, axis=1).to_list(),
                    name=f'{name} {label}',
                )
            )

    # Histograms go on the secondary y axis so their counts do not share a
    # scale with the function values.
    for label, rows in groups:
        fig.add_trace(
            go.Histogram(
                x=rows[feature],
                name=f'{label} distribution',
                opacity=0.3,
            ),
            secondary_y=True,
        )

    fig.update_layout(title_text=feature)
    fig.show()

In [8]:
def plot_feature(feature):
    """Plot the fuzzy functions and the distribution for one feature.

    The helper chosen from ``fz`` depends on ``feature``:

    * ``'ALT'`` / ``'AST'`` -> ``fz.fuzzy_alt_ast(feature)``, three functions
      labelled ``good``, ``bad`` and ``very bad``;
    * ``'GGT'`` -> ``fz.fuzzy_ggt()``, two functions, plotted per sex;
    * anything else -> ``fz.fuzzy_bil_alp(feature)``, two functions.

    The labelled functions are forwarded to ``plot_distribution`` together
    with the notebook-level ``df``.
    """
    # Only GGT is drawn separately for males and females.
    split_by_sex = feature == 'GGT'

    if feature in ('ALT', 'AST'):
        good, bad, very_bad = fz.fuzzy_alt_ast(feature)
        labelled = [('good', good), ('bad', bad), ('very bad', very_bad)]
    else:
        if split_by_sex:
            good, bad = fz.fuzzy_ggt()
        else:
            good, bad = fz.fuzzy_bil_alp(feature)
        labelled = [('good', good), ('bad', bad)]

    plot_distribution(df, feature, split_by_sex, *labelled)

In [9]:
# Render one figure per listed column by calling plot_feature on each, in this order.
features = ['GGT', 'ALT', 'AST', 'BIL', 'ALP']
for feature in features:
    plot_feature(feature)