In [23]:
import pandas as pd
import pathlib
import yaml
from functions import *
from collections import Counter

import plotly.graph_objects as go

# Path
BASE_PATH = pathlib.Path().resolve()
DATA_PATH = BASE_PATH.joinpath('data').resolve()

# Read Data
df = pd.read_csv(DATA_PATH.joinpath('df.csv'), delimiter=';', skiprows=4, na_values='#')
df['index'] = range(1, len(df) + 1)

with open(DATA_PATH.joinpath('config.yml')) as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to Python the dictionary format
    yaml_list = yaml.safe_load(file)
    completeness_cols = yaml_list['completeness']

# Clean column names and parse date columns
df.columns = parse_column_names(df)

date_cols = returnDateCols(df, threshold=0.5, sample_size=1000)
df[date_cols] = df[date_cols].apply(pd.to_datetime, errors='coerce')

# Completeness Frame and Score
compl_frame, compl_array = dim_completeness(df, completeness_cols)

In [31]:
df_new = pd.concat([df['notiftype'], compl_frame], axis=1)
df_new.set_index('notiftype', inplace=True)

values = df['notiftype'].unique()

counter = []
for v in values:
    c = Counter(df_new.loc[v].values.flatten())
    counter.append(c)
    
data = []
for i in range(len(counter)):
    d = {
        "values": list(counter[i].values()),
        "labels": list(counter[i].keys()),
        "domain": {"column": 0},
        "name": values[i],
        "hole": .4,
        "type": "pie"
    }
    data.append(d)

layout = go.Layout(
    {
        "title": "Completeness per Notification Type",
        "grid": {"rows": 1, "columns": 2},
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "K1",
                "x": 0.23,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "KT",
                "x": 0.77,
                "y": 0.5
            }
        ]
    }
)

In [33]:
data

[{'values': [69755, 12877],
  'labels': [False, True],
  'domain': {'column': 0},
  'name': 'KT',
  'hole': 0.4,
  'type': 'pie'},
 {'values': [200159, 11779],
  'labels': [False, True],
  'domain': {'column': 0},
  'name': 'K1',
  'hole': 0.4,
  'type': 'pie'}]