# Enrich data with context attributes

In [143]:
import pandas as pd
import pm4py

import requests
from PyPDF2 import PdfReader
from io import BytesIO
import ast

In [144]:
'''
INPUT_FILE_NAME = 'berlin-preprocessed-gesetzgebung-2006-2020.xes'
OUTPUT_FILE_NAME = 'berlin-preprocessed-gesetzgebung-2006-2020_with_context.xes'
INPUT_FILE_NAME_EARLIER = 'berlin-gesetzgebung-contained-2006-2020_with_context_2.xes'
'''

INPUT_FILE_NAME = 'baden-württemberg-preprocessed-gesetzgebung-gesetz-2006-2020_processed.xes'
OUTPUT_FILE_NAME = 'baden-württemberg-preprocessed-gesetzgebung-gesetz-2006-2020_processed_with_context.xes'
INPUT_FILE_NAME_EARLIER = 'baden-württemberg-gesetzgebung-contained-2006-2020_with_context_2.xes'

'''
INPUT_FILE_NAME = 'brandenburg-preprocessed-gesetzgebung-2006-2020_processed.xes'
OUTPUT_FILE_NAME = 'brandenburg-preprocessed-gesetzgebung-2006-2020_processed_with_context.xes'
INPUT_FILE_NAME_EARLIER = 'brandenburg-gesetzgebung-contained-2006-2020_with_context_2.xes'
'''

#bundeslandCapitalized = "Berlin"
bundeslandCapitalized = "Baden-Württemberg"
#bundeslandCapitalized = "Brandenburg"


CASE_ID_COL = 'case:concept:name'
TIMESTAMP_COL = 'time:timestamp'
ACTIVITY_COL = 'concept:name'

# Define election years
election_years = {
    'berlin': [1990, 1995, 1999, 2001, 2006, 2011, 2016, 2021, 2023],
    'brandenburg': [1990, 1994, 1999, 2004, 2009, 2014, 2019],
    'baden-württemberg': [1992, 1996, 2001, 2006, 2011, 2016, 2021]
}

passed_bills_activities_berlin = ['Gesetz- und Verordnungsblatt', 'Bekanntmachung (Gesetz- und Verordnungsblatt)']
passed_bills_activities_bawue = ["Gesetz", "Gesetzblatt für Baden-Württemberg"]
passed_bills_activities_brandenburg = ["Gesetz", "Gesetz- und Verordnungsblatt"]

possible_passed_bills_activities = passed_bills_activities_berlin + passed_bills_activities_bawue + passed_bills_activities_brandenburg

In [145]:
df = pm4py.read_xes(INPUT_FILE_NAME)
print(len(df), 'rows read from', INPUT_FILE_NAME)
df = df.sort_values(TIMESTAMP_COL, ignore_index=True)
#df.info()
#df.head()

df_earlier = pm4py.read_xes(INPUT_FILE_NAME_EARLIER)

parsing log, completed traces ::   0%|          | 0/1005 [00:00<?, ?it/s]

6269 rows read from baden-württemberg-preprocessed-gesetzgebung-gesetz-2006-2020_processed.xes


parsing log, completed traces ::   0%|          | 0/1190 [00:00<?, ?it/s]

### Attributes/Context to add:

* start_month **[DONE]** -> instead of Parlamentsferien (could also be looked up)
* start_weekday **[DONE]**
* count_urheber **[DONE]**
* WIP **[DONE]**
* Bytes of PDF file that is linked to first activity **[DONE]**
* Professionalism data (Appeldorn + Fortunato) **[DONE]**
* is_election_year **[DONE]**


In [146]:

def add_case_attribute(df, id_value_obj, attribute_name):
    df[attribute_name] = df[CASE_ID_COL].map(id_value_obj)

def get_month_attribute():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        month = df[TIMESTAMP_COL][i].month
        if case_id not in result_obj:
            result_obj[case_id] = month
    return result_obj

def get_weekday_attribute():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        weekday = df[TIMESTAMP_COL][i].weekday()
        if case_id not in result_obj:
            result_obj[case_id] = weekday
    return result_obj

# just get the urheber of the first activity as a case attribute
def get_urheber_first_activity():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        if case_id not in result_obj:
            urheber = df["Urheber"][i]
            result_obj[case_id] = urheber
    return result_obj

def get_urheber_count_first_activity():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        urheber = df["Urheber"][i]
        if case_id not in result_obj:
            if (urheber == "nan"): # TODO: what does nan mean here??
                result_obj[case_id] = None
            else:
                result_obj[case_id] = urheber.count(',') + 1 if isinstance(urheber, str) else 0
    return result_obj


def get_case_times_helper():
    result_obj = {}
    for case_id in df[CASE_ID_COL].unique():
        case_df = df[df[CASE_ID_COL] == case_id]
        start_time = case_df[TIMESTAMP_COL].min()
        end_time = case_df[TIMESTAMP_COL].max()
        result_obj[case_id] = {'start_time': start_time, 'end_time': end_time}
    return result_obj

def get_is_election_year():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        year = df[TIMESTAMP_COL][i].year
        if case_id not in result_obj:
            if (year in election_years[bundeslandCapitalized.lower()]):
                result_obj[case_id] = 1
            else:
                result_obj[case_id] = 0
    return result_obj

def get_WIP_during_start():
    # Get start_time and end_time for each case
    case_times = get_case_times_helper()
    # Create a DataFrame from case_times
    case_df = pd.DataFrame.from_dict(case_times, orient="index").reset_index()
    case_df.columns = [CASE_ID_COL, "start_time", "end_time"]
    
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        start_time = df[TIMESTAMP_COL][i]
        if case_id not in result_obj:
            result_obj[case_id] = ((case_df["start_time"] <= start_time) & (case_df["end_time"] > start_time)).sum()
    return result_obj

def get_pdf_information_helper(url):
    
    # handle case where there is a list of documents
    # for example pdf and docx
    if (url.startswith("[") and url.endswith("]")):
        print(url)
        filenames = ast.literal_eval(url)
        for filename in filenames:
            if filename.endswith(".pdf"):
                url = filename
                break
            
    try:
        # Get the PDF file as bytes
        print("getting file...")
        response = requests.get(url, timeout=60)
    except requests.exceptions.Timeout:
        print("The request timed out")
        return 0, 0
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return 0, 0 

    if response.status_code == 200:
        print("got file...")
        pdf_bytes = response.content

        try:
            # Load PDF into PyPDF2 PdfReader
            pdf_reader = PdfReader(BytesIO(pdf_bytes))
            
            # Extract text and calculate word count
            total_word_count = 0
            for page in pdf_reader.pages:
                text = page.extract_text()
                if text:  # Ensure the page has extractable text
                    total_word_count += len(text.split())
            return len(response.content), total_word_count
        except Exception as e:
            print(f"Error processing PDF: {e}")
            return 0, 0
    else:
        print(f"Error getting PDF: {response.status_code}")
        return 0, 0

def get_pdf_information():
    result_obj_bytes = {}
    result_obj_word_count = {}
    for i in range(len(df)):
        print("i/len(df):", i, "/", len(df))
        case_id = df[CASE_ID_COL][i]
        url = df["LokURL"][i]
        if case_id not in result_obj_bytes:
            if url == "nan":
                bytes, word_count = (0, 0)
            else:
                bytes, word_count = get_pdf_information_helper(url)
                
            print({"bytes": bytes, "total_word_count": word_count})
            result_obj_bytes[case_id] = bytes
            result_obj_word_count[case_id] = word_count
    return result_obj_bytes, result_obj_word_count

def add_pdf_case_attributes(df, id_value_obj_bytes, id_value_obj_word_count, attribute_bytes, attribute_word_count):
    df[attribute_bytes] = df[CASE_ID_COL].map(id_value_obj_bytes)
    df[attribute_word_count] = df[CASE_ID_COL].map(id_value_obj_word_count)
    
def get_professionalism_data():
    # load professionalization data to compare to
    # Read the data from replication.txt using | as the separator and including the header data
    readData = pd.read_csv('../LegislativeCapacityInGermanysParliaments/replication.txt', sep='|')
    readData_af = pd.read_csv('../LegislativeCapacityInGermanysParliaments/afScores.txt', sep=' ')
    data = {}
    data_af = {}
    
    bundesland = bundeslandCapitalized
    if(bundesland == "Baden-Württemberg"):
        data[bundesland.lower()] = readData[readData['land'] == "Baden-Wurttemberg"]
        data_af[bundesland.lower()] = readData_af[readData_af['land'] == "Baden-Wurttemberg"]
    else: 
        data[bundesland.lower()] = readData[readData['land'] == bundesland]
        data_af[bundesland.lower()] = readData_af[readData_af['land'] == bundesland]

    # first, we'll specify a squire-type scale
    data[bundesland.lower()]['squire'] = 1/3 * (data[bundesland.lower()]['salary'] / data[bundesland.lower()]['fp_mp_salary_2015']) + \
                    1/3 * (data[bundesland.lower()]['staff'] / data[bundesland.lower()]['fp_staffmp_2015']) + \
                    1/6 * (data[bundesland.lower()]['commDays'] / data[bundesland.lower()]['fp_com_sessions']) + \
                    1/6 * (data[bundesland.lower()]['plenDays'] / data[bundesland.lower()]['fp_plenary_sessions'])

    return data, data_af
             

def get_professionalism_values(data, columnName):
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        year = df[TIMESTAMP_COL][i].year
        if case_id not in result_obj:
            if (data.loc[data['year'] == year, columnName].empty):
                result_obj[case_id] = None
            else:
                result_obj[case_id] = data.loc[data['year'] == year, columnName].values[0]
    return result_obj
       

def get_is_passed_bill():
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        activityType = df[ACTIVITY_COL][i]
        if activityType in possible_passed_bills_activities:
            result_obj[case_id] = 1
        elif case_id not in result_obj:
            result_obj[case_id] = 0
    return result_obj
    

In [147]:

#pdf_info_bytes, pdf_info_word_count = get_pdf_information()

def get_pdf_information_fromOtherFile_helper(case_id, df):
    entry = df[df[CASE_ID_COL] == case_id].iloc[0]
    bytes = entry.get("case:pdf_bytes")
    word_count = entry.get("case:pdf_word_count")
    return (bytes, word_count)

def add_pdf_case_attributes_otherDoc(df, id_value_obj_bytes, id_value_obj_word_count, attribute_bytes, attribute_word_count):
    print("Mapping bytes:")
    df[attribute_bytes] = df[CASE_ID_COL].astype(int).map(id_value_obj_bytes)
    print(df[[CASE_ID_COL, attribute_bytes]].head())
    
    print("Mapping word count:")
    df[attribute_word_count] = df[CASE_ID_COL].astype(int).map(id_value_obj_word_count)
    print(df[[CASE_ID_COL, attribute_word_count]].head())

def get_pdf_information_fromOtherFile(df):
    result_obj_bytes = {}
    result_obj_word_count = {}
    for i in range(len(df)):
        print("i/len(df):", i, "/", len(df))
        case_id = df[CASE_ID_COL][i]
        url = df["LokURL"][i]
        case_id_int = int(float(df[CASE_ID_COL][i]))
        if case_id_int not in result_obj_bytes:
            if url == "nan":
                bytes, word_count = (0, 0)
            else:
                bytes, word_count = get_pdf_information_fromOtherFile_helper(case_id, df)
                
            print({"bytes": bytes, "total_word_count": word_count})
            result_obj_bytes[case_id_int] = bytes
            result_obj_word_count[case_id_int] = word_count
    return result_obj_bytes, result_obj_word_count

pdf_info_bytes, pdf_info_word_count = get_pdf_information_fromOtherFile(df_earlier)

i/len(df): 0 / 5244
{'bytes': 41925, 'total_word_count': 1363}
i/len(df): 1 / 5244
i/len(df): 2 / 5244
i/len(df): 3 / 5244
i/len(df): 4 / 5244
i/len(df): 5 / 5244
i/len(df): 6 / 5244
i/len(df): 7 / 5244
i/len(df): 8 / 5244
{'bytes': 152186, 'total_word_count': 10415}
i/len(df): 9 / 5244
i/len(df): 10 / 5244
i/len(df): 11 / 5244
i/len(df): 12 / 5244
i/len(df): 13 / 5244
i/len(df): 14 / 5244
i/len(df): 15 / 5244
i/len(df): 16 / 5244
{'bytes': 512521, 'total_word_count': 1311}
i/len(df): 17 / 5244
i/len(df): 18 / 5244
i/len(df): 19 / 5244
i/len(df): 20 / 5244
i/len(df): 21 / 5244
{'bytes': 193683, 'total_word_count': 24650}
i/len(df): 22 / 5244
i/len(df): 23 / 5244
i/len(df): 24 / 5244
i/len(df): 25 / 5244
{'bytes': 134404, 'total_word_count': 15213}
i/len(df): 26 / 5244
i/len(df): 27 / 5244
i/len(df): 28 / 5244
i/len(df): 29 / 5244
{'bytes': 55206, 'total_word_count': 2977}
i/len(df): 30 / 5244
i/len(df): 31 / 5244
i/len(df): 32 / 5244
i/len(df): 33 / 5244
{'bytes': 94824, 'total_word_co

In [148]:

add_case_attribute(df, get_month_attribute(), 'case:start_month')
add_case_attribute(df, get_weekday_attribute(), "case:start_weekday")
add_case_attribute(df, get_urheber_first_activity(), "case:author_first_activity")
add_case_attribute(df, get_urheber_count_first_activity(), "case:author_first_activity_count")
add_case_attribute(df, get_WIP_during_start(), "case:WIP_during_start")
#add_pdf_case_attributes(df, pdf_info_bytes, pdf_info_word_count, "case:pdf_bytes", "case:pdf_word_count")
add_pdf_case_attributes_otherDoc(df, pdf_info_bytes, pdf_info_word_count, "case:pdf_bytes", "case:pdf_word_count")
data, data_af = get_professionalism_data()

add_case_attribute(df, get_professionalism_values(data[bundeslandCapitalized.lower()], "squire"), "case:squire")
add_case_attribute(df, get_professionalism_values(data[bundeslandCapitalized.lower()], "salary"), "case:salary")
add_case_attribute(df, get_professionalism_values(data[bundeslandCapitalized.lower()], "staff"), "case:staff")
add_case_attribute(df, get_professionalism_values(data[bundeslandCapitalized.lower()], "commDays"), "case:commDays")
add_case_attribute(df, get_professionalism_values(data[bundeslandCapitalized.lower()], "plenDays"), "case:plenDays")
add_case_attribute(df, get_professionalism_values(data_af[bundeslandCapitalized.lower()], "afScore"), "case:af_score")
add_case_attribute(df, get_is_passed_bill(), "case:is_passed_bill")


Mapping bytes:
  case:concept:name  case:pdf_bytes
0             32019           41925
1             32029          152186
2             32043           91920
3             32043           91920
4             32019           41925
Mapping word count:
  case:concept:name  case:pdf_word_count
0             32019                 1363
1             32029                10415
2             32043                  693
3             32043                  693
4             32019                 1363


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[bundesland.lower()]['squire'] = 1/3 * (data[bundesland.lower()]['salary'] / data[bundesland.lower()]['fp_mp_salary_2015']) + \


In [149]:
df[df["case:is_passed_bill"] == 1]["case:concept:name"].nunique()

312

In [150]:
add_case_attribute(df, get_is_election_year(), "case:is_election_year")

# enrich with data from csv files created through performance analysis

frequencies = pd.read_csv('../Frequency_yearly_Gesetzgebung.csv')
cycle_times = pd.read_csv('../Cycle Time_yearly_Gesetzgebung.csv')
arrival_rates = pd.read_csv('../Inter-arrival Time_yearly_Gesetzgebung.csv')
variants = pd.read_csv('../Variants_yearly_Gesetzgebung.csv')

def get_yearly_data_from_csv(data):
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        year = df[TIMESTAMP_COL][i].year
        if case_id not in result_obj:
            if (data.loc[data['year'] == year, bundeslandCapitalized].empty):
                result_obj[case_id] = None
            else:
                result_obj[case_id] = data.loc[data['year'] == year, bundeslandCapitalized].values[0]
    return result_obj

add_case_attribute(df, get_yearly_data_from_csv(frequencies), "case:yearly_frequency")
add_case_attribute(df, get_yearly_data_from_csv(cycle_times), "case:yearly_cycle_time")
add_case_attribute(df, get_yearly_data_from_csv(arrival_rates), "case:yearly_arrival_rate")
add_case_attribute(df, get_yearly_data_from_csv(variants), "case:yearly_variants")

df.head()

Unnamed: 0,lifecycle:transition,time:timestamp,@id,ReihNr,DokArt,DokArtL,DokTypL,Desk,Titel,DokNr,...,case:staff,case:commDays,case:plenDays,case:af_score,case:is_passed_bill,case:is_election_year,case:yearly_frequency,case:yearly_cycle_time,case:yearly_arrival_rate,case:yearly_variants
0,complete,2006-01-13 01:00:00+00:00,D-73565,1,Drs,Drucksache,Gesetzentwurf,Wahlrecht,Gesetz zur Änderung des Landtagswahlgesetzes,13/5046,...,80610.859375,106.0,24,-0.27513,1,1,32,89.97,10.55,27
1,complete,2006-01-18 01:00:00+00:00,D-73599,1,Drs,Drucksache,Gesetzentwurf,Privatschule,Gesetz zur Änderung des Privatschulgesetzes un...,13/5062,...,80610.859375,106.0,24,-0.27513,1,1,32,89.97,10.55,27
2,complete,2006-01-24 01:00:00+00:00,D-73642,1,Drs,Drucksache,Gesetzentwurf,Abgeordnetengesetz (Baden-Württemberg),Gesetz zur Änderung des Abgeordnetengesetzes,13/5086,...,80610.859375,106.0,24,-0.27513,0,1,32,89.97,10.55,27
3,complete,2006-02-01 01:00:00+00:00,D-73643,2,PlPr,Plenarprotokoll,"['Aktuelle Debatte', 'Erste Beratung']",,,13/106,...,80610.859375,106.0,24,-0.27513,0,1,32,89.97,10.55,27
4,complete,2006-02-01 01:00:00+00:00,D-73566,2,PlPr,Plenarprotokoll,"['Aktuelle Debatte', 'Erste Beratung']",,,13/106,...,80610.859375,106.0,24,-0.27513,1,1,32,89.97,10.55,27


In [151]:
# one-hot encoding for urheber

all_urheber = df["Urheber"].unique()

def determine_origin_feature_name(urheber, urheber_feature_names):
    if "ausschuss" in urheber.lower():
        if "ausschuss" not in urheber_feature_names:
            urheber_feature_names.append("ausschuss")
    elif urheber.lower() not in urheber_feature_names:
        urheber_feature_names.append(urheber.lower())

urheber_feature_names = []
for urheber in all_urheber:
    if isinstance(urheber, str):
        if (urheber.startswith("[") and urheber.endswith("]")):
            urheber_list = ast.literal_eval(urheber)
            for urheber in urheber_list:
                determine_origin_feature_name(urheber, urheber_feature_names)
        else:
            determine_origin_feature_name(urheber, urheber_feature_names)


def get_urheber_first_activity(urheber_feature_name):
    result_obj = {}
    for i in range(len(df)):
        case_id = df[CASE_ID_COL][i]
        if case_id not in result_obj:
            urheber = df["Urheber"][i]
            if (urheber.startswith("[") and urheber.endswith("]")):
                urheber_list = ast.literal_eval(urheber)
                urheber_list = [u.lower() for u in urheber_list]
                if urheber_feature_name in urheber_list:
                    result_obj[case_id] = 1
                else:
                    result_obj[case_id] = 0       
            else:
                if urheber.lower() == urheber_feature_name:
                    result_obj[case_id] = 1
                else:
                    result_obj[case_id] = 0         
    return result_obj

for urheber_feature_name in urheber_feature_names:
    add_case_attribute(df, get_urheber_first_activity(urheber_feature_name), "case:author_first_activity_" + urheber_feature_name)
    

In [152]:
#activities = df[df[CASE_ID_COL] == df.iloc[3749]["case:concept:name"]]
#print(activities["time:timestamp"])

df.head()

Unnamed: 0,lifecycle:transition,time:timestamp,@id,ReihNr,DokArt,DokArtL,DokTypL,Desk,Titel,DokNr,...,case:author_first_activity_bündnis 90/die grünen (grüne),"case:author_first_activity_mack, winfried (cdu)","case:author_first_activity_untersteller, franz (grüne)","case:author_first_activity_kübler, jochen k. (cdu)","case:author_first_activity_winkler, alfred (spd)","case:author_first_activity_bullinger, friedrich (fdp/dvp)","case:author_first_activity_murschel, bernd (grüne)","case:author_first_activity_ministerium für wissenschaft, forschung und kunst",case:author_first_activity_präsidentin des landtags,case:author_first_activity_alternative für deutschland (afd)
0,complete,2006-01-13 01:00:00+00:00,D-73565,1,Drs,Drucksache,Gesetzentwurf,Wahlrecht,Gesetz zur Änderung des Landtagswahlgesetzes,13/5046,...,0,0,0,0,0,0,0,0,0,0
1,complete,2006-01-18 01:00:00+00:00,D-73599,1,Drs,Drucksache,Gesetzentwurf,Privatschule,Gesetz zur Änderung des Privatschulgesetzes un...,13/5062,...,0,0,0,0,0,0,0,0,0,0
2,complete,2006-01-24 01:00:00+00:00,D-73642,1,Drs,Drucksache,Gesetzentwurf,Abgeordnetengesetz (Baden-Württemberg),Gesetz zur Änderung des Abgeordnetengesetzes,13/5086,...,0,0,0,0,0,0,0,0,0,0
3,complete,2006-02-01 01:00:00+00:00,D-73643,2,PlPr,Plenarprotokoll,"['Aktuelle Debatte', 'Erste Beratung']",,,13/106,...,0,0,0,0,0,0,0,0,0,0
4,complete,2006-02-01 01:00:00+00:00,D-73566,2,PlPr,Plenarprotokoll,"['Aktuelle Debatte', 'Erste Beratung']",,,13/106,...,0,0,0,0,0,0,0,0,0,0


In [None]:
pm4py.write_xes(df, OUTPUT_FILE_NAME)
pm4py.write_xes(df[df["case:is_passed_bill"] == 1], OUTPUT_FILE_NAME.replace(".xes", "_passed_bills.xes"))

'''
df_passed_bills = df[df["case:is_passed_bill"] == 1]

# since proM performance spectrum miner does not like it "," needs to be removed 
df_passed_bills["concept:name"] = df_passed_bills["concept:name"].str.replace(',', '')
df_passed_bills["concept:name"] = df_passed_bills["concept:name"].str.lstrip()

pm4py.write_xes(df_passed_bills, OUTPUT_FILE_NAME.replace(".xes", "_passed_bills_performanceSpectrum.xes"))
'''

exporting log, completed traces ::   0%|          | 0/1005 [00:00<?, ?it/s]

exporting log, completed traces ::   0%|          | 0/312 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passed_bills["concept:name"] = df_passed_bills["concept:name"].str.replace(',', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_passed_bills["concept:name"] = df_passed_bills["concept:name"].str.lstrip()


exporting log, completed traces ::   0%|          | 0/312 [00:00<?, ?it/s]