In [1]:
import pandas as pd
import numpy as np
import re
import os

In [2]:
def compute_on_time_percent(filename):
    """Return the mean on-time order fraction reported in a text file.

    The file is scanned for lines containing "Late Orders" or
    "On Time Orders"; lines that also contain "Private Label" are ignored.
    Each kept line is split on runs of two or more whitespace characters
    and its second field is parsed as the integer order count.

    The k-th "Late Orders" count is paired with the k-th "On Time Orders"
    count, the fraction ``on_time / (late + on_time)`` is computed for each
    pair, and the fractions are averaged. Rows are classified by their
    label rather than by their position in the file, so the result does
    not depend on which of the two lines appears first.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    float
        Mean of the per-pair on-time fractions, rounded to 4 decimals.
        NaN when no pair yields a defined fraction (no matching lines,
        unmatched counts only, or every pair sums to zero).

    Raises
    ------
    ValueError
        If a matching line has no count field, or the count field is not
        an integer.
    """
    #open file, get text
    with open(filename, 'r') as file:
        lines = file.read().split("\n")

    #collect the counts, classified by the label found on the line
    late_counts = []
    on_time_counts = []
    for line in lines:
        is_on_time = "On Time Orders" in line
        if "Private Label" in line or not (is_on_time or "Late Orders" in line):
            continue

        # Fields are separated by runs of 2+ whitespace: label, count, ...
        fields = re.split(r'\s{2,}', line.strip())
        if len(fields) < 2:
            raise ValueError(f"No count field found in line: {line!r}")
        count = int(fields[1])

        if is_on_time:
            on_time_counts.append(count)
        else:
            late_counts.append(count)

    #pair the k-th late count with the k-th on-time count
    # concat aligns on the index, so an unmatched count is paired with NaN
    # and drops out of the mean below instead of shifting later pairs.
    paired = pd.concat(
        [
            pd.Series(late_counts, dtype="float64", name="Late_Count"),
            pd.Series(on_time_counts, dtype="float64", name="OT_Count"),
        ],
        axis=1,
    )

    #Calculate percentages
    paired["Sum"] = paired["Late_Count"] + paired["OT_Count"]
    # A zero total gives 0/0 = NaN, which mean() skips by default.
    paired["OT_Percent"] = paired["OT_Count"] / paired["Sum"]

    #Compute average
    on_time_avg = round(paired["OT_Percent"].mean(), 4)

    return on_time_avg

In [3]:
results = []

# Every regular ".txt" file sitting directly inside inputs/ is one report to score.
filenames = [
    'inputs/' + f
    for f in os.listdir("inputs/")
    if f.endswith(".txt") and os.path.isfile(os.path.join("inputs/", f))
]

# Score each report and keep one record per file for the summary table.
for fn in filenames:
    percent_on_time = compute_on_time_percent(fn)
    results.append(dict(filename=fn, percent_on_time=percent_on_time))
    
    

In [4]:
# Turn the list of per-file result dicts into one table: one row per input
# file, with the dict keys "filename" and "percent_on_time" as columns.
final_df = pd.DataFrame(results)
# Bare last expression so the notebook renders the frame as the cell output.
final_df

Unnamed: 0,filename,percent_on_time
0,inputs/extra_m_and_g.txt,0.7056
1,inputs/extra_8k_hold_tank.txt,0.5717
2,inputs/extra_mandg_and_extra_bucketline.txt,0.7056
3,inputs/2_extra_bucket_lines.txt,0.6629
4,inputs/extra_quart_line.txt,0.9972
5,inputs/extra_20k_hold_tank.txt,0.9466
6,inputs/3_extra_bucket_lines.txt,0.6629
7,inputs/extra_bucket.txt,0.6629
