# N02-Facts_postprocessing
*Purpose*: After DeepSeek-R1 converts each raw Shark Tank episode transcript into a facts dictionary (with 5 categories), the LLM output is not always consistent. For example, the dictionary key names may vary slightly between episodes ("Product description" vs. "Summary of Product"). Some postprocessing is therefore required to ensure that the output facts dictionary has the same structure for every episode.

In [1]:
import numpy as np
import pandas as pd
import json
from pathlib import Path

In [11]:
# Read every facts file that has already been produced.
# The raw text of each *.txt file in ./facts is stored under its file name;
# parsing the text is left to the next step.
facts_store = {}
folder_path = Path("./facts")
# Path.glob yields entries in whatever order the filesystem returns them, which
# differs between platforms. Sorting by file name keeps facts_store, and
# everything derived from it, in a reproducible order across re-runs.
for file in sorted(folder_path.glob("*.txt"), key=lambda path: path.name):
    with file.open("r", encoding="utf-8") as f:
        facts_store[file.name] = f.read()
print(f'{len(facts_store)} Facts found in folder: {facts_store.keys()}')

119 Facts found in folder: dict_keys(['facts_shark_tank_transcript_0_GarmaGuard.txt', 'facts_shark_tank_transcript_0_Roadie.txt', 'facts_shark_tank_transcript_0_TouchUp Cup.txt', 'facts_shark_tank_transcript_10_Deux.txt', 'facts_shark_tank_transcript_10_Fish Fixe.txt', 'facts_shark_tank_transcript_10_Hidrent.txt', 'facts_shark_tank_transcript_11_Dude Wipes.txt', 'facts_shark_tank_transcript_11_Love & Pebble.txt', 'facts_shark_tank_transcript_11_Pink Picasso.txt', 'facts_shark_tank_transcript_11_Sheets Laundry Club.txt', 'facts_shark_tank_transcript_11_Zach & Zoe Sweet Bee Farm.txt', 'facts_shark_tank_transcript_12_Elf Grams.txt', 'facts_shark_tank_transcript_12_Ornament Anchor.txt', "facts_shark_tank_transcript_12_Santa's Enchanted Mailbox.txt", "facts_shark_tank_transcript_12_Wendy's Gnome Shop.txt", 'facts_shark_tank_transcript_13_Banana Loca.txt', 'facts_shark_tank_transcript_13_Liberate.txt', 'facts_shark_tank_transcript_13_MAXPRO SmartConnect.txt', 'facts_shark_tank_transcript_13_

In [12]:
# Rename each category to its standardized name and collect per-file problems.
category_checks = ['facts', 'product_description', 'pitch', 'initial', 'final']  # markers used to locate each category in the LLM output
category_final_names = dict(zip(category_checks, ['facts', 'product_description', 'pitch_summary', 'initial_offer', 'final_offer']))  # standardized category naming
error_store = {}  # file name -> list of problems found for that file
processed_facts_store = {}  # file name -> facts dict keyed by the standardized category names


def _normalize_key(key):
    """Return ``key`` lower-cased, with spaces and hyphens collapsed to underscores.

    This lets keys such as 'Product Description' or 'final-offer' be matched by
    the lower-case, underscore-separated markers in ``category_checks``.
    """
    return '_'.join(str(key).lower().replace('-', ' ').split())


def _standardize_categories(raw_fact):
    """Parse one raw facts text and map its categories to the standard names.

    Parameters
    ----------
    raw_fact : str
        Text expected to contain a JSON object with one key per category.

    Returns
    -------
    tuple of (dict, list)
        ``processed_facts`` holds the values found, keyed by standardized
        category name. ``errors`` lists every problem detected:
        'invalid_json' (text is not valid JSON), 'not_a_dict' (JSON is not an
        object), 'not_5_cat' (wrong number of top-level keys), or the marker of
        any category that matched zero or more than one key.
        An empty ``errors`` list means the facts are fully standardized.
    """
    try:
        fact = json.loads(raw_fact)
    except json.JSONDecodeError:
        # Record the failure instead of aborting the run for all other files.
        return {}, ['invalid_json']
    if not isinstance(fact, dict):
        return {}, ['not_a_dict']

    errors = []
    processed_facts = {}

    if len(fact) != len(category_checks):  # unexpected number of categories
        errors.append('not_5_cat')

    for category_check in category_checks:
        matches = [key for key in fact if category_check in _normalize_key(key)]
        if len(matches) != 1:  # missing category or too many of the same category
            errors.append(category_check)
        else:  # exactly one candidate key: store its value under the standard name
            processed_facts[category_final_names[category_check]] = fact[matches[0]]

    return processed_facts, errors


for fact_name, raw_fact in facts_store.items():
    processed_facts, errors = _standardize_categories(raw_fact)
    if not errors:
        processed_facts_store[fact_name] = processed_facts
    else:
        print(f'Error found for {fact_name}: {errors}')
        error_store[fact_name] = errors


print(f'{len(error_store)} Errors found: {error_store.keys()}')
    
        

0 Errors found: dict_keys([])


In [14]:
# Save the standardized facts dict (every file that passed the category checks)
# as a single JSON document.
output_path = './all_processed_facts.txt'
with open(output_path, "w", encoding="utf-8") as file:
    json.dump(processed_facts_store, file, indent=4)
print(f'Saved {len(processed_facts_store)} processed facts to {output_path}')

# Preview a single entry only; echoing the whole store floods the cell output.
dict(list(processed_facts_store.items())[:1])

{'facts_shark_tank_transcript_0_GarmaGuard.txt': {'facts': {'sales_to_date': '$476,000',
   'time_in_business': '1.5 years',
   'current_year_sales_projection': '$500,000',
   'current_year_profitability': '$100,000 (20% margin)',
   'product_price': '$12.99',
   'production_cost': '$1.85',
   'customer_base': '14,000 loyal customers',
   'conversion_rate': '6.6%',
   'return_customer_rate': '33%'},
  'product_description': {'name': 'GarmaGuard',
   'type': 'Natural garment and fabric cleanser',
   'functionality': 'Uses natural propellants to eliminate odor, freshen fabric, and control dirt and grime. It kills 99% of germs and odor-causing bacteria.',
   'application': 'Spray directly onto clothes.',
   'unique_selling_point': 'First of its kind, designed for on-the-go use to keep clothes fresh and clean without the need for frequent washing or dry cleaning.'},
  'pitch_summary': {'delivery': "The entrepreneurs, Pete and Bianca, presented their pitch with a narrative involving a chara

TypeError: unhashable type: 'list'