In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import os

from src.DB.Component import Component
from src.Utils.latex_vars import LatexVars
from src.config import LATEX_DIR

# Create the LaTeX variable store from LATEX_DIR; the cells below record their
# results in it by key (e.g. latexVars["numComponents"]).
latexVars = LatexVars(LATEX_DIR)
# Show which file the store reports as its location.
print("LatexVars at: ", latexVars.file_path)

In [None]:
# Fetch all Component records as a list. This uses top-level `await`, which
# works in IPython/Jupyter cells but not in a plain Python script.
components = await Component.find_all().to_list()
# Record the dataset size for the LaTeX document.
latexVars["numComponents"] = len(components)

In [None]:
from PyPDF2 import PdfReader
from tqdm.auto import tqdm

# Count the pages of every component's PDF and record total/min/max/average.
# Reading is best effort: a PDF that cannot be opened or parsed is reported and
# left out of the statistics.
pdf_pages = []
failed_pdf_paths = []
for c in tqdm(components):
    try:
        pdf_pages.append(len(PdfReader(c.pdf_path).pages))
    except Exception as e:
        print(f"Error processing {c.pdf_path}: {e}")
        failed_pdf_paths.append(c.pdf_path)

# Make it visible that the statistics below do not cover every component.
if failed_pdf_paths:
    print(
        f"Skipped {len(failed_pdf_paths)} of {len(components)} PDFs; "
        "page statistics cover only the readable ones."
    )

# min()/max()/the average are undefined for an empty list. Fail with a clear
# message *before* writing anything, so no variable is recorded for a run that
# cannot produce the full set of statistics.
if not pdf_pages:
    raise ValueError("No PDF could be read; cannot compute page statistics.")

total_pdf_pages = sum(pdf_pages)
latexVars["numPDFPages"] = total_pdf_pages
latexVars["minNumPDFPages"] = min(pdf_pages)
latexVars["maxNumPDFPages"] = max(pdf_pages)
latexVars["avgNumPDFPages"] = round(total_pdf_pages / len(pdf_pages), 2)

In [None]:
# Get total number of extracted pins
from src.DB.PipelineGrade import PipelineGrade

pipeline_grades = await PipelineGrade.find_all().to_list()
latexVars["numExtractedPins"] = sum([pg.num_llm_pins for pg in pipeline_grades if pg.num_llm_pins is not None])
latexVars["numPipelines"] = len(pipeline_grades)

In [None]:
# Per-component pin counts and their total/max/min/average.
pin_count = [len(c.pins) for c in components]

# max()/min()/the average are undefined without components. Stop with a clear
# message before any variable is written instead of failing halfway through.
if not pin_count:
    raise ValueError("No components loaded; cannot compute pin statistics.")

total_pins = sum(pin_count)
print("number of pins:", total_pins)
latexVars["numPins"] = total_pins
latexVars["maxNumPins"] = max(pin_count)
latexVars["minNumPins"] = min(pin_count)
latexVars["avgNumPins"] = round(total_pins / len(pin_count), 2)



In [None]:
from collections import Counter

# Tally how many components each manufacturer value occurs on
manufacturer_counter = Counter(component.manufacturer for component in components)

# Manufacturer names in sorted order
set_manufacturers = sorted(manufacturer_counter)

# Print one "name: count" line per manufacturer
for name in set_manufacturers:
    print(f"{name}: {manufacturer_counter[name]}")

In [None]:
# Distinct `type` values found across all components
set_types = {component.type for component in components}

def convert_type(component_type):
    """Map a raw component type string onto a coarse category.

    The match is a case-insensitive substring test. "sensor" is checked
    before "mcu", so a string containing both maps to "Sensor".

    Args:
        component_type: The raw type string of a component.

    Returns:
        "Sensor" or "MCU".

    Raises:
        ValueError: If neither keyword occurs in ``component_type``.
    """
    lowered = component_type.lower()
    # Order matters: the first matching keyword wins.
    for keyword, category in (("sensor", "Sensor"), ("mcu", "MCU")):
        if keyword in lowered:
            return category
    raise ValueError(f"Unknown type: {component_type}")

## Histogram of pin-list lengths

In [None]:
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import os
import matplotlib.ticker as mtick

# Bar chart of how many components fall into each pin-count range, saved as
# figures/pin_counts.pdf under LATEX_DIR.

# Count the number of components for each pin count
pin_lengths = Counter(len(c.pins) for c in components)

# Half-open [lower, upper) bin ranges
bin_ranges = [(0, 4), (4, 8), (8, 12), (12, 16), (16, 20),
              (20, 30), (30, 50)]

# A component at or above the last fixed upper bound would match no bin and
# silently vanish from the chart; give such components an open-ended final bin.
overflow_start = bin_ranges[-1][1]
if any(k >= overflow_start for k in pin_lengths):
    bin_ranges.append((overflow_start, float('inf')))

# Compute labels; the upper bound is exclusive, so the label shows upper - 1
labels = []
for lower, upper in bin_ranges:
    if upper == float('inf'):
        labels.append(f"{lower}+")
    else:
        labels.append(f"{lower} – {int(upper) - 1}")

# Count components in each range
group_counts = [
    sum(v for k, v in pin_lengths.items() if lower <= k < upper)
    for lower, upper in bin_ranges
]

# Apply seaborn style
sns.set_theme(style="white")

# Create the figure with an explicit Axes instead of relying on pyplot's
# implicit "current figure" state. `barplot` is the same Axes object as `ax`.
fig, ax = plt.subplots(figsize=(9, 6))
barplot = sns.barplot(
    x=labels,
    y=group_counts,
    edgecolor="black",
    ax=ax,
)

# Custom font size
font_size = 26
ax.set_xlabel("# Pins", fontsize=font_size)
ax.set_ylabel("# Components", fontsize=font_size)
ax.tick_params(axis='x', labelrotation=45, labelsize=font_size)
ax.tick_params(axis='y', labelsize=font_size)

# Counts are whole numbers: restrict ticks to integers so the '%d' formatter
# never renders fractional ticks as duplicated labels (e.g. 0, 0, 1, 1).
ax.yaxis.set_major_locator(mtick.MaxNLocator(integer=True))
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%d'))

# Remove top/right spines
sns.despine(ax=ax)

# Tight layout for saving
fig.tight_layout()

# Save figure, creating the target directory if it does not exist yet
file_path = os.path.join(LATEX_DIR, "figures", "pin_counts.pdf")
os.makedirs(os.path.dirname(file_path), exist_ok=True)
fig.savefig(file_path, bbox_inches="tight")

## Bar chart of components per manufacturer

In [None]:
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
import os
import matplotlib.ticker as mtick

# Horizontal bar chart of the number of components per manufacturer, saved as
# figures/manufacturers.pdf under LATEX_DIR. Manufacturers with fewer than
# `threshold` components are merged into a single "Other" bar.

# Set threshold for minimum number of components per manufacturer
threshold = 4  # Adjust as needed

# Count components per manufacturer
manufacturer_counts = Counter(c.manufacturer for c in components)

# Separate into main manufacturers and "Other"
main_counts = {
    manufacturer: count
    for manufacturer, count in manufacturer_counts.items()
    if count >= threshold
}
other_count = sum(
    count for count in manufacturer_counts.values() if count < threshold
)

# Add "Other" if there are any below-threshold counts
if other_count > 0:
    main_counts["Other"] = other_count

# Sort by count, largest first ("Other" is ranked by its combined count)
sorted_counts = dict(sorted(main_counts.items(), key=lambda x: x[1], reverse=True))

# Apply seaborn style (for consistency with pin counts chart)
sns.set_theme(style="white")

# Create the figure with an explicit Axes instead of relying on pyplot's
# implicit "current figure" state. `barplot` is the same Axes object as `ax`.
fig, ax = plt.subplots(figsize=(12, 8))

# Custom font size
font_size = 32

# Create horizontal bar plot
barplot = sns.barplot(
    y=list(sorted_counts.keys()),
    x=list(sorted_counts.values()),
    edgecolor="black",
    orient="h",
    ax=ax,
)

# Set labels
ax.set_xlabel("# Components", fontsize=font_size)
ax.set_ylabel("Manufacturer", fontsize=font_size)
ax.tick_params(axis='both', labelsize=font_size)

# Counts are whole numbers: restrict ticks to integers so the '%d' formatter
# never renders fractional ticks as duplicated labels.
ax.xaxis.set_major_locator(mtick.MaxNLocator(integer=True))
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter('%d'))

# Remove top/right spines (consistent with seaborn style)
sns.despine(ax=ax)

# Tight layout for saving
fig.tight_layout()

# Save figure, creating the target directory if it does not exist yet so this
# cell does not depend on the pin-count cell having been run first
manufacturers_pdf = os.path.join(LATEX_DIR, "figures", "manufacturers.pdf")
os.makedirs(os.path.dirname(manufacturers_pdf), exist_ok=True)
fig.savefig(manufacturers_pdf, bbox_inches="tight")