In [135]:
# main libraries
import os
import json
import math
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from groq import Groq

# typing and classes
import instructor
from pydantic import BaseModel, Field
from typing import Optional, Literal, List
from enum import Enum 

# constants
load_dotenv("../.env")
GROQ_KEY = os.getenv("GROQ_KEY")
MODEL = "llama-3.1-8b-instant"

# initialisation of model
client = Groq(api_key=GROQ_KEY)
client = instructor.from_groq(client, mode=instructor.Mode.JSON)
print("Groq client initialised.")

Groq client initialised.


In [99]:
cpu_df = pd.read_csv("../../data/cpu.csv")
cooler_df = pd.read_csv("../../data/cooler.csv")
storage_df = pd.read_csv("../../data/storage.csv")
memory_df = pd.read_csv("../../data/memory.csv")
motherboard_df = pd.read_csv("../../data/motherboard.csv")

In [100]:
cpu_requirements = json.load(open('../requirements/cpu_requirements.json', 'r'))
cooler_requirements = json.load(open('../requirements/cooler_requirements.json', 'r'))
storage_requirements = json.load(open('../requirements/storage_requirements.json', 'r'))
memory_requirements = json.load(open('../requirements/memory_requirements.json', 'r'))
motherboard_requirements = json.load(open('../requirements/motherboard_requirements.json', 'r'))

## Filter CSVs

In [101]:
def normalize_string(s):
    if isinstance(s, str):
        return s.lower().replace(" ", "_").replace("+", "plus").replace("-", "_")
    return s

In [102]:
def filter_cpu(
    df,
    min_cores=0,
    min_core_clock_ghz=0,
    min_boost_clock_ghz=0,
    microarchitecture=None,
    max_tdp_watts=math.inf,
    max_price=math.inf,
):
    if min_cores is None: 
        min_cores = 0
    if min_core_clock_ghz is None:
        min_core_clock_ghz = 0
    if min_boost_clock_ghz is None:
        min_boost_clock_ghz = 0
    if max_tdp_watts is None:
        max_tdp_watts = math.inf 
    if max_price is None:
        max_price = math.inf

    filters = [
        df["core_count"] >= min_cores,
        df["performance_core_clock"] >= min_core_clock_ghz,
        df["performance_core_boost_clock"] >= min_boost_clock_ghz,
        df["tdp"] <= max_tdp_watts,
        df["price"].astype(float) <= max_price,
    ]

    if microarchitecture:
        filters.append(df['microarchitecture'].apply(normalize_string).isin(microarchitecture))

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=True)


#############################################################################


def filter_cooler(
    df,
    min_fan_rpm=0,
    max_noise_level_db=math.inf,
    max_radiator_size_mm=math.inf,
    max_price=math.inf,
):
    if min_fan_rpm is None:
        min_fan_rpm = 0
    if max_noise_level_db is None:
        max_noise_level_db = math.inf 
    if max_radiator_size_mm is None:
        max_radiator_size_mm = math.inf 
    if max_price is None:
        max_price = math.inf 

    filters = [
        df["average_fan_rpm"] >= min_fan_rpm,
        df["average_noise_level"] <= max_noise_level_db,
        df["radiator_size"] <= max_radiator_size_mm,
        df["price"].astype(float) <= max_price,
    ]

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=True)


#############################################################################


def filter_storage(
    df,
    min_capacity_gb=0,
    preferred_type=None, 
    min_cache_gb=0,
    preferred_form_factor=None,
    preferred_interface=None,
    max_price_per_gb=math.inf
):
    if min_capacity_gb is None:
        min_capacity_gb = 0
    if min_cache_gb is None:
        min_cache_gb = 0
    if max_price_per_gb is None:
        max_price_per_gb = math.inf

    filters = [
        df["capacity_gb"].astype(float) >= min_capacity_gb,
        df["cache_gb"].astype(float) >= min_cache_gb,
        df["price_per_gb"].astype(float) <= max_price_per_gb
    ]

    if preferred_type is not None:
        filters.append(df["type"].apply(normalize_string).isin(preferred_type))

    if preferred_form_factor is not None:
        filters.append(df["form_factor"].apply(normalize_string).isin(preferred_form_factor))

    if preferred_interface is not None:
        filters.append(df["interface"].apply(normalize_string).isin(preferred_interface))

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=True)


#############################################################################


def filter_memory(
    df,
    min_capacity_gb=0,
    min_speed_mhz=None, # TODO
    max_module_count=0,
    max_cas_latency=math.inf,
    max_price=math.inf
):
    if min_capacity_gb is None:
        min_capacity_gb = 0
    if max_module_count is None:
        max_module_count = math.inf
    if max_cas_latency is None:
        max_cas_latency = math.inf 
    if max_price is None:
        max_price = math.inf
    filters = [
        df["total_ram"] >= min_capacity_gb,
        df["module_count"] <= max_module_count,
        df["cas_latency"] <= max_cas_latency,
        df["price"].astype(float) <= max_price,
    ]

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=True)


#############################################################################


def filter_motherboard(
    df,
    preferred_socket=None,
    preferred_form_factor=None, 
    min_max_memory_gb=0,
    min_memory_slots=0,
    max_price=math.inf
):
    if min_max_memory_gb is None:
        min_max_memory_gb = 0
    if min_memory_slots is None:
        min_memory_slots = 0
    if max_price is None:
        max_price = math.inf

    filters = [
        df["max_memory_gb"].astype(float) >= min_max_memory_gb,
        df["memory_slots"] >= min_memory_slots,
        df["price"] <= max_price,
    ]

    if preferred_socket:
        filters.append(df["cpu_socket"].apply(normalize_string).isin(preferred_socket))

    if preferred_form_factor:
        filters.append(df["form_factor"].apply(normalize_string).isin(preferred_form_factor))


    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=True)


# filter_memory(memory_df.copy(), **memory_requirements)



In [125]:
limit = 10

cpu_filtered = filter_cpu(cpu_df.copy(), **cpu_requirements).head(limit)
cooler_filtered = filter_cooler(cooler_df.copy(), **cooler_requirements).head(limit)
storage_filtered = filter_storage(storage_df.copy(), **storage_requirements).head(limit)
memory_filtered = filter_memory(memory_df.copy(), **memory_requirements).head(limit)
motherboard_filtered = filter_motherboard(motherboard_df.copy(), **motherboard_requirements).head(limit)

In [126]:
cpu_filtered

Unnamed: 0,title,core_count,performance_core_clock,performance_core_boost_clock,microarchitecture,tdp,integrated_graphics,rating,price
0,AMD Ryzen 7 9800X3D,8,4.7,5.2,Zen 5,120,Radeon,4.5,499.0
1,AMD Ryzen 7 7800X3D,8,4.2,5.0,Zen 4,120,Radeon,4.5,391.12
2,AMD Ryzen 5 7600X,6,4.7,5.3,Zen 4,105,Radeon,4.5,206.3
3,AMD Ryzen 5 5600X,6,3.7,4.6,Zen 3,65,,4.5,149.0
4,AMD Ryzen 7 9700X,8,3.8,5.5,Zen 5,65,Radeon,4.5,303.58
5,AMD Ryzen 5 9600X,6,3.9,5.4,Zen 5,65,Radeon,4.5,208.0
6,AMD Ryzen 9 9950X3D,16,4.3,5.7,Zen 5,170,Radeon,4.5,899.99
7,AMD Ryzen 7 7700X,8,4.5,5.4,Zen 4,105,Radeon,4.5,268.6
8,AMD Ryzen 5 5500,6,3.6,4.2,Zen 3,65,,4.5,73.0
9,AMD Ryzen 5 7600,6,3.8,5.1,Zen 4,65,Radeon,4.5,198.85


In [127]:
cooler_filtered

Unnamed: 0,title,rating,price,fan_rpm,noise_level,color,radiator_size,average_noise_level,average_fan_rpm
0,Thermalright Aqua Elite V3,5.0,44.9,1500 RPM,25.6 dB,Black,240.0,25.6,1500.0
1,Thermalright Frozen Notte ARGB,4.5,48.9,2000 RPM,27.7 dB,White,240.0,27.7,2000.0
2,Thermalright Aqua Elite V3,4.5,44.9,1500 RPM,25.6 dB,White,240.0,25.6,1500.0
3,Thermalright Frozen Notte ARGB,4.0,49.9,2000 RPM,27.7 dB,Black,240.0,27.7,2000.0
4,Thermalright Aqua Elite V3,5.0,34.9,1500 RPM,25.6 dB,Black,120.0,25.6,1500.0
5,Thermalright Frozen Infinity 240 ARGB,4.0,47.9,2000 RPM,28.2 dB,Black,240.0,28.2,2000.0
6,Thermalright Frozen Edge,4.5,48.9,2150 RPM,28.1 dB,Black,240.0,28.1,2150.0
7,Thermalright AQUA ELITE ARGB V4,5.0,45.49,1550 RPM,25.6 dB,Black,240.0,25.6,1550.0
8,Thermalright AQUA ELITE ARGB V4,5.0,43.99,1550 RPM,25.6 dB,White,240.0,25.6,1550.0
9,Thermalright Frozen Prism ARGB,4.0,47.9,1850 RPM,27 dB,Black,240.0,27.0,1850.0


In [128]:
storage_filtered

Unnamed: 0,title,rating,price,capacity,price_per_gb,type,cache,form_factor,interface,cache_gb,capacity_gb
0,Samsung 990 Pro,4.5,169.99,2 TB,0.085,SSD,2048 MB,M.2-2280,M.2 PCIe 4.0 X4,2.048,2000.0
1,Samsung 990 Pro,4.5,302.0,4 TB,0.075,SSD,4096 MB,M.2-2280,M.2 PCIe 4.0 X4,4.096,4000.0
2,Samsung 9100 PRO,5.0,489.99,4 TB,0.122,SSD,4096 MB,M.2-2280,M.2 PCIe 5.0 X4,4.096,4000.0
3,Samsung 980 Pro,4.5,203.0,2 TB,0.102,SSD,2048 MB,M.2-2280,M.2 PCIe 4.0 X4,2.048,2000.0
4,Samsung 9100 PRO,,269.99,2 TB,0.135,SSD,2048 MB,M.2-2280,M.2 PCIe 5.0 X4,2.048,2000.0
5,Samsung 870 Evo,4.5,162.0,2 TB,0.081,SSD,2048 MB,"2.5""",SATA 6.0 Gb/s,2.048,2000.0
6,Samsung 870 Evo,4.5,269.99,4 TB,0.067,SSD,4096 MB,"2.5""",SATA 6.0 Gb/s,4.096,4000.0
7,Acer Predator GM7000,4.5,125.99,2 TB,0.063,SSD,2048 MB,M.2-2280,M.2 PCIe 4.0 X4,2.048,2000.0
8,Samsung 990 Pro w/Heatsink,5.0,312.99,4 TB,0.078,SSD,4096 MB,M.2-2280,M.2 PCIe 4.0 X4,4.096,4000.0
9,Samsung 990 Pro w/Heatsink,4.5,179.99,2 TB,0.09,SSD,2048 MB,M.2-2280,M.2 PCIe 4.0 X4,2.048,2000.0


In [129]:
memory_filtered

Unnamed: 0,title,rating,price,speed,modules,price_per_gb,color,first_word_latency,cas_latency,module_count,gb_per_module,total_ram
0,G.Skill Ripjaws V 32 GB,4.5,52.99,DDR4-3600,2 x 16GB,1.656,Black,10 ns,18.0,2,16,32
1,G.Skill Aegis 16 GB,4.5,29.99,DDR4-3200,2 x 8GB,1.874,Red / Black,10 ns,16.0,2,8,16
2,Corsair Vengeance RGB 64 GB,4.5,239.99,DDR5-6000,2 x 32GB,3.75,White / Silver,10 ns,30.0,2,32,64
3,Silicon Power SP016GLLTU160N22 16 GB,4.5,15.99,DDR3-1600,2 x 8GB,0.999,Green,13.75 ns,11.0,2,8,16
4,G.Skill Flare X5 32 GB,,114.99,DDR5-6000,2 x 16GB,3.593,Black,9.333 ns,28.0,2,16,32
5,Corsair Vengeance 192 GB,,599.99,DDR5-5200,4 x 48GB,3.125,Black,14.615 ns,38.0,4,48,192
6,Crucial Pro 64 GB,5.0,89.99,DDR4-3200,2 x 32GB,1.406,Black / Green,13.75 ns,22.0,2,32,64
7,G.Skill Trident Z5 Neo 64 GB,4.5,204.99,DDR5-6000,2 x 32GB,3.203,Black / Silver,10 ns,30.0,2,32,64
8,ADATA XPG Lancer Blade 32 GB,5.0,109.99,DDR5-6000,2 x 16GB,3.437,Black,10 ns,30.0,2,16,32
9,G.Skill Trident Z5 RGB 64 GB,4.5,234.99,DDR5-6800,2 x 32GB,3.672,Black,10 ns,34.0,2,32,64


In [130]:
motherboard_filtered

Unnamed: 0,title,rating,price,cpu_socket,form_factor,max_memory,memory_slots,color,max_memory_gb
0,MSI B650 GAMING PLUS WIFI,4.5,169.99,AM5,ATX,192 GB,4,Black,192.0
1,Asus PRIME B650-PLUS WIFI,5.0,149.99,AM5,ATX,192 GB,4,Black / Silver,192.0
2,MSI MAG B650 TOMAHAWK WIFI,4.5,199.62,AM5,ATX,256 GB,4,Black,256.0
3,Asus PRIME B550M-A WIFI II,4.0,119.99,AM4,Micro ATX,128 GB,4,Blue / Silver,128.0
4,Gigabyte X870E AORUS ELITE WIFI7,4.0,315.08,AM5,ATX,256 GB,4,Black,256.0
5,Gigabyte B650 EAGLE AX,4.5,164.99,AM5,ATX,192 GB,4,Gray / Black,192.0
6,MSI MAG X870 TOMAHAWK WIFI,4.5,299.99,AM5,ATX,256 GB,4,Black,256.0
7,Gigabyte X870 EAGLE WIFI7,5.0,229.99,AM5,ATX,256 GB,4,Black,256.0
8,MSI B760 GAMING PLUS WIFI,4.5,173.32,LGA1700,ATX,192 GB,4,Black / Silver,192.0
9,MSI B550M PRO-VDH WIFI,4.5,103.99,AM4,Micro ATX,128 GB,4,Black,128.0


## Recommendation engine

In [131]:
system_prompt = """You are tasked with recommending a compatible and high-performance PC setup. You are given five JSON arrays, consisting of details of CPUs, storage hard drives, and memory modules. From the list, choose only ONE component from each array, ensuring compatibility across all components that it meets the user's expectation and preference based on their input. For each component, output the name, as well as the index number of its row. You must only select from the given options. Do not invent anything new."""

class Component(BaseModel):
    index: int
    name: str 
    price: int

class ComponentChoices(BaseModel):
    cpu: Component 
    cooler: Component
    storage: Component 
    memory: Component 
    motherboard: Component

message = "I want a decent PC rig that does things fast and has a lot of RAM!"

user_prompt = f"""The user inputted: {message}

Here are the component options:
CPUs: {json.dumps(cpu_filtered.reset_index().to_dict(orient="records"))}
Coolers: {json.dumps(cooler_filtered.reset_index().to_dict(orient="records"))}
Storage: {json.dumps(storage_filtered.reset_index().to_dict(orient="records"))}
Memory: {json.dumps(memory_filtered.reset_index().to_dict(orient="records"))}
Motherboard: {json.dumps(motherboard_filtered.reset_index().to_dict(orient="records"))}
"""


In [136]:
pc_build = client.chat.completions.create(
    model=MODEL,
    response_model=ComponentChoices,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
).model_dump()

pc_build


{'cpu': {'index': 1, 'name': 'AMD Ryzen 7 7800X3D', 'price': 391},
 'cooler': {'index': 4, 'name': 'Thermalright Aqua Elite V3', 'price': 34},
 'storage': {'index': 1, 'name': 'Samsung 990 Pro', 'price': 302},
 'memory': {'index': 2, 'name': 'Corsair Vengeance RGB 64 GB', 'price': 240},
 'motherboard': {'index': 2,
  'name': 'MSI MAG B650 TOMAHAWK WIFI',
  'price': 200}}

In [137]:
def get_recommendation(message: str):
    system_prompt = """You are tasked with recommending a compatible and high-performance PC setup. You are given five JSON arrays, consisting of details of CPUs, storage hard drives, and memory modules. From the list, choose only ONE component from each array, ensuring compatibility across all components that it meets the user's expectation and preference based on their input. For each component, output the name, as well as the index number of its row. You must only select from the given options. Do not invent anything new."""

    user_prompt = f"""The user inputted: {message}

    Here are the component options:
    CPUs: {json.dumps(cpu_filtered.reset_index().to_dict(orient="records"))}
    Coolers: {json.dumps(cooler_filtered.reset_index().to_dict(orient="records"))}
    Storage: {json.dumps(storage_filtered.reset_index().to_dict(orient="records"))}
    Memory: {json.dumps(memory_filtered.reset_index().to_dict(orient="records"))}
    Motherboard: {json.dumps(motherboard_filtered.reset_index().to_dict(orient="records"))}
    """

    print("Sent request...")

    recommendation = client.chat.completions.create(
        model=MODEL,
        response_model=ComponentChoices,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    ).model_dump()

    return recommendation

get_recommendation(message="I want a decent PC rig that does things fast and has a lot of RAM!")

Sent request...


{'cpu': {'index': 7, 'name': 'AMD Ryzen 7 7800X3D', 'price': 268},
 'cooler': {'index': 2, 'name': 'Thermalright Aqua Elite V3', 'price': 44},
 'storage': {'index': 1, 'name': 'Samsung 990 Pro', 'price': 302},
 'memory': {'index': 3, 'name': 'G.Skill Aegis 16 GB', 'price': 30},
 'motherboard': {'index': 1,
  'name': 'Asus PRIME B650-PLUS WIFI',
  'price': 150}}