In [44]:
# main libraries
import os
import json
import math
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from groq import Groq

# typing and classes
import instructor
from pydantic import BaseModel, Field
from typing import Optional, Literal, List
from enum import Enum 

# constants
load_dotenv("../.env")
GROQ_KEY = os.getenv("GROQ_KEY")
MODEL = "llama-3.1-8b-instant"

# initialisation of model
client = Groq(api_key=GROQ_KEY)
client = instructor.from_groq(client, mode=instructor.Mode.JSON)
print("Groq client initialised.")

Groq client initialised.


In [45]:
cpu_df = pd.read_csv("../../data/cpu.csv")
cooler_df = pd.read_csv("../../data/cooler.csv")
storage_df = pd.read_csv("../../data/storage.csv")
memory_df = pd.read_csv("../../data/memory.csv")
motherboard_df = pd.read_csv("../../data/motherboard.csv")

In [46]:
cpu_requirements = json.load(open('../requirements/cpu_requirements.json', 'r'))
cooler_requirements = json.load(open('../requirements/cooler_requirements.json', 'r'))
storage_requirements = json.load(open('../requirements/storage_requirements.json', 'r'))
memory_requirements = json.load(open('../requirements/memory_requirements.json', 'r'))
motherboard_requirements = json.load(open('../requirements/motherboard_requirements.json', 'r'))

## Filter CSVs

In [47]:
def normalize_string(s):
    if isinstance(s, str):
        return s.lower().replace(" ", "_").replace("+", "plus").replace("-", "_")
    return s

In [48]:
def filter_cpu(
    df,
    min_cores=0,
    min_core_clock_ghz=0,
    min_boost_clock_ghz=0,
    microarchitecture=None,
    max_tdp_watts=math.inf,
    max_price=math.inf,
):
    if min_cores is None: 
        min_cores = 0
    if min_core_clock_ghz is None:
        min_core_clock_ghz = 0
    if min_boost_clock_ghz is None:
        min_boost_clock_ghz = 0
    if max_tdp_watts is None:
        max_tdp_watts = math.inf 
    if max_price is None:
        max_price = math.inf

    filters = [
        df["core_count"] >= min_cores,
        df["performance_core_clock"] >= min_core_clock_ghz,
        df["performance_core_boost_clock"] >= min_boost_clock_ghz,
        df["tdp"] <= max_tdp_watts,
        df["price"].astype(float) <= max_price,
    ]

    if microarchitecture:
        filters.append(df['microarchitecture'].apply(normalize_string).isin(microarchitecture))

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=False).rename(columns={"index": "real_index"})


#############################################################################


def filter_cooler(
    df,
    min_fan_rpm=0,
    max_noise_level_db=math.inf,
    max_radiator_size_mm=math.inf,
    max_price=math.inf,
):
    if min_fan_rpm is None:
        min_fan_rpm = 0
    if max_noise_level_db is None:
        max_noise_level_db = math.inf 
    if max_radiator_size_mm is None:
        max_radiator_size_mm = math.inf 
    if max_price is None:
        max_price = math.inf 

    filters = [
        df["average_fan_rpm"] >= min_fan_rpm,
        df["average_noise_level"] <= max_noise_level_db,
        df["radiator_size"] <= max_radiator_size_mm,
        df["price"].astype(float) <= max_price,
    ]

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=False).rename(columns={"index": "real_index"})


#############################################################################


def filter_storage(
    df,
    min_capacity_gb=0,
    preferred_type=None, 
    min_cache_gb=0,
    preferred_form_factor=None,
    preferred_interface=None,
    max_price_per_gb=math.inf
):
    if min_capacity_gb is None:
        min_capacity_gb = 0
    if min_cache_gb is None:
        min_cache_gb = 0
    if max_price_per_gb is None:
        max_price_per_gb = math.inf

    filters = [
        df["capacity_gb"].astype(float) >= min_capacity_gb,
        df["cache_gb"].astype(float) >= min_cache_gb,
        df["price_per_gb"].astype(float) <= max_price_per_gb
    ]

    if preferred_type is not None:
        filters.append(df["type"].apply(normalize_string).isin(preferred_type))

    if preferred_form_factor is not None:
        filters.append(df["form_factor"].apply(normalize_string).isin(preferred_form_factor))

    if preferred_interface is not None:
        filters.append(df["interface"].apply(normalize_string).isin(preferred_interface))

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=False).rename(columns={"index": "real_index"})


#############################################################################


def filter_memory(
    df,
    min_capacity_gb=0,
    min_speed_mhz=None, # TODO
    max_module_count=0,
    max_cas_latency=math.inf,
    max_price=math.inf
):
    if min_capacity_gb is None:
        min_capacity_gb = 0
    if max_module_count is None:
        max_module_count = math.inf
    if max_cas_latency is None:
        max_cas_latency = math.inf 
    if max_price is None:
        max_price = math.inf
    filters = [
        df["total_ram"] >= min_capacity_gb,
        df["module_count"] <= max_module_count,
        df["cas_latency"] <= max_cas_latency,
        df["price"].astype(float) <= max_price,
    ]

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=False).rename(columns={"index": "real_index"})


#############################################################################


def filter_motherboard(
    df,
    preferred_socket=None,
    preferred_form_factor=None, 
    min_max_memory_gb=0,
    min_memory_slots=0,
    max_price=math.inf
):
    if min_max_memory_gb is None:
        min_max_memory_gb = 0
    if min_memory_slots is None:
        min_memory_slots = 0
    if max_price is None:
        max_price = math.inf

    filters = [
        df["max_memory_gb"].astype(float) >= min_max_memory_gb,
        df["memory_slots"] >= min_memory_slots,
        df["price"] <= max_price,
    ]

    if preferred_socket:
        filters.append(df["cpu_socket"].apply(normalize_string).isin(preferred_socket))

    if preferred_form_factor:
        filters.append(df["form_factor"].apply(normalize_string).isin(preferred_form_factor))

    return df.loc[pd.concat(filters, axis=1).all(axis=1)].reset_index(drop=False).rename(columns={"index": "real_index"})


# filter_memory(memory_df.copy(), **memory_requirements)



In [49]:
limit = 10

cpu_filtered = filter_cpu(cpu_df.copy(), **cpu_requirements).head(limit)
cooler_filtered = filter_cooler(cooler_df.copy(), **cooler_requirements).head(limit)
storage_filtered = filter_storage(storage_df.copy(), **storage_requirements).head(limit)
memory_filtered = filter_memory(memory_df.copy(), **memory_requirements).head(limit)
motherboard_filtered = filter_motherboard(motherboard_df.copy(), **motherboard_requirements).head(limit)

In [50]:
cpu_filtered

Unnamed: 0,real_index,title,core_count,performance_core_clock,performance_core_boost_clock,microarchitecture,tdp,integrated_graphics,rating,price
0,11,AMD Ryzen 7 5800X,8,3.8,4.7,Zen 3,105,,4.5,158.22
1,16,AMD Ryzen 7 5700X,8,3.4,4.6,Zen 3,65,,4.5,128.0
2,23,Intel Core i5-14400F,10,2.5,4.7,Raptor Lake Refresh,65,,,117.0
3,25,Intel Core i7-12700KF,12,3.6,5.0,Alder Lake,125,,4.5,154.0
4,27,Intel Core i5-14600K,14,3.5,5.3,Raptor Lake Refresh,125,Intel UHD Graphics 770,4.5,194.99
5,31,Intel Core i5-12600KF,10,3.7,4.9,Alder Lake,125,,4.5,154.0
6,32,AMD Ryzen 7 3700X,8,3.6,4.4,Zen 2,65,,4.5,133.9
7,38,Intel Core i5-13400F,10,2.5,4.6,Raptor Lake,65,,4.5,133.19
8,39,AMD Ryzen 7 5700,8,3.7,4.6,Zen 3,65,,5.0,126.8
9,41,Intel Core i5-14600KF,14,3.5,5.3,Raptor Lake Refresh,125,,5.0,185.0


In [51]:
cooler_filtered

Unnamed: 0,real_index,title,rating,price,fan_rpm,noise_level,color,radiator_size,average_noise_level,average_fan_rpm
0,25,Thermalright Aqua Elite V3,5.0,44.9,1500 RPM,25.6 dB,Black,240.0,25.6,1500.0
1,82,Thermalright Frozen Notte ARGB,4.5,48.9,2000 RPM,27.7 dB,White,240.0,27.7,2000.0
2,108,Thermalright Aqua Elite V3,4.5,44.9,1500 RPM,25.6 dB,White,240.0,25.6,1500.0
3,118,Thermalright Frozen Notte ARGB,4.0,49.9,2000 RPM,27.7 dB,Black,240.0,27.7,2000.0
4,154,Thermalright Aqua Elite V3,5.0,34.9,1500 RPM,25.6 dB,Black,120.0,25.6,1500.0
5,191,Thermalright Frozen Infinity 240 ARGB,4.0,47.9,2000 RPM,28.2 dB,Black,240.0,28.2,2000.0
6,219,Thermalright Frozen Edge,4.5,48.9,2150 RPM,28.1 dB,Black,240.0,28.1,2150.0
7,268,Thermalright AQUA ELITE ARGB V4,5.0,45.49,1550 RPM,25.6 dB,Black,240.0,25.6,1550.0
8,278,Thermalright AQUA ELITE ARGB V4,5.0,43.99,1550 RPM,25.6 dB,White,240.0,25.6,1550.0
9,312,Thermalright Frozen Prism ARGB,4.0,47.9,1850 RPM,27 dB,Black,240.0,27.0,1850.0


In [52]:
storage_filtered = storage_df.head(limit)
storage_filtered

Unnamed: 0,title,rating,price,capacity,price_per_gb,type,cache,form_factor,interface,cache_gb,capacity_gb
0,Samsung 990 Pro,4.5,169.99,2 TB,0.085,SSD,2048 MB,M.2-2280,M.2 PCIe 4.0 X4,2.048,2000.0
1,Crucial P3 Plus,4.5,61.99,1 TB,0.062,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,1000.0
2,Crucial P3 Plus,4.5,121.99,2 TB,0.061,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,2000.0
3,Samsung 990 Pro,4.5,302.0,4 TB,0.075,SSD,4096 MB,M.2-2280,M.2 PCIe 4.0 X4,4.096,4000.0
4,Kingston NV3,4.5,62.99,1 TB,0.063,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,1000.0
5,Samsung 990 Pro,4.5,99.99,1 TB,0.1,SSD,1024 MB,M.2-2280,M.2 PCIe 4.0 X4,1.024,1000.0
6,Western Digital WD_Black SN850X,4.5,147.94,2 TB,0.074,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,2000.0
7,Samsung 990 EVO Plus,5.0,139.99,2 TB,0.07,SSD,,M.2-2280,M.2 PCIe 5.0 X2,,2000.0
8,Kingston NV2,4.5,69.64,1 TB,0.07,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,1000.0
9,Western Digital Black SN770,4.5,70.86,1 TB,0.071,SSD,,M.2-2280,M.2 PCIe 4.0 X4,,1000.0


In [53]:
memory_filtered

Unnamed: 0,real_index,title,rating,price,speed,modules,price_per_gb,color,first_word_latency,cas_latency,module_count,gb_per_module,total_ram
0,0,G.Skill Ripjaws V 32 GB,4.5,52.99,DDR4-3600,2 x 16GB,1.656,Black,10 ns,18.0,2,16,32
1,1,G.Skill Aegis 16 GB,4.5,29.99,DDR4-3200,2 x 8GB,1.874,Red / Black,10 ns,16.0,2,8,16
2,3,Silicon Power SP016GLLTU160N22 16 GB,4.5,15.99,DDR3-1600,2 x 8GB,0.999,Green,13.75 ns,11.0,2,8,16
3,12,Timetec PINNACLE Konduit 32 GB,5.0,43.99,DDR4-3200,2 x 16GB,1.375,White,10 ns,16.0,2,16,32
4,13,Corsair Vengeance RGB Pro 16 GB,4.5,55.99,DDR4-3200,2 x 8GB,3.499,White,10 ns,16.0,2,8,16
5,21,TEAMGROUP T-Force Delta RGB 16 GB,4.5,38.99,DDR4-3600,2 x 8GB,2.437,Black,10 ns,18.0,2,8,16
6,29,Crucial CT16G48C40U5 16 GB,5.0,33.11,DDR5-4800,1 x 16GB,2.069,Black,16.667 ns,40.0,1,16,16
7,44,Patriot Viper Venom 16 GB,5.0,52.99,DDR5-5200,2 x 8GB,3.312,Black / White,13.846 ns,36.0,2,8,16
8,47,TEAMGROUP T-Force Vulcan Z 16 GB,4.5,26.99,DDR4-3200,2 x 8GB,1.687,Gray,10 ns,16.0,2,8,16
9,48,Silicon Power XPOWER Turbine 16 GB,4.5,25.97,DDR4-3200,2 x 8GB,1.623,Blue,10 ns,16.0,2,8,16


In [54]:
motherboard_filtered = motherboard_df.head(limit)
motherboard_filtered

Unnamed: 0,title,rating,price,cpu_socket,form_factor,max_memory,memory_slots,color,max_memory_gb
0,MSI B650 GAMING PLUS WIFI,4.5,169.99,AM5,ATX,192 GB,4,Black,192.0
1,Asus PRIME B650-PLUS WIFI,5.0,149.99,AM5,ATX,192 GB,4,Black / Silver,192.0
2,MSI MAG B650 TOMAHAWK WIFI,4.5,199.62,AM5,ATX,256 GB,4,Black,256.0
3,Asus PRIME B550M-A WIFI II,4.0,119.99,AM4,Micro ATX,128 GB,4,Blue / Silver,128.0
4,Gigabyte X870E AORUS ELITE WIFI7,4.0,315.08,AM5,ATX,256 GB,4,Black,256.0
5,Gigabyte B650 EAGLE AX,4.5,164.99,AM5,ATX,192 GB,4,Gray / Black,192.0
6,MSI MAG X870 TOMAHAWK WIFI,4.5,299.99,AM5,ATX,256 GB,4,Black,256.0
7,Gigabyte A520M K V2,3.5,79.0,AM4,Micro ATX,64 GB,2,Brown / Black,64.0
8,Gigabyte X870 EAGLE WIFI7,5.0,229.99,AM5,ATX,256 GB,4,Black,256.0
9,MSI B760 GAMING PLUS WIFI,4.5,173.32,LGA1700,ATX,192 GB,4,Black / Silver,192.0


## Recommendation engine

In [55]:
system_prompt = """You are tasked with recommending a compatible and high-performance PC setup. You are given five JSON arrays, consisting of details of CPUs, storage hard drives, and memory modules. From the list, choose only ONE component from each array, ensuring compatibility across all components that it meets the user's expectation and preference based on their input. For each component, output the name, as well as the real index number of its row. You must only select from the given options. Do not invent anything new."""

class Component(BaseModel):
    real_index: int
    name: str 
    price: int

class ComponentChoices(BaseModel):
    cpu: Component 
    cooler: Component
    storage: Component 
    memory: Component 
    motherboard: Component

message = "I want a decent PC rig that does things fast and has a lot of RAM!"

user_prompt = f"""The user inputted: {message}

Here are the component options:
CPUs: {json.dumps(cpu_filtered.reset_index().to_dict(orient="records"))}
Coolers: {json.dumps(cooler_filtered.reset_index().to_dict(orient="records"))}
Storage: {json.dumps(storage_filtered.reset_index().to_dict(orient="records"))}
Memory: {json.dumps(memory_filtered.reset_index().to_dict(orient="records"))}
Motherboard: {json.dumps(motherboard_filtered.reset_index().to_dict(orient="records"))}
"""


In [56]:
pc_build = client.chat.completions.create(
    model=MODEL,
    response_model=ComponentChoices,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
).model_dump()

pc_build


{'cpu': {'real_index': 25, 'name': 'Intel Core i7-12700KF', 'price': 154},
 'cooler': {'real_index': 278,
  'name': 'Thermalright AQUA ELITE ARGB V4',
  'price': 43},
 'storage': {'real_index': 0, 'name': 'Samsung 990 Pro', 'price': 169},
 'memory': {'real_index': 12,
  'name': 'Timetec PINNACLE Konduit 32 GB',
  'price': 43},
 'motherboard': {'real_index': 1,
  'name': 'Asus PRIME B650-PLUS WIFI',
  'price': 149}}

In [59]:
cooler_df.loc[278]

title                  Thermalright AQUA ELITE ARGB V4
rating                                             5.0
price                                            43.99
fan_rpm                                       1550 RPM
noise_level                                    25.6 dB
color                                            White
radiator_size                                    240.0
average_noise_level                               25.6
average_fan_rpm                                 1550.0
Name: 278, dtype: object

In [58]:
memory_df.loc[12]

title                 Timetec PINNACLE Konduit 32 GB
rating                                           5.0
price                                          43.99
speed                                      DDR4-3200
modules                                     2 x 16GB
price_per_gb                                   1.375
color                                          White
first_word_latency                             10 ns
cas_latency                                     16.0
module_count                                       2
gb_per_module                                     16
total_ram                                         32
Name: 12, dtype: object