In [1]:
import pandas as pd
import random
import tqdm

In [13]:
# Scoring tables, one per laptop attribute. The component tables (CPU, RAM,
# GPU, memory, screen) hold points that are added together; the brand and
# build-quality tables hold percentage bonuses applied on top of that sum.
brand_qualities = {"Dell": 10, "HP": 8, "Lenovo": 5, "Apple": 25, "Asus": 15, "Acer": 7}

cpu_qualities = {
    "Intel Core i7": 400,
    "Intel Core i5": 350,
    "AMD Ryzen 7": 380,
    "AMD Ryzen 5": 320,
}

ram_qualities = {"8GB": 80, "16GB": 140, "32GB": 280, "64GB": 400}

gpu_qualities = {
    "NVIDIA GeForce GTX 1660": 200,
    "AMD Radeon RX 570": 180,
    "Integrated Graphics": 0,
}

memory_qualities = {"256GB SSD": 60, "512GB SSD": 100, "1TB HDD": 40, "1TB SSD": 160}

screen_qualities = {"OLED": 200, "LED": 100, "LCD": 50}

quality_qualities = {"High": 15, "Medium": 10, "Low": 0}

# Possible values for each column, read straight from the scoring tables'
# keys (dicts keep insertion order, so the option order is preserved).
brands = list(brand_qualities)
cpus = list(cpu_qualities)
rams = list(ram_qualities)
gpus = list(gpu_qualities)
memories = list(memory_qualities)
screens = list(screen_qualities)
qualities = list(quality_qualities)
# Generate random data with qualities
num_samples = 1_000_000

# Seed the generator so that re-running the notebook from a fresh kernel
# reproduces exactly the same dataset.
random.seed(42)

data = []
for _ in range(num_samples):
    # Every attribute is drawn independently and uniformly from its options.
    brand = random.choice(brands)
    cpu = random.choice(cpus)
    ram = random.choice(rams)
    gpu = random.choice(gpus)
    memory = random.choice(memories)
    screen = random.choice(screens)
    quality = random.choice(qualities)

    # Sum the component points ...
    component_score = (
        cpu_qualities[cpu]
        + ram_qualities[ram]
        + gpu_qualities[gpu]
        + memory_qualities[memory]
        + screen_qualities[screen]
    )
    # ... then scale by the combined build-quality and brand bonus (in percent).
    bonus_percent = quality_qualities[quality] + brand_qualities[brand]
    # No random noise is added: the score is a deterministic function of the
    # chosen attributes, rounded to two decimals.
    total_quality = round(component_score * (1 + bonus_percent / 100), 2)

    # Field order must match the DataFrame column order used downstream.
    data.append([brand, cpu, ram, gpu, memory, quality, screen, total_quality])

# Assemble the generated rows into a DataFrame, one column per attribute
column_names = "Brand CPU RAM GPU Memory Build_Quality Screen Quality".split()
df = pd.DataFrame(data, columns=column_names)

# Record the smallest and largest raw quality score present in the dataset
quality_column = df["Quality"]
min_quality, max_quality = quality_column.min(), quality_column.max()

# Define a function to rescale the quality values to a 0-100 scale
def rescale_quality(quality, min_value=None, max_value=None):
    """Min-max rescale a raw quality score onto a 0-100 scale.

    Parameters
    ----------
    quality
        Raw score to rescale. Anything supporting ``-``, ``/`` and ``*`` with
        numbers works (a plain number or a pandas Series).
    min_value, max_value
        Bounds of the raw scale. When omitted, the module-level
        ``min_quality`` / ``max_quality`` are used.

    Returns
    -------
    ``(quality - min_value) / (max_value - min_value) * 100``; zero(s) when
    both bounds are equal.
    """
    if min_value is None:
        min_value = min_quality
    if max_value is None:
        max_value = max_quality

    span = max_value - min_value
    if span == 0:
        # Every score is identical, so there is no spread to map onto 0-100.
        # Return zero(s) shaped like the input instead of dividing by zero.
        return quality * 0.0

    return ((quality - min_value) / span) * 100

# Rescale the raw scores to 0-100, overwriting the "Quality" column.
# rescale_quality is plain arithmetic, so it is applied to the whole column at
# once rather than being called separately for each row.
df["Quality"] = rescale_quality(df["Quality"])

# Save the DataFrame to a CSV file (without the index column)
df.to_csv("procurement_data_with_qualities.csv", index=False)

In [14]:
# Display the generated DataFrame (pandas abbreviates the middle rows)
df

Unnamed: 0,Brand,CPU,RAM,GPU,Memory,Build_Quality,Screen,Quality
0,Apple,AMD Ryzen 5,8GB,AMD Radeon RX 570,1TB HDD,Low,LED,27.743793
1,Dell,Intel Core i7,16GB,AMD Radeon RX 570,512GB SSD,High,OLED,54.731918
2,HP,Intel Core i7,8GB,AMD Radeon RX 570,512GB SSD,Medium,LCD,31.759626
3,HP,AMD Ryzen 5,32GB,AMD Radeon RX 570,1TB HDD,High,LCD,39.985606
4,Dell,AMD Ryzen 7,16GB,Integrated Graphics,512GB SSD,High,LED,27.743793
...,...,...,...,...,...,...,...,...
999995,Apple,AMD Ryzen 5,8GB,Integrated Graphics,512GB SSD,Low,OLED,25.944584
999996,Dell,AMD Ryzen 7,16GB,Integrated Graphics,512GB SSD,Low,LED,19.971213
999997,Lenovo,Intel Core i5,32GB,NVIDIA GeForce GTX 1660,1TB SSD,Medium,LCD,49.046420
999998,Dell,AMD Ryzen 7,32GB,AMD Radeon RX 570,512GB SSD,Medium,OLED,61.424973


In [18]:
# Sort ascending by quality score, so the best-scoring laptop ends up last
sorted_df = df.sort_values(by="Quality")

# Pick the laptop with the highest quality (last row in the sorted DataFrame)
highest_quality_laptop = sorted_df.iloc[-1]

print("Laptop with the highest quality:")
print("Brand:", highest_quality_laptop["Brand"])
print("CPU:", highest_quality_laptop["CPU"])
print("RAM:", highest_quality_laptop["RAM"])
print("GPU:", highest_quality_laptop["GPU"])
print("Memory:", highest_quality_laptop["Memory"])
# "Quality:" is the build-quality tier; "quality:" is the rescaled score.
print("Quality:", highest_quality_laptop["Build_Quality"])
print("quality:", highest_quality_laptop["Quality"])

Laptop with the highest quality:
Brand: Apple
CPU: Intel Core i7
RAM: 64GB
GPU: NVIDIA GeForce GTX 1660
Memory: 1TB SSD
Quality: High
quality: 100.0


# Model

In [19]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [34]:
# Load the generated dataset
df = pd.read_csv("procurement_data_with_qualities.csv")

# One-hot encode every categorical attribute
df = pd.get_dummies(df, columns=["Brand", "CPU", "RAM", "GPU", "Memory", "Build_Quality", "Screen"])

# Define features (X) and target (y)
X = df.drop("Quality", axis=1)
y = df["Quality"]

# Split the data into training and testing sets (80% train, 20% test);
# the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train a Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# Derive RMSE from the MSE computed above instead of passing `squared=False`,
# an argument that is deprecated in newer scikit-learn releases.
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error:", mae)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

Mean Absolute Error: 10.326185010375976
Root Mean Squared Error: 16.045724438389986
R-squared: 0.9949983259698053
