In [None]:
"""
COMPUTER SYSTEMS OF EVERY MAJOR COMPANY SCRAPER

This tool is designed to collect data on every computer system produced by major companies. 
It leverages OpenAI to gather and assign data to specific data points automatically.

Without OpenAI
    Without using OpenAI to populate the data automatically, you would need to:
        -Identify elements/selectors for each site and update them if the HTML is dynamic (like on Amazon).
                -Manually figure out elements/selectors for each of the ~45 data points.
                -This process cannot be automated with OpenAI.
        -Use Selenium and BeautifulSoup for web scraping.
        -Potentially figure out an API for each site.
        -Gather specific elements/selectors for:
            -45 data points
            -15 sites
            -50 products per site
        This results in 45 x 15 x 50 = 33,750 combinations (assuming elements/selectors differ for each product on the same site).
        This doesn't include figuring out variations of the same product on the same page.

With OpenAI
    Using OpenAI simplifies the process:
        -Input a concise prompt with as few tokens as possible.
        -Manually input the list of products.
            -No consistent way was found to accurately scrape all products from a product page, so this must be done manually.
            -This process can take about 5-10 hours, depending on the number of products.
        -OpenAI responds with text in CSV format, which is automatically written to a text file.
            -This can be converted into a CSV data file for further use.
            
"""

In [15]:
from openai import OpenAI
import json, os

# Initial set up: move into the project folder and read the OpenAI API key.
#
# The folder defaults to the original author's location but can be overridden
# with the COMP_SYS_PROJECT_DIR environment variable so the notebook can run
# on another machine without editing this cell.
dataLocation = os.environ.get('COMP_SYS_PROJECT_DIR', r'C:\Users\keena\Desktop\Comp Sys Project')
# JSON file expected inside dataLocation, holding the API key under the "key" entry.
chatGPTCredentials = 'ChatGPTAPIKey.json'

try:
    # Later cells write their output with relative paths, so the working
    # directory is switched to the project folder here.
    os.chdir(dataLocation)
    with open(chatGPTCredentials, 'r') as credFile:
        jsonData = json.load(credFile)
    api_key = jsonData['key']
except Exception as err:
    # A missing folder/file, invalid JSON or a missing "key" entry will not fix
    # itself, so retrying in a loop would spin forever. Report once and stop,
    # keeping the original exception as the cause.
    print('INITIAL SET UP FAILED')
    raise RuntimeError(
        f'Could not load the OpenAI API key from {chatGPTCredentials!r} in {dataLocation!r}'
    ) from err

print('INITIAL SET UP SUCCESS')

INITIAL SET UP SUCCESS


In [21]:
# Client used for the OpenAI request below, authenticated with the key loaded
# during the initial set up.
client = OpenAI(api_key=api_key)

# System prompt: lists the CSV columns, gives one fully populated example row,
# and asks for CSV-only output.
# The wording can be tightened with the Concise Prompt Evaluator:
#   https://chatgpt.com/g/g-mxx60FhwP-concise-prompt-evaluator
# Open question from the author: what tone should the model answer in
# (formal professional or casual teacher)?
aiRole = """Generate CSV data for each provided link with the following columns:
company,data_acquired_date,products,model,product_line,price,discounted_price,cpu_manufacturer,cpu_model,cpu_cores,cpu_threads,cpu_base_clock_speed,cpu_turbo_clock_speed,gpu_manufacturer,gpu_model,gpu_vram,ram_capacity,ram_type,ram_speed,storage_type,storage_capacity,display_size,display_resolution,display_refresh_rate,display_panel_type,dimensions,weight,material,battery_capacity,battery_life,power_supply,usb_ports,hdmi_displayport,audio_ports,network_connectivity,operating_system,pre_installed_software,keyboard,touchpad,security_features,webcam,speakers,cooling_system,warranty,customer_support,url

Example:
Dell,2024-06-21,"XPS 13 9345 Laptop",XPS 13,XPS,1449.99,1349.99,Intel,Core i7-8550U,4,8,"1.8 GHz","4.0 GHz",Intel,"UHD Graphics 620",Integrated,"8 GB",LPDDR3,"1866 MHz",SSD,"256 GB","13.3 inches","1920 x 1080 (Full HD)","60 Hz",IPS,"11.9 x 7.8 x 0.46 inches","2.7 pounds",Aluminum,"52 WHr","Up to 11 hours","45W AC Adapter","2 x USB-C (Thunderbolt 3)","Available via adapter","1 x Headphone/Microphone Combo","Wi-Fi 6, Bluetooth 5.1",Windows 10 Home,"Microsoft Office Trial, McAfee LiveSafe","Backlit, Chiclet-style","Precision Touchpad","Fingerprint Reader, TPM 2.0","720p HD","Stereo Speakers","Dual Fan","1 Year Limited Hardware Warranty","24/7 Premium Support","https://www.dell.com/en-us/shop/dell-laptops/new-xps-13-laptop/spd/xps-13-9345-laptop/usexchbts9345hchl?ref=variantstack"

Please respond with only the CSV data, including all variations if available.
"""

# User prompt: the product page links, one per line. Add every company's
# product links here.
humanQuestion = """
https://www.dell.com/en-us/shop/dell-laptops/new-xps-13-laptop/spd/xps-13-9345-laptop/usexchbts9345hchl?ref=variantstack
https://www.dell.com/en-us/shop/dell-laptops/new-inspiron-14-plus-laptop/spd/inspiron-14-7441-laptop/useichbts7441hcjp?ref=variantstack
https://www.apple.com/shop/buy-mac/macbook-air/13-inch-m2
https://www.apple.com/shop/buy-mac/macbook-pro
https://us.vaio.com/products/vaio-sx14-vjs146x0311
"""

# Conversation sent to the model: the instructions as the system message,
# followed by the list of links as the user message.
chat_messages = [
    dict(role="system", content=aiRole),
    dict(role="user", content=humanQuestion),
]

response = client.chat.completions.create(model="gpt-3.5-turbo", messages=chat_messages)

# Keep the text of the first returned choice; later cells print and save it.
first_choice = response.choices[0]
csv_data = first_choice.message.content

In [24]:
# Optional: estimate what the request above cost.

# Token counts reported by the API for this response.
usage = response.usage
prompt_tokens, completion_tokens, total_tokens = (
    usage.prompt_tokens,
    usage.completion_tokens,
    usage.total_tokens,
)

# GPT-3.5 Turbo pricing in USD per 1000 tokens, as recorded by the author.
cost_per_1000_tokens_input = 0.0015
cost_per_1000_tokens_output = 0.002

# Input and output tokens are billed at different rates, so each side is
# priced separately and then summed.
input_cost = (prompt_tokens / 1000) * cost_per_1000_tokens_input
output_cost = (completion_tokens / 1000) * cost_per_1000_tokens_output
total_cost = input_cost + output_cost

# Report the counts and the estimated cost.
for report_line in (
    f"Prompt tokens: {prompt_tokens}",
    f"Completion tokens: {completion_tokens}",
    f"Total tokens: {total_tokens}",
    f"Cost in USD: ${total_cost:.6f}",
):
    print(report_line)

Prompt tokens: 617
Completion tokens: 1379
Total tokens: 1996
Cost in USD: $0.003684


In [22]:

# Show the raw model reply so it can be checked by eye before it is kept.
print(csv_data)

# Append the reply to the running text log in the current working directory.
# The encoding is pinned to UTF-8: without it open() falls back to the
# platform locale encoding, and a reply containing characters outside that
# code page would raise UnicodeEncodeError on write.
with open('TEST.txt', 'a', encoding='utf-8') as file:
    file.write(csv_data + '\n')
    

company,data_acquired_date,products,model,product_line,price,discounted_price,cpu_manufacturer,cpu_model,cpu_cores,cpu_threads,cpu_base_clock_speed,cpu_turbo_clock_speed,gpu_manufacturer,gpu_model,gpu_vram,ram_capacity,ram_type,ram_speed,storage_type,storage_capacity,display_size,display_resolution,display_refresh_rate,display_panel_type,dimensions,weight,material,battery_capacity,battery_life,power_supply,usb_ports,hdmi_displayport,audio_ports,network_connectivity,operating_system,pre_installed_software,keyboard,touchpad,security_features,webcam,speakers,cooling_system,warranty,customer_support,url

Dell,2024-06-21,"XPS 13 9345 Laptop",XPS 13,XPS,1449.99,1349.99,Intel,Core i7-8550U,4,8,"1.8 GHz","4.0 GHz",Intel,UHD Graphics 620,8 GB,LPDDR3,1866 MHz,SSD,256 GB,"13.3 inches","1920 x 1080 (Full HD)","60 Hz",IPS,"11.9 x 7.8 x 0.46 inches","2.7 pounds",Aluminum,"52 WHr","Up to 11 hours","45W AC Adapter","2 x USB-C (Thunderbolt 3)","Available via adapter","1 x Headphone/Microphone Combo","W