### 使用流程
1. 建立Insert、Clear等各項function
2. 建立資料庫、表格

In [191]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from datetime import datetime
import warnings
import sqlite3  
output_path='./output/'
db_name = './data/sqlchain.db'
conn = sqlite3.connect(db_name)  

In [11]:
# 基本設定
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format',lambda x: '%.2f' %x)
warnings.filterwarnings("ignore")

In [108]:
# Insert Data
def insertTable(insert_table, records_table):
    # 直接将DataFrame转换成元组列表
    records = [tuple(x) for x in records_table.values]

    conn = sqlite3.connect(db_name)
    try:
        # 假设DataFrame的列名与数据库表的列名一致
        columns = ",".join(records_table.columns)
        placeholders = ", ".join(["?"] * len(records_table.columns))
        insert_data_sql = f"INSERT INTO {insert_table} ({columns}) VALUES ({placeholders})"
        
        conn.executemany(insert_data_sql, records)
        conn.commit()
    except sqlite3.Error as error:
        print(error)
    finally:
        conn.close()


In [113]:
# Clear Data
def clearTable(db_name, table_name):
    """
    清空指定SQLite資料表中的所有數據。

    参数:
    db_name (str): 資料表文件名。
    table_name (str): 要清空的表名。
    """
    try:
        conn = sqlite3.connect(db_name)
        conn.execute(f'''DELETE FROM {table_name};''')
        conn.commit()
    except sqlite3.Error as error:
        print(f"error: {error}")
    finally:
        conn.close()


### Insert CPU 的資料

In [134]:
table_name='cpu'
clearTable(db_name, table_name)

In [135]:
#讀取資料
df_cpu=pd.read_csv(output_path+'cpu.csv')
df_cpu.drop(columns=['class'],inplace=True)
#插入資料
insertTable('cpu', df_cpu)

### Insert GPU 的資料

In [194]:
# 清空資料(這邊是為了避免資料因為爬蟲出現重複，所以先清空table全部資料)
table_name='gpu'
clearTable(db_name, table_name)

In [195]:
#讀取資料
df_gpu=pd.read_csv(output_path+'gpu.csv')
df_gpu.drop(columns=['class','cudacores'],inplace=True)
df_gpu.head()

Unnamed: 0,name,price,etl_date,model,brand,memory,memory_value,memory_unit,corefrequency_value,corefrequency_unit,ddr
0,微星 N210-MD1G/D3(589MHz/1G DDR3/風扇版/14.5cm/三年保),1250,2024-03-17,Unknown,微星,1G,1.0,G,589.0,MHz,3.0
1,華碩 GT710-SL-2GD3-BRK-EVO(954MHz/2G DDR3/17cm/註...,1690,2024-03-17,GT710,華碩,2G,2.0,G,954.0,MHz,3.0
2,華碩 GT710-SL-2GD5-BRK-EVO(954MHz/2G DDR5/17cm/註...,1790,2024-03-17,GT710,華碩,2G,2.0,G,954.0,MHz,5.0
3,技嘉 N710D3-2GL(954MHz/2G DDR3/14.4cm/三年保),1790,2024-03-17,Unknown,技嘉,2G,2.0,G,954.0,MHz,3.0
4,微星 GT710 1GD3H LP(954MHz/1G DDR3/靜音版/14.6cm/三年保),1450,2024-03-17,GT710,微星,1G,1.0,G,954.0,MHz,3.0


In [196]:
#插入資料
insertTable('gpu', df_gpu)

# Insert 硬碟的資料

In [163]:
table_name='hdd'
clearTable(db_name, table_name)

In [164]:
df_hdd=pd.read_csv(output_path+'hdd.csv')
df_hdd.drop(columns=['id','class','capacity'],inplace=True)
#插入資料
insertTable('hdd', df_hdd)

## Insert Ram的資料

In [158]:
table_name='ram'
clearTable(db_name, table_name)

In [159]:
df_ram=pd.read_csv(output_path+'ram.csv')
df_ram.drop(columns=['class'],inplace=True)
#插入資料
insertTable('ram', df_ram)

# Insert 散熱器的資料

In [None]:
table_name='radiator'
clearTable(db_name, table_name)

In [156]:
df_radiator=pd.read_csv(output_path+'cool.csv')
df_radiator.drop(columns=['class'],inplace=True)
#插入資料
insertTable('radiator', df_raditor)

In [152]:
### 驗證查詢
conn = sqlite3.connect(db_name)  
sql1=f'''
select *
from cool
'''
test=pd.read_sql(sql1,conn)
conn.close()
test.head()

Unnamed: 0,ID,NAME,PRICE,MODEL,BRAND,ETL_DATE
0,利民 M.2 2280 TYPE A B SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,利民 M.2 2280 TYPE A B SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,199,利民 M.2 2280 TYPE A B SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,利民,2024-03-03
1,利民 M.2 2280 SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,利民 M.2 2280 SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,350,利民 M.2 2280 SSD 固態硬碟散熱片/鋁合金/單雙面皆適用,利民,2024-03-03
2,利民 M.2 2280 PRO SSD 固態硬碟散熱片/鋁合金+8 mm純銅導管/單雙面皆適用,利民 M.2 2280 PRO SSD 固態硬碟散熱片/鋁合金+8 mm純銅導管/單雙面皆適用,400,利民 M.2 2280 PRO SSD 固態硬碟散熱片/鋁合金+8 mm純銅導管/單雙面皆適用,利民,2024-03-03
3,利民 HR-09 2280 SSD 固態硬碟散熱器/6 mm熱導管/電鍍鰭片/單雙面皆適用,利民 HR-09 2280 SSD 固態硬碟散熱器/6 mm熱導管/電鍍鰭片/單雙面皆適用,550,利民 HR-09 2280 SSD 固態硬碟散熱器/6 mm熱導管/電鍍鰭片/單雙面皆適用,利民,2024-03-03
4,利民 HR-10 2280 PRO SSD 固態硬碟散熱器/4導管/3CM PWM風扇/單雙...,利民 HR-10 2280 PRO SSD 固態硬碟散熱器/4導管/3CM PWM風扇/單雙...,690,利民 HR-10 2280 PRO SSD 固態硬碟散熱器/4導管/3CM PWM風扇/單雙...,利民,2024-03-03


## 處理機殼的資料

In [173]:
table_name='chassis'
clearTable(db_name, table_name)

In [174]:
df_chassis=pd.read_csv(output_path+'chassis.csv')
df_chassis.drop(columns=['class'],inplace=True)
#插入資料
insertTable('chassis', df_chassis)

## 處理電源的資料 

In [175]:
table_name='battery'
clearTable(db_name, table_name)

In [176]:
df_battery=pd.read_csv(output_path+'battery.csv')
df_battery.drop(columns=['class'],inplace=True)
#插入資料
insertTable('battery', df_chassis)

table battery has no column named tdp


### 處理螢幕的資料

In [188]:
df_monitor=pd.read_csv(output_path+'monitor.csv')
df_monitor.head()

Unnamed: 0,class,name,price,etl_date,brand,model,hz
0,螢幕｜投影機｜壁掛,【主機搭購】BenQ GW2475H(1A2H/5ms/IPS/無喇叭)不閃屏.低藍光.護眼...,2488,2024-03-17,BenQ,GW2475H,
1,螢幕｜投影機｜壁掛,【主機搭購】BenQ BL2480(Plus) (1A1H1P/5ms/IPS/含喇叭)三介...,2888,2024-03-17,BenQ,BL2480,
2,螢幕｜投影機｜壁掛,【主機搭購】BenQ MOBIUZ EX2710S(2H1P/IPS/165Hz/含喇叭/F...,5888,2024-03-17,BenQ,MOBIUZ,165Hz
3,螢幕｜投影機｜壁掛,【主機搭購】BenQ MOBIUZ EX2710R(2H1P/1ms/VA曲面/165Hz/...,6688,2024-03-17,BenQ,MOBIUZ,165Hz
4,螢幕｜投影機｜壁掛,【主機搭購】ACER EK241Y E(1A1H/1ms/IPS/100Hz/無喇叭/Fre...,2288,2024-03-17,ACER,EK241Y,100Hz


In [189]:
table_name='monitor'
clearTable(db_name, table_name)

In [190]:
df_monitor=pd.read_csv(output_path+'monitor.csv')
df_monitor.drop(columns=['class'],inplace=True)
#插入資料
insertTable('monitor', df_monitor)