In [None]:
import pathlib
import pandas as pd

In [None]:
# Column selection and order applied to every processed dataset before it is stored back in `dfs`.
ORDERED_COLUMNS = ["title", "type", "category", "range", "symbol", "price", "link", "origin"]

# Lectura de archivos

In [None]:
def read_datasets(files_to_read):
    """Load pipe-delimited CSV files into DataFrames keyed by file name.

    Parameters
    ----------
    files_to_read : iterable of str or os.PathLike
        Paths of the CSV files to load; each one is parsed with ``sep="|"``.

    Returns
    -------
    tuple[dict, list]
        ``(dataframes, not_working)``. ``dataframes`` maps each file's base
        name to its DataFrame, which gains an ``origin`` column holding the
        path it was read from (as a string). ``not_working`` lists the paths
        that could not be loaded, exactly as they were passed in.
    """
    dataframes = {}
    not_working = []
    for file in files_to_read:
        try:
            df = pd.read_csv(file, sep="|")
            df["origin"] = str(file)
            # Path.name is separator-agnostic and also works for Path inputs,
            # unlike splitting the string on "/".
            dataframes[pathlib.Path(file).name] = df
        except Exception as e:
            # Best-effort load: report which file failed and keep going.
            print(f"{file}: {e}")
            not_working.append(file)

    return dataframes, not_working

In [None]:
# Collect the absolute paths of the raw data files, skipping hidden dot-files.
# `Path.name` replaces splitting on "/" so the dot-file check does not depend on
# the path separator, and the result is sorted because `iterdir()` yields
# entries in arbitrary order.
raw_dir = pathlib.Path("../..").joinpath("data").joinpath("raw")
files = sorted(str(entry.resolve()) for entry in raw_dir.iterdir() if not entry.name.startswith("."))

In [None]:
# `dfs` maps file name -> DataFrame; `nw` lists the paths that read_datasets could not load.
dfs, nw = read_datasets(files)

In [None]:
# Show which datasets were loaded (keys are the CSV file names).
list(dfs.keys())

['amazon_chasis_para_pc.csv',
 'amazon_nvme_m.2.csv',
 'amazon_tarjetas_graficas.csv',
 'amazon_fuentes_de_alimentacion_para_pc.csv',
 'amazon_refrigeracion_pc.csv',
 'amazon_memorias_ram_dd4.csv',
 'amazon_procesadores.csv',
 'amazon_tarjeta_madre.csv',
 'amazon_hdd_interno.csv']

# Limpieza

In [None]:
# Normalise two columns of every dataset in place:
#   * symbol: remove the "$" sign (regex=False so "$" is taken literally).
#   * price:  remove "," separators and convert to float.
# The vectorised `.str` accessor passes missing symbols through as NaN in a
# textual column, whereas calling `x.replace` per element raised
# AttributeError on any non-string value.
for key in dfs:
    dfs[key]["symbol"] = dfs[key]["symbol"].str.replace("$", "", regex=False)
    dfs[key]["price"] = dfs[key]["price"].astype(str).str.replace(",", "", regex=False).astype(float)

# Procesamiento

## Chasis

In [None]:
# Start from the dataset loaded from amazon_chasis_para_pc.csv.
chasis_df = dfs["amazon_chasis_para_pc.csv"]

In [None]:
# Tag every row with its component type. `chasis_df` is still the same object
# stored in `dfs`, so the new column is added to that entry as well.
chasis_df["type"] = "Chasis"

In [None]:
# Keep only the rows priced at 100 or more.
chasis_df = chasis_df.loc[chasis_df["price"].ge(100)]

In [None]:
# Exclude listings whose title contains "mine" or "Server". `na=False` treats
# rows without a title as non-matching (they are kept) so the mask stays
# boolean, and `.copy()` makes the result an independent frame so the later
# `.loc` assignments do not write to a slice of another DataFrame.
chasis_df = chasis_df[~chasis_df["title"].str.contains("mine|Server", na=False)].copy()

In [None]:
# Category by price: up to 200 is "Office", anything above is "Gamer".
chasis_price = chasis_df["price"]
chasis_df.loc[:, "category"] = ""
chasis_df.loc[chasis_price.le(200), "category"] = "Office"
chasis_df.loc[chasis_price.gt(200), "category"] = "Gamer"

In [None]:
# Price range: <= 250 "Low", (250, 500] "Mid", > 500 "High".
chasis_df.loc[:, "range"] = ""
for chasis_range, chasis_mask in (
    ("Low", chasis_df["price"] <= 250),
    ("Mid", (chasis_df["price"] > 250) & (chasis_df["price"] <= 500)),
    ("High", chasis_df["price"] > 500),
):
    chasis_df.loc[chasis_mask, "range"] = chasis_range

In [None]:
# Keep only the columns listed in ORDERED_COLUMNS, in that order.
chasis_df = chasis_df[ORDERED_COLUMNS]

In [None]:
# Write the processed frame back into `dfs` under its original key.
dfs["amazon_chasis_para_pc.csv"] = chasis_df

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_chasis_para_pc.csv"]

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,MUSETEX ATX - Funda para PC con 6 ventiladores...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/MUSETEX-ATX-ventil...,/work/games_seeker/data/raw/amazon_chasis_para...
1,MUSETEX Phantom Negro ATX Mid-Tower computador...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/Mid-Tower-computad...,/work/games_seeker/data/raw/amazon_chasis_para...
3,Vetroo A03 Mid-Tower ATX Gaming PC Case preins...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_chasis_para...
5,Razer Tomahawk ATX - Funda para juegos de medi...,Chasis,Office,Low,US,199.0,https://www.amazon.com/-/es/Razer-Tomahawk-ATX...,/work/games_seeker/data/raw/amazon_chasis_para...
6,Thermaltake Nivel 20 HT - Torre de computadora...,Chasis,Gamer,Mid,US,299.0,https://www.amazon.com/-/es/Thermaltake-Nivel-...,/work/games_seeker/data/raw/amazon_chasis_para...
...,...,...,...,...,...,...,...,...
170,Segotep Phoenix ATX Black Mid Tower PC Gaming ...,Chasis,Office,Low,US,199.0,https://www.amazon.com/-/es/Segotep-Phoenix-Co...,/work/games_seeker/data/raw/amazon_chasis_para...
171,MUSETEX ATX - Carcasa de policarbonato con 6 v...,Chasis,Office,Low,US,164.0,https://www.amazon.com/-/es/MUSETEX-ATX-polica...,/work/games_seeker/data/raw/amazon_chasis_para...
173,"Mini ITX PC caso B6, caja de ordenador de alum...",Chasis,Office,Low,US,189.0,https://www.amazon.com/-/es/B6-ordenador-alumi...,/work/games_seeker/data/raw/amazon_chasis_para...
176,SilverStone Technology Mini-ITX ML08B-H - Carc...,Chasis,Office,Low,US,143.0,https://www.amazon.com/-/es/SilverStone-Techno...,/work/games_seeker/data/raw/amazon_chasis_para...


## Almacenamiento

### SSD

In [None]:
# Start from the dataset loaded from amazon_nvme_m.2.csv.
ssd_df = dfs["amazon_nvme_m.2.csv"]

In [None]:
# Keep only listings whose title mentions "GB" or "TB"; rows without a title
# are dropped via `na=False`. `.copy()` detaches the result from the frame held
# in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
ssd_df = ssd_df[ssd_df["title"].str.contains("GB|TB", na=False)].copy()

In [None]:
# Tag every remaining row with the "Storage" component type.
ssd_df.loc[:, "type"] = "Storage"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Category by price: <= 200 "Stream", (200, 550] "Gamer", > 550 "Desing".
# NOTE(review): "Desing" looks like a misspelling of "Design"; the label is kept
# unchanged because other code may match on the exact string.
ssd_price = ssd_df["price"]
ssd_df.loc[:, "category"] = ""
ssd_df.loc[ssd_price <= 200, "category"] = "Stream"
ssd_df.loc[(200 < ssd_price) & (ssd_price <= 550), "category"] = "Gamer"
ssd_df.loc[ssd_price > 550, "category"] = "Desing"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Price range: <= 250 "Mid", above 250 "High" (no "Low" tier is assigned here).
ssd_df.loc[:, "range"] = ""
ssd_is_mid = ssd_df["price"].le(250)
ssd_is_high = ssd_df["price"].gt(250)
ssd_df.loc[ssd_is_mid, "range"] = "Mid"
ssd_df.loc[ssd_is_high, "range"] = "High"

In [None]:
# Display the labelled working frame (columns not yet reordered).
ssd_df

Unnamed: 0,title,symbol,price,link,origin,type,category,range
0,Crucial P5 Plus SSD 3D NAND NVMe M.2 de 500GB ...,US,84.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Stream,Mid
1,Sabrent Rocket Q 4TB NVMe PCIe M.2 2280 Unidad...,US,599.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Desing,High
2,SAMSUNG 980 SSD de 1 TB M.2 NVMe Interfaz inte...,US,99.0,https://www.amazon.com/-/es/Interfaz-tecnolog%...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Stream,Mid
3,SAMSUNG 970 EVO Plus SSD 2TB - M.2 NVMe Interf...,US,209.0,https://www.amazon.com/-/es/SAMSUNG-970-EVO-Pl...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Gamer,Mid
4,SSD M.2 980 PRO 2TB PCIe NVMe Gen4 interno par...,US,279.0,https://www.amazon.com/-/es/PCIe-interno-juego...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Gamer,High
...,...,...,...,...,...,...,...,...
214,TEAMGROUP T-Force CARDEA Zero Z440 - Disco de ...,US,259.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Gamer,High
217,"Centon Premium SSD, libre de frustración, PCIe...",US,579.0,https://www.amazon.com/-/es/Centon-Premium-lib...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Desing,High
220,Samsung 256GB PM981 SSD PCIe Gen3 x4 NVMe M.2 ...,US,99.0,https://www.amazon.com/-/es/Samsung-PM981-MZVL...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Stream,Mid
223,Mushkin Source-II - Unidad interna de estado s...,US,174.0,https://www.amazon.com/-/es/Mushkin-Source-II-...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv,Storage,Stream,Mid


In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_nvme_m.2.csv"] = ssd_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_nvme_m.2.csv"]

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,Crucial P5 Plus SSD 3D NAND NVMe M.2 de 500GB ...,Storage,Stream,Mid,US,84.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
1,Sabrent Rocket Q 4TB NVMe PCIe M.2 2280 Unidad...,Storage,Desing,High,US,599.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
2,SAMSUNG 980 SSD de 1 TB M.2 NVMe Interfaz inte...,Storage,Stream,Mid,US,99.0,https://www.amazon.com/-/es/Interfaz-tecnolog%...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
3,SAMSUNG 970 EVO Plus SSD 2TB - M.2 NVMe Interf...,Storage,Gamer,Mid,US,209.0,https://www.amazon.com/-/es/SAMSUNG-970-EVO-Pl...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
4,SSD M.2 980 PRO 2TB PCIe NVMe Gen4 interno par...,Storage,Gamer,High,US,279.0,https://www.amazon.com/-/es/PCIe-interno-juego...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
...,...,...,...,...,...,...,...,...
214,TEAMGROUP T-Force CARDEA Zero Z440 - Disco de ...,Storage,Gamer,High,US,259.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
217,"Centon Premium SSD, libre de frustración, PCIe...",Storage,Desing,High,US,579.0,https://www.amazon.com/-/es/Centon-Premium-lib...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
220,Samsung 256GB PM981 SSD PCIe Gen3 x4 NVMe M.2 ...,Storage,Stream,Mid,US,99.0,https://www.amazon.com/-/es/Samsung-PM981-MZVL...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv
223,Mushkin Source-II - Unidad interna de estado s...,Storage,Stream,Mid,US,174.0,https://www.amazon.com/-/es/Mushkin-Source-II-...,/work/games_seeker/data/raw/amazon_nvme_m.2.csv


### HDD

In [None]:
# Start from the dataset loaded from amazon_hdd_interno.csv.
hdd_df = dfs["amazon_hdd_interno.csv"]

In [None]:
# Drop listings whose title contains "exte" and anything priced at 20 or less.
# `na=False` treats rows without a title as non-matching so the mask stays
# boolean; `.copy()` detaches the result from the frame held in `dfs`, so the
# `.loc` assignments that follow no longer trigger SettingWithCopyWarning.
hdd_df = hdd_df[(~hdd_df["title"].str.contains("exte", na=False)) & (hdd_df["price"] > 20)].copy()

In [None]:
# Tag every remaining row with the "Storage" component type (same label as the SSD data).
hdd_df.loc[:, "type"] = "Storage"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Display the filtered rows with the new "type" column.
hdd_df

Unnamed: 0,title,symbol,price,link,origin,type
0,"Seagate IronWolf NAS - Disco duro interno HDD,...",US,279.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
1,Toshiba X300 - Disco duro interno de 10 TB de ...,US,228.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
2,Seagate BarraCuda - Disco duro interno de 2 TB...,US,46.0,https://www.amazon.com/-/es/Seagate-BarraCuda-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
3,Unidad de disco duro Seagate Exos 7E2,US,238.0,https://www.amazon.com/-/es/Unidad-disco-duro-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
4,"Seagate IronWolf NAS - Disco duro interno HDD,...",US,199.0,https://www.amazon.com/-/es/Seagate-IronWolf-N...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
...,...,...,...,...,...,...
166,Seagate FireCuda 2TB unidad híbrida de estado ...,US,199.0,https://www.amazon.com/-/es/Seagate-FireCuda-u...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
167,Seagate Cheetah SAS 16 MB Cache 2.5-Inch inter...,US,54.0,https://www.amazon.com/-/es/2-5-Inch-bare-oem-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
168,"Western Digital 3TB SATA 6 Gb/s 3.5""",US,148.0,https://www.amazon.com/-/es/Western-Digital-3T...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage
169,Seagate Cheetah 10K.7 ST3300007FC - Disco duro...,US,104.0,https://www.amazon.com/-/es/Seagate-Cheetah-10...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage


In [None]:
# Category by price: <= 50 "Office", (50, 200] "Gamer", > 200 "Desing".
# NOTE(review): "Desing" looks like a misspelling of "Design"; the label is kept
# unchanged because other code may match on the exact string.
hdd_df.loc[:, "category"] = ""
hdd_category_masks = {
    "Office": hdd_df["price"] <= 50,
    "Gamer": (hdd_df["price"] > 50) & (hdd_df["price"] <= 200),
    "Desing": hdd_df["price"] > 200,
}
for hdd_category, hdd_mask in hdd_category_masks.items():
    hdd_df.loc[hdd_mask, "category"] = hdd_category

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Price range: <= 35 "Low", (35, 100] "Mid", > 100 "High".
hdd_price = hdd_df["price"]
hdd_df.loc[:, "range"] = ""
hdd_df.loc[hdd_price.le(35), "range"] = "Low"
hdd_df.loc[hdd_price.gt(35) & hdd_price.le(100), "range"] = "Mid"
hdd_df.loc[hdd_price.gt(100), "range"] = "High"

In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_hdd_interno.csv"] = hdd_df[ORDERED_COLUMNS]

In [None]:
# Display the working frame; it keeps its original column order, the reordered version is the one stored in `dfs`.
hdd_df

Unnamed: 0,title,symbol,price,link,origin,type,category,range
0,"Seagate IronWolf NAS - Disco duro interno HDD,...",US,279.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Desing,High
1,Toshiba X300 - Disco duro interno de 10 TB de ...,US,228.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Desing,High
2,Seagate BarraCuda - Disco duro interno de 2 TB...,US,46.0,https://www.amazon.com/-/es/Seagate-BarraCuda-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Office,Mid
3,Unidad de disco duro Seagate Exos 7E2,US,238.0,https://www.amazon.com/-/es/Unidad-disco-duro-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Desing,High
4,"Seagate IronWolf NAS - Disco duro interno HDD,...",US,199.0,https://www.amazon.com/-/es/Seagate-IronWolf-N...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Gamer,High
...,...,...,...,...,...,...,...,...
166,Seagate FireCuda 2TB unidad híbrida de estado ...,US,199.0,https://www.amazon.com/-/es/Seagate-FireCuda-u...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Gamer,High
167,Seagate Cheetah SAS 16 MB Cache 2.5-Inch inter...,US,54.0,https://www.amazon.com/-/es/2-5-Inch-bare-oem-...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Gamer,Mid
168,"Western Digital 3TB SATA 6 Gb/s 3.5""",US,148.0,https://www.amazon.com/-/es/Western-Digital-3T...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Gamer,High
169,Seagate Cheetah 10K.7 ST3300007FC - Disco duro...,US,104.0,https://www.amazon.com/-/es/Seagate-Cheetah-10...,/work/games_seeker/data/raw/amazon_hdd_interno...,Storage,Gamer,High


## Tarjeta gráfica

In [None]:
# Start from the dataset loaded from amazon_tarjetas_graficas.csv.
gpu_df = dfs["amazon_tarjetas_graficas.csv"]

In [None]:
# Keep only listings priced above 150. `.copy()` detaches the result from the
# frame held in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
gpu_df = gpu_df[gpu_df["price"] > 150].copy()

In [None]:
# Tag every remaining row with the "GPU" component type.
gpu_df.loc[:, "type"] = "GPU"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Every GPU row gets the same combined category, regardless of price.
# NOTE(review): "Desing" is probably a typo for "Design" — confirm what other
# code expects before renaming the label.
gpu_df.loc[:, "category"] = 'Gamer/Desing'

In [None]:
# Display the working frame with the new "type" and "category" columns.
gpu_df

Unnamed: 0,title,symbol,price,link,origin,type,category
0,GT 1030 4GB GDRR4 64 bits HDMI DVI salida tarj...,US,159.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
1,"SAPLOS GTX 1050 Ti Tarjeta gráfica, 4 GB, 128 ...",US,359.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
2,ZOTAC Gaming GeForce GTX 1660 Super 6GB GDDR6 ...,US,523.0,https://www.amazon.com/-/es/Geforce-GTX-1660-S...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
3,ZOTAC Gaming GeForce RTX 3060 Twin Edge OC 12 ...,US,699.0,https://www.amazon.com/-/es/GeForce-refrigerac...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
4,XFX Speedster SWFT 210 Radeon RX 6600 CORE - T...,US,459.0,https://www.amazon.com/-/es/Speedster-SWFT-Rad...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
...,...,...,...,...,...,...,...
123,ASUS TUF Gaming NVIDIA GeForce GTX 1660 Ti EVO...,US,589.0,https://www.amazon.com/-/es/GeForce-DisplayPor...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
126,PNY NVIDIA Quadro K1200 - (VCQK1200DP-PB),US,318.0,https://www.amazon.com/-/es/PNY-NVIDIA-Quadro-...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
127,KAER Tarjeta gráfica AMD Radeon RX 550 de 4 GB...,US,199.0,https://www.amazon.com/gp/slredirect/picassoRe...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing
128,Tarjeta gráfica GeForce GT 1030 4GB GDDR4 64bi...,US,185.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...,GPU,Gamer/Desing


In [None]:
# Price range: <= 250 "Low", (250, 700] "Mid", > 700 "High".
gpu_df.loc[:, "range"] = ""
for gpu_range, gpu_mask in (
    ("Low", gpu_df["price"] <= 250),
    ("Mid", (gpu_df["price"] > 250) & (gpu_df["price"] <= 700)),
    ("High", gpu_df["price"] > 700),
):
    gpu_df.loc[gpu_mask, "range"] = gpu_range

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_tarjetas_graficas.csv"] = gpu_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_tarjetas_graficas.csv"]

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,GT 1030 4GB GDRR4 64 bits HDMI DVI salida tarj...,GPU,Gamer/Desing,Low,US,159.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
1,"SAPLOS GTX 1050 Ti Tarjeta gráfica, 4 GB, 128 ...",GPU,Gamer/Desing,Mid,US,359.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
2,ZOTAC Gaming GeForce GTX 1660 Super 6GB GDDR6 ...,GPU,Gamer/Desing,Mid,US,523.0,https://www.amazon.com/-/es/Geforce-GTX-1660-S...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
3,ZOTAC Gaming GeForce RTX 3060 Twin Edge OC 12 ...,GPU,Gamer/Desing,Mid,US,699.0,https://www.amazon.com/-/es/GeForce-refrigerac...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
4,XFX Speedster SWFT 210 Radeon RX 6600 CORE - T...,GPU,Gamer/Desing,Mid,US,459.0,https://www.amazon.com/-/es/Speedster-SWFT-Rad...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
...,...,...,...,...,...,...,...,...
123,ASUS TUF Gaming NVIDIA GeForce GTX 1660 Ti EVO...,GPU,Gamer/Desing,Mid,US,589.0,https://www.amazon.com/-/es/GeForce-DisplayPor...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
126,PNY NVIDIA Quadro K1200 - (VCQK1200DP-PB),GPU,Gamer/Desing,Mid,US,318.0,https://www.amazon.com/-/es/PNY-NVIDIA-Quadro-...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
127,KAER Tarjeta gráfica AMD Radeon RX 550 de 4 GB...,GPU,Gamer/Desing,Low,US,199.0,https://www.amazon.com/gp/slredirect/picassoRe...,/work/games_seeker/data/raw/amazon_tarjetas_gr...
128,Tarjeta gráfica GeForce GT 1030 4GB GDDR4 64bi...,GPU,Gamer/Desing,Low,US,185.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_tarjetas_gr...


## Fuentes de alimentación

In [None]:
# Start from the dataset loaded from amazon_fuentes_de_alimentacion_para_pc.csv.
power_df = dfs["amazon_fuentes_de_alimentacion_para_pc.csv"]

In [None]:
# Keep only listings priced above 100. `.copy()` detaches the result from the
# frame held in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
power_df = power_df[power_df["price"] > 100].copy()

In [None]:
# Tag every remaining row with the "Power Supply" component type.
power_df.loc[:, "type"] = "Power Supply"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Category by price: up to 250 is "Office", anything above is "Gamer".
power_price = power_df["price"]
power_df.loc[:, "category"] = ""
power_df.loc[power_price <= 250, "category"] = "Office"
power_df.loc[power_price > 250, "category"] = "Gamer"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Price range: <= 250 "Low", (250, 400] "Mid", > 400 "High".
power_df.loc[:, "range"] = ""
power_range_masks = {
    "Low": power_df["price"] <= 250,
    "Mid": (power_df["price"] > 250) & (power_df["price"] <= 400),
    "High": power_df["price"] > 400,
}
for power_range, power_mask in power_range_masks.items():
    power_df.loc[power_mask, "range"] = power_range

In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_fuentes_de_alimentacion_para_pc.csv"] = power_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_fuentes_de_alimentacion_para_pc.csv"] 

Unnamed: 0,title,type,category,range,symbol,price,link,origin
1,Fuente de alimentación PSU de 2000 W para comp...,Power Supply,Office,Low,US,198.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_fuentes_de_...
2,Corsair Series Watt Gold 80+ – Fuente de alime...,Power Supply,Office,Low,US,114.0,https://www.amazon.com/-/es/Corsair-Watt-Gold-...,/work/games_seeker/data/raw/amazon_fuentes_de_...
8,"EVGA Supernova 120-GP-0850-X1, 850 G+, 80 Plus...",Power Supply,Office,Low,US,127.0,https://www.amazon.com/-/es/120-GP-0850-X1-tot...,/work/games_seeker/data/raw/amazon_fuentes_de_...
10,"EVGA SuperNOVA 1000 G5, 80 Plus Gold 1000W, to...",Power Supply,Office,Low,US,170.0,https://www.amazon.com/-/es/1000-G5-totalmente...,/work/games_seeker/data/raw/amazon_fuentes_de_...
12,Modo ECO Totalmente Modular,Power Supply,Office,Low,US,237.0,https://www.amazon.com/-/es/220-T2-0850-X1-Mod...,/work/games_seeker/data/raw/amazon_fuentes_de_...
...,...,...,...,...,...,...,...,...
222,AmpFlow SCN-1000-12 Fuente de alimentaci&oacut...,Power Supply,Gamer,Mid,US,299.0,https://www.amazon.com/-/es/AmpFlow-SCN-1000-1...,/work/games_seeker/data/raw/amazon_fuentes_de_...
225,Tecnología Silverstone,Power Supply,Gamer,Mid,US,269.0,https://www.amazon.com/-/es/SST-ST1200-PTS-Tec...,/work/games_seeker/data/raw/amazon_fuentes_de_...
227,1800 W minería fuente de alimentación apoyo 8 ...,Power Supply,Office,Low,US,169.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_fuentes_de_...
233,SilverStone Technology Fuente de alimentación ...,Power Supply,Office,Low,US,149.0,https://www.amazon.com/-/es/SilverStone-Techno...,/work/games_seeker/data/raw/amazon_fuentes_de_...


## Refrigeración

In [None]:
# Start from the dataset loaded from amazon_refrigeracion_pc.csv.
ref_df = dfs["amazon_refrigeracion_pc.csv"]

In [None]:
# Keep only listings priced above 100. `.copy()` detaches the result from the
# frame held in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
ref_df = ref_df[ref_df["price"] > 100].copy()

In [None]:
# Tag every remaining row with the "Refrigeration" component type.
ref_df.loc[:, "type"] = "Refrigeration"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Category by price: up to 200 is "Office", anything above is "Gamer".
ref_df.loc[:, "category"] = ""
ref_is_office = ref_df["price"].le(200)
ref_is_gamer = ref_df["price"].gt(200)
ref_df.loc[ref_is_office, "category"] = "Office"
ref_df.loc[ref_is_gamer, "category"] = "Gamer"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Price range: <= 150 "Low", (150, 400] "Mid", > 400 "High".
ref_df.loc[:, "range"] = ""
for ref_range, ref_mask in (
    ("Low", ref_df["price"] <= 150),
    ("Mid", (ref_df["price"] > 150) & (ref_df["price"] <= 400)),
    ("High", ref_df["price"] > 400),
):
    ref_df.loc[ref_mask, "range"] = ref_range

In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_refrigeracion_pc.csv"] = ref_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_refrigeracion_pc.csv"] 

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,DeepCool Castle 360EX A-RGB WH AIO Enfriador d...,Refrigeration,Office,Low,US,129.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...
1,Cooler Master MasterLiquid ML360 Illusion Clos...,Refrigeration,Office,Mid,US,169.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...
2,"Corsair iCUE H100i RGB Pro XT, Radiador de 9.4...",Refrigeration,Office,Low,US,117.0,https://www.amazon.com/-/es/Corsair-iCUE-H100i...,/work/games_seeker/data/raw/amazon_refrigeraci...
3,"Corsair iCUE H100i RGB Pro XT, Radiador de 9.4...",Refrigeration,Office,Mid,US,179.0,https://www.amazon.com/-/es/Corsair-iCUE-H100i...,/work/games_seeker/data/raw/amazon_refrigeraci...
7,Corsair Hydro X Series XH305i Kit de refrigera...,Refrigeration,Gamer,High,US,465.0,https://www.amazon.com/-/es/Corsair-refrigerac...,/work/games_seeker/data/raw/amazon_refrigeraci...
14,Cooler Master MasterAir MA620M Dual Tower ARGB...,Refrigeration,Office,Low,US,121.0,https://www.amazon.com/-/es/MasterAir-Enfriado...,/work/games_seeker/data/raw/amazon_refrigeraci...
20,OWC Aura P12 M.2 NVMe SSD unidad de disco óptico,Refrigeration,Gamer,Mid,US,329.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...
22,Cooler Master MasterLiquid ML360 Illusion Clos...,Refrigeration,Office,Mid,US,169.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...
32,DeepCool Castle 360EX A-RGB WH AIO Enfriador d...,Refrigeration,Office,Low,US,129.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...
38,Cooler Master MasterLiquid ML360 Illusion Clos...,Refrigeration,Office,Mid,US,169.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_refrigeraci...


## RAM

In [None]:
# Start from the dataset loaded from amazon_memorias_ram_dd4.csv.
ram_df = dfs["amazon_memorias_ram_dd4.csv"]

In [None]:
# Keep only listings priced above 50 whose title mentions "GB"; rows without a
# title are dropped via `na=False`. `.copy()` detaches the result from the
# frame held in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
ram_df = ram_df[(ram_df["price"] > 50) & ram_df["title"].str.contains("GB", na=False)].copy()

In [None]:
# Tag every remaining row with the "RAM" component type.
ram_df.loc[:, "type"] = "RAM"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Category by price: up to 250 is "Office", anything above is "Gamer".
ram_price = ram_df["price"]
ram_df.loc[:, "category"] = ""
ram_df.loc[ram_price.le(250), "category"] = "Office"
ram_df.loc[ram_price.gt(250), "category"] = "Gamer"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Price range: <= 150 "Low", (150, 600] "Mid", > 600 "High".
ram_df.loc[:, "range"] = ""
ram_is_low = ram_df["price"] <= 150
ram_is_mid = (ram_df["price"] > 150) & (ram_df["price"] <= 600)
ram_is_high = ram_df["price"] > 600
ram_df.loc[ram_is_low, "range"] = "Low"
ram_df.loc[ram_is_mid, "range"] = "Mid"
ram_df.loc[ram_is_high, "range"] = "High"

In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_memorias_ram_dd4.csv"] = ram_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_memorias_ram_dd4.csv"] 

Unnamed: 0,title,type,category,range,symbol,price,link,origin
3,v-Color Prism RGB DDR4 16 GB (2 x 8 GB) 3200 M...,RAM,Office,Low,US,70.0,https://www.amazon.com/-/es/v-Color-PC4-25600-...,/work/games_seeker/data/raw/amazon_memorias_ra...
5,Corsair Vengeance LPX 16GB (2 x 8GB) DDR4 3600...,RAM,Office,Low,US,79.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...
7,Corsair Vengeance RGB Pro 32 GB (2 x 16 GB) DD...,RAM,Office,Mid,US,162.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...
13,Corsair Vengeance RGB Pro 32GB (2x16GB) DDR4 3...,RAM,Office,Low,US,149.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...
15,Corsair Vengeance RGB Pro 16GB (2x8GB) DDR4 32...,RAM,Office,Low,US,89.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...
...,...,...,...,...,...,...,...,...
275,Nemix Ram - Memoria registrada de Nemix Ram (3...,RAM,Office,Mid,US,159.0,https://www.amazon.com/-/es/Nemix-Ram-registra...,/work/games_seeker/data/raw/amazon_memorias_ra...
276,G.Skill Trident Z NEO Series 64GB (2 x 32GB) 2...,RAM,Gamer,Mid,US,309.0,https://www.amazon.com/-/es/G-Skill-Trident-PC...,/work/games_seeker/data/raw/amazon_memorias_ra...
277,TEAMGROUP T-Create Expert Overclocking 10L DDR...,RAM,Office,Low,US,105.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_memorias_ra...
279,Corsair Vengeance RGB Pro 16 GB (2 x 8 GB) DDR...,RAM,Office,Low,US,95.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...


In [None]:
# Display the working frame; it keeps its original column order, the reordered version is the one stored in `dfs`.
ram_df

Unnamed: 0,title,symbol,price,link,origin,type,category,range
3,v-Color Prism RGB DDR4 16 GB (2 x 8 GB) 3200 M...,US,70.0,https://www.amazon.com/-/es/v-Color-PC4-25600-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low
5,Corsair Vengeance LPX 16GB (2 x 8GB) DDR4 3600...,US,79.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low
7,Corsair Vengeance RGB Pro 32 GB (2 x 16 GB) DD...,US,162.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Mid
13,Corsair Vengeance RGB Pro 32GB (2x16GB) DDR4 3...,US,149.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low
15,Corsair Vengeance RGB Pro 16GB (2x8GB) DDR4 32...,US,89.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low
...,...,...,...,...,...,...,...,...
275,Nemix Ram - Memoria registrada de Nemix Ram (3...,US,159.0,https://www.amazon.com/-/es/Nemix-Ram-registra...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Mid
276,G.Skill Trident Z NEO Series 64GB (2 x 32GB) 2...,US,309.0,https://www.amazon.com/-/es/G-Skill-Trident-PC...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Gamer,Mid
277,TEAMGROUP T-Create Expert Overclocking 10L DDR...,US,105.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low
279,Corsair Vengeance RGB Pro 16 GB (2 x 8 GB) DDR...,US,95.0,https://www.amazon.com/-/es/Corsair-Vengeance-...,/work/games_seeker/data/raw/amazon_memorias_ra...,RAM,Office,Low


## Procesador

In [None]:
# Start from the dataset loaded from amazon_procesadores.csv.
cpu_df = dfs["amazon_procesadores.csv"]

In [None]:
# Keep only listings priced above 100. `.copy()` makes the result an
# independent frame, so the `.loc` assignments that follow do not write to a
# slice of the frame held in `dfs`.
cpu_df = cpu_df[cpu_df["price"] > 100].copy()

In [None]:
# Tag every remaining row with the "CPU" component type.
cpu_df.loc[:, "type"] = "CPU"

In [None]:
# Category by price: up to 150 is "Office", anything above is "Gamer".
cpu_df.loc[:, "category"] = ""
for cpu_category, cpu_mask in (
    ("Office", cpu_df["price"] <= 150),
    ("Gamer", cpu_df["price"] > 150),
):
    cpu_df.loc[cpu_mask, "category"] = cpu_category

In [None]:
# Price range: <= 150 "Low", (150, 300] "Mid", > 300 "High".
cpu_price = cpu_df["price"]
cpu_df.loc[:, "range"] = ""
cpu_df.loc[cpu_price.le(150), "range"] = "Low"
cpu_df.loc[cpu_price.gt(150) & cpu_price.le(300), "range"] = "Mid"
cpu_df.loc[cpu_price.gt(300), "range"] = "High"

In [None]:
# Store the processed frame back in `dfs`, keeping only ORDERED_COLUMNS in that order.
dfs["amazon_procesadores.csv"] = cpu_df[ORDERED_COLUMNS]

In [None]:
# Display the processed dataset as stored in `dfs`.
dfs["amazon_procesadores.csv"] 

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,AMD Ryzen 5 5600G Procesador de escritorio des...,CPU,Gamer,Mid,US,216.0,https://www.amazon.com/-/es/AMD-Ryzen-5600G-Pr...,/work/games_seeker/data/raw/amazon_procesadore...
1,AMD Ryzen 9 5900X - Procesador de escritorio d...,CPU,Gamer,High,US,449.0,https://www.amazon.com/-/es/AMD-Ryzen-5900X-Pr...,/work/games_seeker/data/raw/amazon_procesadore...
2,AMD Ryzen 5 5600X 6 núcleos y 12 hilos desbloq...,CPU,Gamer,Mid,US,228.0,https://www.amazon.com/-/es/AMD-Ryzen-5600X-de...,/work/games_seeker/data/raw/amazon_procesadore...
3,AMD Ryzen 9 5950X - Procesador de escritorio d...,CPU,Gamer,High,US,598.0,https://www.amazon.com/-/es/AMD-Ryzen-5950X-Pr...,/work/games_seeker/data/raw/amazon_procesadore...
4,Procesador Intel Core i5-10400 de 6 núcleos de...,CPU,Office,Low,US,148.0,https://www.amazon.com/-/es/Procesador-i5-1040...,/work/games_seeker/data/raw/amazon_procesadore...
...,...,...,...,...,...,...,...,...
88,Procesador Intel Pentium Gold G5420 Dual Core ...,CPU,Office,Low,US,137.0,https://www.amazon.com/-/es/Procesador-Intel-P...,/work/games_seeker/data/raw/amazon_procesadore...
89,Core i9 Octa-core i9-9900K 3.6GHz procesador d...,CPU,Gamer,High,US,574.0,https://www.amazon.com/-/es/Octa-core-i9-9900K...,/work/games_seeker/data/raw/amazon_procesadore...
90,Intel Core i7-2760QM SR02W PGA 988B G2 Procesa...,CPU,Office,Low,US,138.0,https://www.amazon.com/-/es/Intel-i7-2760QM-SR...,/work/games_seeker/data/raw/amazon_procesadore...
92,Intel Core i9-10900F Comet Lake 2.8GHz 20MB Sm...,CPU,Gamer,High,US,435.0,https://www.amazon.com/-/es/Intel-i9-10900F-2-...,/work/games_seeker/data/raw/amazon_procesadore...


## Placas

In [None]:
# Start from the dataset loaded from amazon_tarjeta_madre.csv.
board_df = dfs["amazon_tarjeta_madre.csv"]

In [None]:
# Keep only listings priced above 100. `.copy()` detaches the result from the
# frame held in `dfs`, so the `.loc` assignments that follow no longer trigger
# SettingWithCopyWarning.
board_df = board_df[board_df["price"] > 100].copy()

In [None]:
# Label every remaining row as a motherboard. assign() returns a new frame
# instead of writing into the filtered slice, which avoids the
# SettingWithCopyWarning that the in-place .loc assignment triggered.
board_df = board_df.assign(type="Mother Board")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [None]:
# Split motherboards into two usage categories by price:
# up to 150 -> "Office", above 150 -> "Gamer".
board_price = board_df["price"]
board_df.loc[:, "category"] = ""
board_df.loc[board_price.le(150), "category"] = "Office"
board_df.loc[board_price.gt(150), "category"] = "Gamer"

In [None]:
# Bucket motherboards into price tiers:
# up to 150 -> "Low", above 150 and up to 300 -> "Mid", above 300 -> "High".
board_price = board_df["price"]
board_df.loc[:, "range"] = ""
board_df.loc[board_price.le(150), "range"] = "Low"
board_df.loc[board_price.gt(150) & board_price.le(300), "range"] = "Mid"
board_df.loc[board_price.gt(300), "range"] = "High"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Keep only the export columns, in their canonical order, and store the
# processed motherboard table back under its source-file key.
dfs["amazon_tarjeta_madre.csv"] = board_df.loc[:, ORDERED_COLUMNS]

In [None]:
# Display the processed motherboard table as this cell's output.
dfs["amazon_tarjeta_madre.csv"] 

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,ASUS Prime Z590-A LGA 1200 (Intel® 11ª/10ª gen...,Mother Board,Gamer,Mid,US,259.0,https://www.amazon.com/-/es/Intel%C2%AE-genera...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
1,ASUS ROG Strix B450-F Gaming II AMD AM4 (Ryzen...,Mother Board,Gamer,Mid,US,199.0,https://www.amazon.com/-/es/ROG-B450-F-Gaming-...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
3,ASUS ROG Strix B550-F Gaming (WiFi 6) AMD AM4 ...,Mother Board,Gamer,Mid,US,194.0,https://www.amazon.com/-/es/ROG-B550-F-Motherb...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
4,ASUS ROG Strix B550-F Gaming AMD AM4 Zen 3 Ryz...,Mother Board,Gamer,Mid,US,178.0,https://www.amazon.com/-/es/ROG-B550-F-Motherb...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
5,GIGABYTE B450 AORUS PRO Wi-Fi (AMD Ryzen AM4/A...,Mother Board,Office,Low,US,109.0,https://www.amazon.com/-/es/B450-AORUS-PRO-Pro...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
...,...,...,...,...,...,...,...,...
142,Supermicro X11SDV-8C-TLN2F Motherboard Mini-IT...,Mother Board,Gamer,High,US,1399.0,https://www.amazon.com/-/es/Supermicro-X11SDV-...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
143,Supermicro X11SDV-8C+-TLN2F - Placa base Mini-...,Mother Board,Gamer,High,US,1399.0,https://www.amazon.com/-/es/Supermicro-X11SDV-...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
144,Supermicro x11ddw-nt placa base,Mother Board,Gamer,High,US,766.0,https://www.amazon.com/-/es/MBD-X11DDW-NT-O-Su...,/work/games_seeker/data/raw/amazon_tarjeta_mad...
146,700846-001 HP Jasmine AMD Desktop Motherboard FM2,Mother Board,Office,Low,US,118.0,https://www.amazon.com/-/es/700846-001-Jasmine...,/work/games_seeker/data/raw/amazon_tarjeta_mad...


# Export results

In [None]:
# Stack every per-component table into a single frame with a fresh 0..n-1 index.
df = pd.concat(dfs.values(), ignore_index=True)

In [None]:
# Display the combined table of all component types as this cell's output.
df

Unnamed: 0,title,type,category,range,symbol,price,link,origin
0,MUSETEX ATX - Funda para PC con 6 ventiladores...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/MUSETEX-ATX-ventil...,/work/games_seeker/data/raw/amazon_chasis_para...
1,MUSETEX Phantom Negro ATX Mid-Tower computador...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/Mid-Tower-computad...,/work/games_seeker/data/raw/amazon_chasis_para...
2,Vetroo A03 Mid-Tower ATX Gaming PC Case preins...,Chasis,Office,Low,US,109.0,https://www.amazon.com/-/es/gp/slredirect/pica...,/work/games_seeker/data/raw/amazon_chasis_para...
3,Razer Tomahawk ATX - Funda para juegos de medi...,Chasis,Office,Low,US,199.0,https://www.amazon.com/-/es/Razer-Tomahawk-ATX...,/work/games_seeker/data/raw/amazon_chasis_para...
4,Thermaltake Nivel 20 HT - Torre de computadora...,Chasis,Gamer,Mid,US,299.0,https://www.amazon.com/-/es/Thermaltake-Nivel-...,/work/games_seeker/data/raw/amazon_chasis_para...
...,...,...,...,...,...,...,...,...
916,Seagate FireCuda 2TB unidad híbrida de estado ...,Storage,Gamer,High,US,199.0,https://www.amazon.com/-/es/Seagate-FireCuda-u...,/work/games_seeker/data/raw/amazon_hdd_interno...
917,Seagate Cheetah SAS 16 MB Cache 2.5-Inch inter...,Storage,Gamer,Mid,US,54.0,https://www.amazon.com/-/es/2-5-Inch-bare-oem-...,/work/games_seeker/data/raw/amazon_hdd_interno...
918,"Western Digital 3TB SATA 6 Gb/s 3.5""",Storage,Gamer,High,US,148.0,https://www.amazon.com/-/es/Western-Digital-3T...,/work/games_seeker/data/raw/amazon_hdd_interno...
919,Seagate Cheetah 10K.7 ST3300007FC - Disco duro...,Storage,Gamer,High,US,104.0,https://www.amazon.com/-/es/Seagate-Cheetah-10...,/work/games_seeker/data/raw/amazon_hdd_interno...


In [None]:
# Write the combined table to data/interim, using a path relative to the
# notebook (the same "../.." project root used to list the raw files) instead
# of a machine-specific absolute path.
output_path = pathlib.Path("../..").joinpath("data", "interim", "pc_components.csv")
# Make sure the target folder exists; to_csv does not create directories.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, sep="|", index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=07e29549-6cbf-4e88-b69c-0fc48f2d6023' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>