# PC PREDICTION APP

In [581]:
import os
import numpy as np
import pandas as pd

In [582]:
files = os.listdir("incehesap")
df = pd.concat([pd.read_csv(f"incehesap/{file}") for file in files], ignore_index=True)

In [583]:
print(df.info)

<bound method DataFrame.info of                                     aspect-square href  \
0    https://www.incehesap.com/asus-vivobook-15-x15...   
1    https://www.incehesap.com/asus-vivobook-15-x15...   
2    https://www.incehesap.com/lenovo-v15-g4-82yy00...   
3    https://www.incehesap.com/lenovo-v15-g3-82tv00...   
4    https://www.incehesap.com/lenovo-loq-83gs00pht...   
..                                                 ...   
344  https://www.incehesap.com/lenovo-thinkbook-16p...   
345  https://www.incehesap.com/lenovo-thinkpad-l15-...   
346  https://www.incehesap.com/lenovo-v15-g3-82tv00...   
347  https://www.incehesap.com/acer-extensa-15-ex21...   
348  https://www.incehesap.com/acer-nitro-5-an515-4...   

                                     aspect-square src  \
0    https://www.incehesap.com/resim/urun/202504/67...   
1    https://www.incehesap.com/resim/urun/202504/67...   
2    https://www.incehesap.com/resim/urun/202503/67...   
3    https://www.incehesap.com/resim/ur

In [584]:
print(df.columns)

Index(['aspect-square href', 'aspect-square src', 'overflow-hidden', 'text-xs',
       'text-xs 2', 'text-xs 3', 'text-xs 4', 'text-xs 5', 'text-2xl',
       'text-xs 6', 'flex', 'flex href', '!ml-0'],
      dtype='object')


In [585]:
def drop_columns(df, cols):
    """
    This function removes the columns from a DataFrame.
    :param df:
    :param cols:
    :return:
    """

    for col in cols:
        try:
            df.drop(col, axis = 1, inplace=True)
        except Exception as e:
            print(f"An error has occured : {e}")

In [586]:
cols = ['aspect-square href', 'aspect-square src', 'text-xs 5',
       'text-xs 6', 'flex', 'flex href', '!ml-0']
drop_columns(df,cols)

In [587]:
print(df.info)

<bound method DataFrame.info of                                        overflow-hidden        text-xs  \
0    ASUS Vivobook 15 X1504VA-NJ2405 IH6 i5-1334U 4...  Intel Core i5   
1    ASUS Vivobook 15 X1504VA-NJ2405 IH1 i5-1334U 1...  Intel Core i5   
2    Lenovo V15 G4 82YY0013TR IH5 Ryzen 7 7730U 40G...    AMD Ryzen 7   
3    Lenovo V15 G3 82TV009CTX IH1 Ryzen 7 5825U 16G...    AMD Ryzen 7   
4    Lenovo  LOQ 83GS00PHTR IH5 i5-12450HX 32GB 512...  INTEL Core İ5   
..                                                 ...            ...   
344  Lenovo ThinkBook 16p G4 21J80032TR i7-13700H 3...  Intel Core i7   
345  Lenovo ThinkPad L15 21C7002LTX G3 Ryzen 7 Pro ...    AMD Ryzen 7   
346  Lenovo V15 G3 82TV004MTX Ryzen7-5825U 16GB 512...    AMD Ryzen 7   
347  Acer Extensa 15 EX215-54-57KW NX.EGJEY.006 i5-...  Intel Core i5   
348  Acer Nitro 5 AN515-46-R6KM NH.QGXEY.005 Ryzen ...    AMD Ryzen 5   

         text-xs 2                text-xs 3   text-xs 4   text-2xl  
0            40 GB    

In [588]:
df['feature'] = df['overflow-hidden'].apply(lambda x: x.split(" "))
df['brand'] = df['feature'].str[0]

In [589]:
drop_columns(df, ['feature', 'overflow-hidden'])

In [590]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 349 entries, 0 to 348
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text-xs    349 non-null    object
 1   text-xs 2  349 non-null    object
 2   text-xs 3  349 non-null    object
 3   text-xs 4  349 non-null    object
 4   text-2xl   241 non-null    object
 5   brand      349 non-null    object
dtypes: object(6)
memory usage: 16.5+ KB
None


In [591]:
print(df[["brand"]].sample(50))

               brand
176             Dell
250           Quadro
288              MSI
237              MSI
51          GIGABYTE
344           Lenovo
116             ASUS
347             Acer
190           Lenovo
217           Lenovo
59            LENOVO
38     Asus Vivobook
239              MSI
308               HP
282           Lenovo
302               HP
151           Lenovo
300              MSI
290               HP
180               HP
231           Lenovo
23                HP
24                HP
126           Lenovo
97                HP
128           Lenovo
88                HP
210               HP
166               HP
139           Lenovo
57            LENOVO
171       Lenovo LOQ
339               HP
336           Lenovo
280           Lenovo
242              MSI
329               HP
274              MSI
342              MSI
214           Lenovo
338           Lenovo
230              MSI
100               HP
307               HP
78                HP
304               HP
41   Asus Viv

In [592]:
print(df["brand"].unique())


['ASUS' 'Lenovo' 'Lenovo\xa0' 'Lenovo\xa0LOQ' 'MSI' 'HP' 'Asus'
 'Asus\xa0Vivobook' 'Asus\xa0Vivobook16' 'GIGABYTE' 'LENOVO' 'DELL' 'Dell'
 'Quadro' 'Hp' 'Technopc' 'Acer']


In [593]:
replace_dict = {
    'ASUS': 'Asus',
    'Asus\xa0Vivobook': 'Asus',
    'Asus\xa0Vivobook16': 'Asus',
    'LENOVO': 'Lenovo',
    'Lenovo\xa0LOQ': 'Lenovo',
    'Lenovo\xa0': 'Lenovo',
    'DELL': 'Dell',
    'HP': 'Hp',
}
df['brand'] = df['brand'].replace(replace_dict.keys(), replace_dict.values()).astype(str)
df['brand'].dropna(inplace=True)
df['brand'].unique()

array(['Asus', 'Lenovo', 'MSI', 'Hp', 'GIGABYTE', 'Dell', 'Quadro',
       'Technopc', 'Acer'], dtype=object)

In [594]:
print(df['text-xs'].unique())

['Intel Core i5' 'AMD Ryzen 7' 'INTEL Core İ5' 'i5-13420H'
 'Intel Core i5-13450HX' 'Intel Core i7-14650HX' 'Intel Core i5‑210H'
 'Intel Core Ultra 7' 'INTEL™' 'Intel Core i7' 'Intel Core Ultra 9'
 'AMD Ryzen 9' 'Intel Core Ultra 5' 'AMD' 'AMD Ryzen 5' 'INTEL Core™ i7'
 'INTEL Core™ i5' 'Intel Core i9' 'INTEL Core™ i3' 'Intel Ultra Serisi'
 'Intel Core i3' 'Ryzen AI 9 365' 'i7-13620H' 'AMD Ryzen 7 PRO'
 'INTEL Core İ7' 'AMD Ryzen 5 6600U' 'INTEL Celeron'
 'Intel Core Ultra 7 155H' 'Intel Core Ultra 5 135H' 'AMD Ryzen 5 7530U']


In [595]:
replace_dict = {
    'i5-13420H': 'Intel Core i5',
    'i7-13620H': 'Intel Core i7',
    'INTEL Core İ5': 'Intel Core i5',
    'Intel Core i5-13450HX': 'Intel Core i5',
    'Intel Core i7-14650HX': 'Intel Core i7',
    'Intel Core i5‑210H': 'Intel Core i5',
    'INTEL Core™ i7': 'Intel Core i7',
    'INTEL Core™ i5': 'Intel Core i5',
    'INTEL Core™ i3': 'Intel Core i3',
    'Ryzen AI 9 365': 'AMD Ryzen AI 9',
    'INTEL Core İ7': 'Intel Core i7',
    'AMD Ryzen 5 6600U': 'AMD Ryzen 5',
    'INTEL Celeron': 'Intel Celeron',
    'Intel Core Ultra 7 155H': 'Intel Core Ultra 7',
    'Intel Core Ultra 5 135H': 'Intel Core Ultra 5',
    'AMD Ryzen 5 7530U': 'AMD Ryzen 5',
}
df['text-xs'] = df['text-xs'].replace(replace_dict.keys(), replace_dict.values()).astype(str)
df['text-xs'].dropna(inplace=True)
df['processor'] = df['text-xs']
df = df[(df['processor'] != "AMD") & (df['processor'] != "INTEL™") & (df['processor'] != "Intel Ultra Serisi")]

In [596]:
print(df["processor"])

0      Intel Core i5
1      Intel Core i5
2        AMD Ryzen 7
3        AMD Ryzen 7
4      Intel Core i5
           ...      
344    Intel Core i7
345      AMD Ryzen 7
346      AMD Ryzen 7
347    Intel Core i5
348      AMD Ryzen 5
Name: processor, Length: 315, dtype: object


In [597]:
print(df["processor"].unique())

['Intel Core i5' 'AMD Ryzen 7' 'Intel Core i7' 'Intel Core Ultra 7'
 'Intel Core Ultra 9' 'AMD Ryzen 9' 'Intel Core Ultra 5' 'AMD Ryzen 5'
 'Intel Core i9' 'Intel Core i3' 'AMD Ryzen AI 9' 'AMD Ryzen 7 PRO'
 'Intel Celeron']


In [598]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 315 entries, 0 to 348
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text-xs    315 non-null    object
 1   text-xs 2  315 non-null    object
 2   text-xs 3  315 non-null    object
 3   text-xs 4  315 non-null    object
 4   text-2xl   217 non-null    object
 5   brand      315 non-null    object
 6   processor  315 non-null    object
dtypes: object(7)
memory usage: 19.7+ KB
None


In [599]:
print(df['text-xs 2'].unique())

['40 GB' '16 GB' '32 GB' '64 GB' '24 GB' '8 GB' '12 GB' '48 GB'
 '16 GB 1600 MHz' '16 GB LPDDR5-6400 MHz RAM' '96 GB'
 '16 GB DDR5-5600 MHz RAM (2 x 8 GB)' '4 GB' '8 GB 4800 Mhz']


In [600]:
df['text-xs 2'].dropna(inplace=True)
df['ram'] = df['text-xs 2'].apply(lambda x: x.split(" ")[0]).astype(int)

In [601]:
print(df['ram'].unique())

[40 16 32 64 24  8 12 48 96  4]


In [602]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 315 entries, 0 to 348
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text-xs    315 non-null    object
 1   text-xs 2  315 non-null    object
 2   text-xs 3  315 non-null    object
 3   text-xs 4  315 non-null    object
 4   text-2xl   217 non-null    object
 5   brand      315 non-null    object
 6   processor  315 non-null    object
 7   ram        315 non-null    int64 
dtypes: int64(1), object(7)
memory usage: 22.1+ KB
None


In [603]:
drop_columns(df, ['text-xs', 'text-xs 2'])

In [604]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 315 entries, 0 to 348
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text-xs 3  315 non-null    object
 1   text-xs 4  315 non-null    object
 2   text-2xl   217 non-null    object
 3   brand      315 non-null    object
 4   processor  315 non-null    object
 5   ram        315 non-null    int64 
dtypes: int64(1), object(5)
memory usage: 17.2+ KB
None


In [605]:
print(df['text-xs 3'].unique())

['Intel UHD Graphics' 'AMD Radeon Graphics' 'Nvidia GeForce RTX 4060'
 'Nvidia GeForce RTX 4050' 'NVIDIA® GeForce RTX™ 5050'
 'NVIDIA® GeForce RTX™ 5060' 'NVIDIA® GeForce RTX™ 5070'
 'Nvidia GeForce RTX3050' 'RTX 5070 Ti' 'Intel® Arc™ Graphics'
 'GeForce RTX 5090' 'Nvidia GeForce RTX 4070' 'NVIDIA GeForce RTX 4050'
 'Intel Iris Xe Graphics' 'RTX 5080' 'Intel HD Graphics'
 'Nvidia GeForce MX570' 'Intel® Arc Graphics' 'RTX3500 Ada'
 'Intel Arc Graphics' 'AMD Radeon 740M' 'Intel® Iris® Xᵉ Grafik Kartı'
 'AMD Radeon 660M Grafik Kartı' 'GeForce GTX 5070TI'
 'Intel UHD Graphics 630' 'AMD Radeon™ 680M' 'Nvidia GeForce RTX 4090'
 'GeForce MX500' 'Nvidia GeForce MX330' 'Nvidia GeForce RTX 3060'
 'Nvidia GeForce RTX 3070' 'Intel® UHD Graphics' 'Radeon' 'NVIDIA'
 'Intel Iris Xe Grafik Kartı']


In [606]:
replace_dict = {
    'RTX 5070 Ti': 'Nvidia GeForce RTX™ 5070TI',
    'GeForce RTX 5090': 'Nvidia GeForce RTX™ 5090',
    'NVIDIA GeForce RTX 4050': 'Nvidia GeForce RTX™ 4050',
    'NVIDIA® GeForce RTX™ 5050': 'Nvidia GeForce RTX™ 5050',
    'NVIDIA® GeForce RTX™ 5060': 'Nvidia GeForce RTX™ 5060',
    'NVIDIA® GeForce RTX™ 5070': 'Nvidia GeForce RTX™ 5070',
    'GeForce GTX 5070TI': 'Nvidia GeForce RTX™ 5070TI',
    'Nvidia GeForce RTX 3070': 'Nvidia GeForce RTX™ 3070',
    'GeForce MX500': 'Nvidia GeForce MX500',
    'Nvidia GeForce RTX 4060': 'Nvidia GeForce RTX™ 4060',
    'Nvidia GeForce RTX 4050': 'Nvidia GeForce RTX™ 4050',
    'Nvidia GeForce RTX 3060': 'Nvidia GeForce RTX™ 3060',
    'Nvidia GeForce RTX 4090': 'Nvidia GeForce RTX™ 4090',
    'Nvidia GeForce RTX 4070': 'Nvidia GeForce RTX™ 4070',
    'RTX 5080': 'Nvidia GeForce RTX™ 5080',
    'Intel® Arc Graphics': 'Intel Arc Graphics',
    'Intel® Arc™ Graphics': 'Intel Arc Graphics',
    'Intel® Iris® Xᵉ Grafik Kartı': 'Intel Iris Xe Graphics',
    'Intel Iris Xe Grafik Kartı': 'Intel Iris Xe Graphics',
    'AMD Radeon™ 660M': 'AMD Radeon 660M',
    'Intel® UHD Graphics': 'Intel UHD Graphics',
    'Nvidia GeForce RTX3050': 'Nvidia GeForce RTX™ 3050',
    'RTX3500 ': 'Nvidia RTX™ 3500 Ada',
    'AMD Radeon 660M Grafik Kartı': 'AMD Radeon 660M Graphics',
}

df['text-xs 3'] = df['text-xs 3'].replace(replace_dict.keys(), replace_dict.values()).astype(str)
df['text-xs 3'].dropna(inplace=True)
df = df[(df['text-xs 3'] != "NVIDIA") & (df['text-xs 3'] != "Radeon") & (df['processor'] != "Intel Ultra Serisi")]
df['video-card'] = df['text-xs 3']

In [607]:
print(df['video-card'].unique())

['Intel UHD Graphics' 'AMD Radeon Graphics' 'Nvidia GeForce RTX™ 4060'
 'Nvidia GeForce RTX™ 4050' 'Nvidia GeForce RTX™ 5050'
 'Nvidia GeForce RTX™ 5060' 'Nvidia GeForce RTX™ 5070'
 'Nvidia GeForce RTX™ 3050' 'Nvidia GeForce RTX™ 5070TI'
 'Intel Arc Graphics' 'Nvidia GeForce RTX™ 5090'
 'Nvidia GeForce RTX™ 4070' 'Intel Iris Xe Graphics'
 'Nvidia GeForce RTX™ 5080' 'Intel HD Graphics' 'Nvidia GeForce MX570'
 'RTX3500 Ada' 'AMD Radeon 740M' 'AMD Radeon 660M Graphics'
 'Intel UHD Graphics 630' 'AMD Radeon™ 680M' 'Nvidia GeForce RTX™ 4090'
 'Nvidia GeForce MX500' 'Nvidia GeForce MX330' 'Nvidia GeForce RTX™ 3060'
 'Nvidia GeForce RTX™ 3070']


In [608]:
drop_columns(df, ['text-xs 3'])


In [609]:
print(df['video-card'])

0            Intel UHD Graphics
1            Intel UHD Graphics
2           AMD Radeon Graphics
3           AMD Radeon Graphics
4      Nvidia GeForce RTX™ 4060
                 ...           
344    Nvidia GeForce RTX™ 4060
345         AMD Radeon Graphics
346         AMD Radeon Graphics
347      Intel Iris Xe Graphics
348    Nvidia GeForce RTX™ 3050
Name: video-card, Length: 313, dtype: object


In [610]:
print(df['text-xs 4'].unique())

['1 TB' '512 GB SSD' '1TB SSD' '16 GB' '6 TB' '512 GB' '2TB SSD'
 '1 TB SSD' '2 TB' '4 TB' '256 GB SSD' '256 GB' 'Yok' '128 GB']


In [611]:
replace_dict = {
    '1 TB': '1024 GB',
    '1TB': '1024 GB',
    '1 TB SSD': '1024 GB',
    '1TB SSD': '1024 GB',
    '2 TB': '2048 GB',
    '2TB SSD': '2048 GB',
    '4 TB': '4096 GB',
    '6 TB': '6144 GB',
}
df['text-xs 4'] = df['text-xs 4'].replace(replace_dict.keys(), replace_dict.values()).astype(str)
df['memory'] = df['text-xs 4']
df = df[(df['memory'] != "Yok")]

In [612]:
print(df['memory'].unique())

['1024 GB' '512 GB SSD' '16 GB' '6144 GB' '512 GB' '2048 GB' '4096 GB'
 '256 GB SSD' '256 GB' '128 GB']


In [613]:
df['memory'] = df['text-xs 4'].apply(lambda x: x.split(' ')[0]).astype(int)

In [614]:
print(df['memory'].unique())

[1024  512   16 6144 2048 4096  256  128]


In [615]:
print(df['text-2xl'].unique())

['18.599 TL' '24.009 TL' '19.799 TL' '17.649 TL' '37.449 TL' '38.699 TL'
 '41.189 TL' '38.649 TL' '17.999 TL' '36.799 TL' '42.999 TL' '56.339 TL'
 '16.749 TL' '58.729 TL' '64.149 TL' '68.399 TL' '82.069 TL' '37.259 TL'
 '133.639 TL' '23.999 TL' '47.119 TL' '324.599 TL' '91.859 TL'
 '151.499 TL' '41.039 TL' '40.089 TL' '39.569 TL' '39.059 TL' '37.739 TL'
 '36.789 TL' '25.799 TL' '26.399 TL' '23.899 TL' '24.599 TL' '25.549 TL'
 '24.499 TL' '44.999 TL' '42.409 TL' '29.389 TL' '30.659 TL' '31.839 TL'
 '30.419 TL' '43.749 TL' '42.449 TL' '33.899 TL' '36.559 TL' '24.479 TL'
 '16.499 TL' '15.399 TL' '32.029 TL' '39.499 TL' '40.819 TL' '29.719 TL'
 '26.269 TL' '20.399 TL' '19.349 TL' '24.759 TL' '19.249 TL' '18.249 TL'
 '24.149 TL' '18.699 TL' '24.429 TL' '20.999 TL' '23.249 TL' '19.499 TL'
 '22.309 TL' '30.609 TL' '29.479 TL' '29.059 TL' '25.649 TL' '24.649 TL'
 '22.999 TL' '23.449 TL' '25.929 TL' '26.359 TL' '26.779 TL' '19.599 TL'
 '18.999 TL' '18.499 TL' '22.759 TL' '28.249 TL' '25.939 TL'

In [616]:
df = df.dropna()
df['text-2xl'] = df['text-2xl'].apply(lambda x: x.replace('.', ''))
df['price'] = df['text-2xl'].apply(lambda x: x.split(' ')[0]).astype(int)

In [617]:
print(df['price'].unique())

[ 18599  24009  19799  17649  37449  38699  41189  38649  17999  36799
  42999  56339  16749  58729  64149  68399  82069  37259 133639  23999
  47119 324599  91859 151499  41039  40089  39569  39059  37739  36789
  25799  26399  23899  24599  25549  24499  44999  42409  29389  30659
  31839  30419  43749  42449  33899  36559  24479  16499  15399  32029
  39499  40819  29719  26269  20399  19349  24759  19249  18249  24149
  18699  24429  20999  23249  19499  22309  30609  29479  29059  25649
  24649  22999  23449  25929  26359  26779  19599  18999  18499  22759
  28249  25939  19999  19099  40509  13599  21789  20589  21419  20079
  20689  16999  62399  32969  22499  30139  21349  29009  37219  26699
  27649  25699  33249  24999  17299  17799  16299  19849  18849  17349
  18199  18649  39899  37199  35899  46649  29049  29199  33999 181199
 240039 235319 173999 245899 287799  42919  48819  58169  44289  43729
  36899  39469  36269  34859  28539  34299  37969  16789  22899  17699
  3310

In [618]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 217 entries, 0 to 240
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   text-xs 4   217 non-null    object
 1   text-2xl    217 non-null    object
 2   brand       217 non-null    object
 3   processor   217 non-null    object
 4   ram         217 non-null    int64 
 5   video-card  217 non-null    object
 6   memory      217 non-null    int64 
 7   price       217 non-null    int64 
dtypes: int64(3), object(5)
memory usage: 15.3+ KB
None


In [619]:
drop_columns(df, ["text-xs 4", "text-2xl"])

In [620]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
Index: 217 entries, 0 to 240
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   brand       217 non-null    object
 1   processor   217 non-null    object
 2   ram         217 non-null    int64 
 3   video-card  217 non-null    object
 4   memory      217 non-null    int64 
 5   price       217 non-null    int64 
dtypes: int64(3), object(3)
memory usage: 11.9+ KB
None


In [621]:
df.to_csv("data.csv", index=False)