In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd
import time
import re

In [53]:
# Scopes requested for the service account (Sheets and Drive endpoints).
scopes = [
    'https://www.googleapis.com/auth/spreadsheets',
    'https://www.googleapis.com/auth/drive',
]

# Build credentials from the local service-account key file and log in to gspread.
creds = ServiceAccountCredentials.from_json_keyfile_name(
    'lego-441023-0d2c9d44cde8.json',
    scopes,
)
client = gspread.authorize(creds)

# Open the 'inwestycje' spreadsheet, then pick its 'LEGO' worksheet by title.
spreadsheet = client.open('inwestycje')
sheet = spreadsheet.worksheet('LEGO')

In [54]:
# Read every record from the worksheet; row 1 supplies the column names.
data = sheet.get_all_records(head=1)

# Build the working DataFrame and report how many records were read
# (the count is taken BEFORE the trailing row is removed below).
df = pd.DataFrame(data)
num_rows = len(df)
print(f"Number of rows: {num_rows}")

# Discard the final record.
# NOTE(review): presumably a totals/summary row in the sheet -- confirm.
df = df.iloc[:-1]

# Preview the first few rows as a sanity check.
print(df.head())


Number of rows: 14
  lp                                      nazwa zestawu nr zestawu  \
0  1                          Pandy na Chiński Nowy Rok      40466   
1  2  Maska Batmana z klasycznego serialu telewizyjnego      76238   
2  3                    Harry Potter i Hermiona Granger      76393   
3  4               Zabawa i styl — zestaw dodatkowy VIP      40512   
4  5                          Bohaterowie bitwy o Endor      40623   

        seria cena wejściowa ilość sztuk suma zakupu miejsce zakupu  \
0  BrickHeadz          53.99           5   269.95 zł           lego   
1      Batman         195.99           3   587.97 zł           lego   
2  BrickHeadz         440.99           2   881.98 zł           lego   
3      Gratis            0.0           1     0.00 zł           lego   
4  BrickHeadz         179.99           3   539.97 zł           lego   

   data kupna data sprzedania  teraz     zysk suma sprz czas inwestycji  \
0  29/11/2022                    120  222.26%     600.0   

In [9]:
def priceGen(url):
    """Open ``url`` in Chrome and return the price text found on the page.

    The page is loaded with Selenium, its source is parsed with BeautifulSoup,
    and the text of the first ``<a class="bprice">`` inside the first
    ``<dl class="row m-0">`` element is returned.

    Args:
        url: Address of the product page to load.

    Returns:
        The raw price text (e.g. ``"120,00 zł"``) when found; otherwise the
        sentinel string ``'Price not found'`` (section present, no price tag)
        or ``'Price section not found'`` (no price section at all).

    Side effects:
        Prints the URL, the page title and (when the section exists) the price.
        Starts a Chrome instance for the duration of the call.
    """
    print(f"URL passed to priceGen: {url}")

    # Setup the WebDriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(url)
        # NOTE(review): no explicit wait is performed here, so this is whatever
        # the browser has rendered once get() returns.
        page_content = driver.page_source
    finally:
        # Always shut the browser down, even if loading the page failed;
        # otherwise every failed call leaves a Chrome process behind.
        driver.quit()

    # Parse the page content using BeautifulSoup
    soup = BeautifulSoup(page_content, 'html.parser')

    # A page without a <title> must not abort the scrape.
    title = soup.title.string if soup.title is not None else None
    print("Tytuł strony:", title)

    # Locate the section where prices are inside <dl class="row m-0">
    price_section = soup.find('dl', class_='row m-0')
    if price_section is None:
        return 'Price section not found'

    # Find the price by targeting the <a> tag with class 'bprice'
    price_tag = price_section.find('a', class_='bprice')
    price = price_tag.get_text() if price_tag is not None else 'Price not found'
    print(price)
    return price


In [11]:
# Convert a scraped price (e.g. "120,00 zł") into a float.
def clean_price(price_text, debug=True):
    """Parse a price value into a float.

    Args:
        price_text: Usually a string such as ``"120,00 zł"``. Whitespace
            (including non-breaking spaces used as thousands separators) and
            the ``zł`` suffix are removed, and a decimal comma is turned into
            a dot. ``int``/``float`` values are accepted and passed through,
            so applying the function twice to the same column is harmless.
        debug: When true (the default, matching the previous behavior), print
            the type and content of the input before parsing.

    Returns:
        The price as a ``float``. ``NaN`` is returned for anything that cannot
        be parsed: non-numeric text (for example the ``'Price not found'`` and
        ``'Price section not found'`` sentinels produced by ``priceGen``),
        ``None``, booleans and other unsupported types.
    """
    if debug:
        # Print the type and content of price_text to understand the format
        print("Debug:", type(price_text), price_text)

    # bool is a subclass of int, but True/False is never a meaningful price.
    if isinstance(price_text, bool):
        return float("nan")

    # Already numeric: nothing to clean.
    if isinstance(price_text, (int, float)):
        return float(price_text)

    if not isinstance(price_text, str):
        return float("nan")

    # Drop the currency suffix, then every whitespace character
    # (str.split() with no argument also splits on non-breaking spaces).
    cleaned = "".join(price_text.replace("zł", "").split())
    # Replace decimal comma with dot
    cleaned = cleaned.replace(",", ".")

    try:
        return float(cleaned)
    except ValueError:
        # Not a number at all (e.g. a "not found" sentinel).
        return float("nan")


In [15]:
# Scrape the current price for every URL in the 'check' column and time the run.
start_time = time.time()  # Record the start time
# Apply priceGen to the single column it needs instead of iterating whole rows:
# row-wise DataFrame.apply builds a Series for every row just to read one field.
df['teraz'] = df['check'].apply(priceGen)
end_time = time.time()  # Record the end time

# Calculate elapsed time
elapsed_time = end_time - start_time
print(f"Time passed: {elapsed_time:.2f} seconds")

URL passed to priceGen: https://promoklocki.pl/lego-brickheadz-40466-pandy-na-chinski-nowy-rok-p21201
Tytuł strony: LEGO 40466 BrickHeadz Pandy na Chiński Nowy Rok - porównaj ceny - promoklocki.pl
120,00 zł
URL passed to priceGen: https://promoklocki.pl/lego-dc-super-heroes-76238-maska-batmana-z-klasycznego-serialu-telewizyjnego-p21439
Tytuł strony: LEGO 76238 DC Super Heroes Maska Batmana z klasycznego serialu telewizyjnego - porównaj ceny - promoklocki.pl
199,00 zł
URL passed to priceGen: https://promoklocki.pl/lego-harry-potter-76393-harry-potter-i-hermiona-granger-p21316
Tytuł strony: LEGO 76393 Harry Potter Harry Potter i Hermiona Granger - porównaj ceny - promoklocki.pl
489,00 zł
URL passed to priceGen: https://promoklocki.pl/lego-promocyjne-40512-zabawa-i-styl-zestaw-dodatkowy-vip-p22004
Tytuł strony: LEGO 40512 Promocyjne Zabawa i styl - zestaw dodatkowy VIP - porównaj ceny - promoklocki.pl
34,99 zł
URL passed to priceGen: https://promoklocki.pl/lego-brickheadz-40623-bohaterowi

In [16]:
# Test clean_price on your data
df.loc[:, 'teraz'] = df['teraz'].apply(clean_price)

print(df['teraz'])

Debug: <class 'str'> 120,00 zł
Debug: <class 'str'> 199,00 zł
Debug: <class 'str'> 489,00 zł
Debug: <class 'str'> 34,99 zł
Debug: <class 'str'> 235,00 zł
Debug: <class 'str'> 75,00 zł
Debug: <class 'str'> 79,00 zł
Debug: <class 'str'> 182,69 zł
Debug: <class 'str'> 58,99 zł
Debug: <class 'str'> 75,00 zł
Debug: <class 'str'> 239,90 zł
Debug: <class 'str'> 79,00 zł
Debug: <class 'str'> 114,56 zł
0      120.0
1      199.0
2      489.0
3      34.99
4      235.0
5       75.0
6       79.0
7     182.69
8      58.99
9       75.0
10     239.9
11      79.0
12    114.56
Name: teraz, dtype: object


In [19]:
# Write the prices held in df['teraz'] back to the 'teraz' column of the sheet.
values = df['teraz'].values.tolist()

# Get the headers of the sheet to locate the 'teraz' column
headers = sheet.row_values(1)  # Assuming the first row contains headers

# Find the column index for 'teraz'
teraz_col_index = headers.index('teraz') + 1  # Adding 1 because gspread is 1-indexed

# Build one Cell per data row (sheet row 2 is the first row below the headers).
# Missing prices (NaN) are skipped: NaN cannot be encoded in the JSON request,
# and skipping keeps whatever value the sheet already holds for that row.
cells_to_write = [
    gspread.Cell(row_number, teraz_col_index, value)
    for row_number, value in enumerate(values, start=2)
    if pd.notna(value)
]
skipped = len(values) - len(cells_to_write)

# Send all cells in a single request instead of one request per cell.
# 'USER_ENTERED' matches the input mode used by update_cell().
if cells_to_write:
    sheet.update_cells(cells_to_write, value_input_option='USER_ENTERED')
if skipped:
    print(f"Skipped {skipped} row(s) without a price; their cells were left unchanged.")

print("Google Sheet 'teraz' column updated successfully.")


Google Sheet 'teraz' column updated successfully.


In [29]:
# # Loop through the first 5 rows and set the 'teraz' column
# for index in range(5):
#     df.loc[index, 'teraz'] = priceGen(df.loc[index, 'check'])

# # Show the results for the first 5 rows
# print(df.head(5)[['check', 'teraz']])


URL passed to priceGen: https://promoklocki.pl/lego-brickheadz-40466-pandy-na-chinski-nowy-rok-p21201
Tytuł strony: LEGO 40466 BrickHeadz Pandy na Chiński Nowy Rok - porównaj ceny - promoklocki.pl
120,00 zł


  df.loc[index, 'teraz'] = priceGen(df.loc[index, 'check'])


URL passed to priceGen: https://promoklocki.pl/lego-dc-super-heroes-76238-maska-batmana-z-klasycznego-serialu-telewizyjnego-p21439
Tytuł strony: LEGO 76238 DC Super Heroes Maska Batmana z klasycznego serialu telewizyjnego - porównaj ceny - promoklocki.pl
199,00 zł
URL passed to priceGen: https://promoklocki.pl/lego-harry-potter-76393-harry-potter-i-hermiona-granger-p21316
Tytuł strony: LEGO 76393 Harry Potter Harry Potter i Hermiona Granger - porównaj ceny - promoklocki.pl
489,00 zł
URL passed to priceGen: https://promoklocki.pl/lego-promocyjne-40512-zabawa-i-styl-zestaw-dodatkowy-vip-p22004
Tytuł strony: LEGO 40512 Promocyjne Zabawa i styl - zestaw dodatkowy VIP - porównaj ceny - promoklocki.pl
34,99 zł
URL passed to priceGen: https://promoklocki.pl/lego-brickheadz-40623-bohaterowie-bitwy-o-endor-p22295
Tytuł strony: LEGO 40623 BrickHeadz Bohaterowie bitwy o Endor - porównaj ceny - promoklocki.pl
235,00 zł
                                               check      teraz
0  https://prom

In [57]:
# Show the entry-price column to inspect its values and dtype.
print(df.loc[:, 'cena wejściowa'])

0      53.99
1     195.99
2     440.99
3        0.0
4     179.99
5        0.0
6      44.99
7     119.99
8        0.0
9        0.0
10    179.99
11     44.99
12       0.0
Name: cena wejściowa, dtype: object


In [71]:
# Portfolio summary: money paid in, current value, and percentage return.
#
# The columns arrive with dtype=object, so convert them to numeric first.
# Multiplying a stray string by an integer quantity would otherwise repeat the
# string instead of failing; pd.to_numeric raises on values it cannot parse.
entry_price = pd.to_numeric(df['cena wejściowa'])
current_price = pd.to_numeric(df['teraz'])
quantity = pd.to_numeric(df['ilość sztuk'])

cashin = round((entry_price * quantity).sum(), 2)
cashout = round((current_price * quantity).sum(), 2)

# With nothing paid in, a percentage return is undefined: report NaN
# rather than dividing by zero.
profit = round((cashout - cashin) / cashin * 100, 2) if cashin else float("nan")

print(f"Cash in = {cashin}")
print(f"Cash out = {cashout}")
print(f"Profit = {profit}%")

Cash in = 3044.78
Cash out = 4421.51
Profit = 45.22%
