In [3]:
# Full script 

# TODO: investigate why the login sometimes fails
# NOTE: login() already writes the cookie file only when the logged-in header is found

import os
import pickle  # For saving and loading cookies
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import time
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
import datetime
import logging
from config import BOT_TOKEN, CHAT_ID, MOM_CHAT_ID, PHONE_NUMBER, PASSWORD

# Set up logging: INFO and above go to a file with a timestamp and level prefix.
logging.basicConfig(filename='kabanchik_log.txt', level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Chrome options shared by every driver instance that main() creates.
chrome_options = Options()
chrome_options.add_argument("--headless")  # Runs Chrome in headless mode
# Chrome's --window-size switch takes "width,height"; the previous "1920x1080"
# value is not in that format.
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument('--disable-blink-features=AutomationControlled')  # Optional: To help avoid detection by some websites

def message_on_telegram(message="Hello from Python on Telegram!"):
    """Send ``message`` to both configured Telegram chats via the Bot API.

    The text is posted to CHAT_ID first and MOM_CHAT_ID second. Delivery is
    best-effort: a failure for one recipient is logged and does not stop the
    attempt for the other, and no exception reaches the caller.

    Args:
        message: Text of the message to send.

    Returns:
        True if every recipient's request got a successful HTTP response,
        False otherwise.
    """
    url = f'https://api.telegram.org/bot{BOT_TOKEN}/sendMessage'
    all_delivered = True
    for chat_id in (CHAT_ID, MOM_CHAT_ID):
        payload = {'chat_id': chat_id, 'text': message}
        try:
            # Without a timeout a stalled connection would block the watcher forever.
            response = requests.post(url, data=payload, timeout=10)
        except requests.RequestException as exc:
            # Log only the exception type: its message can embed the request
            # URL, which contains the bot token.
            logging.error(f"Telegram request for chat {chat_id} failed: {type(exc).__name__}")
            all_delivered = False
            continue
        if not response.ok:
            logging.error(f"Telegram rejected the message for chat {chat_id}: HTTP {response.status_code}")
            all_delivered = False
    return all_delivered

def get_posts(page_source):
    """Parse dashboard HTML into a list of ``{'title': ..., 'link': ...}`` dicts.

    Only post cards inside the ``div`` whose ``data-bazooka`` attribute is
    ``Dashboard`` are considered. A card without a title anchor yields empty
    strings for both fields. Returns an empty list when the dashboard
    container is missing.
    """
    dashboard = BeautifulSoup(page_source, 'html.parser').find('div', {'data-bazooka': 'Dashboard'})
    if not dashboard:
        return []

    def _describe(card):
        # Title text and target URL both come from the card's title anchor.
        anchor = card.find('a', class_='kb-dashboard-performer__title')
        if not anchor:
            return {'title': '', 'link': ''}
        return {'title': anchor.get_text(strip=True), 'link': anchor['href']}

    return [_describe(card) for card in dashboard.find_all('div', class_='kb-dashboard-performer')]

def load_cookies(driver, cookies_file):
    """Restore previously saved session cookies into ``driver``.

    Args:
        driver: Object exposing ``add_cookie(dict)``; the browser must already
            be on the cookies' domain (the caller navigates there first).
        cookies_file: Path of the pickle file written after a successful login.

    Returns:
        True if at least one cookie was added to the driver. False if the file
        is missing, unreadable, corrupt or holds no cookies, so the caller
        falls back to a fresh login.
    """
    if not os.path.exists(cookies_file):
        logging.info('No cookies found. Logging in...')
        return False

    try:
        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # use a cookie file this script wrote itself.
        with open(cookies_file, 'rb') as f:
            cookies = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError, IndexError) as exc:
        # A truncated or corrupt file would otherwise raise on every restart.
        logging.warning(f"Could not read cookies from {cookies_file}: {exc!r}. Logging in...")
        return False

    if not cookies:
        logging.info('Cookie file is empty. Logging in...')
        return False

    for cookie in cookies:
        # Drop the expiry field before handing the cookie to the driver
        # (kept from the original code; presumably add_cookie rejected it).
        cookie.pop('expiry', None)
        driver.add_cookie(cookie)
    logging.info("Cookies loaded. Session is now persistent.")
    return True

def login(driver, wait, cookies_file='cookies.pkl'):
    """Log in through the web form and save the session cookies on success.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        wait: WebDriverWait bound to ``driver``; used to wait for the form fields.
        cookies_file: Path the cookies are pickled to. Defaults to
            'cookies.pkl', the file name main() reads from. The original code
            read a ``cookies_file`` name that this cell never defines at module
            level, so saving raised NameError.

    Returns:
        True if the logged-in header was found and the cookies were saved,
        False otherwise.
    """
    driver.get('https://kabanchik.ua/ua/auth/login')  # Navigate to the login page
    # Wait for the username and password fields to be present
    username_field = wait.until(EC.presence_of_element_located((By.NAME, 'phoneEmail')))
    password_field = wait.until(EC.element_to_be_clickable((By.NAME, 'password')))

    # Fill in the credentials and submit the form
    username_field.send_keys(PHONE_NUMBER)
    password_field.send_keys(PASSWORD)
    submit_button = driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
    submit_button.click()

    # Give the site a fixed 10 seconds to finish logging in.
    # NOTE(review): an explicit wait on the header would be faster, but it is
    # unclear whether that element also exists on the login page - confirm first.
    logging.info('Waiting for login to complete...')
    time.sleep(10)

    # The BaseHeader element is treated as the sign of a logged-in session.
    if not driver.find_elements(By.CSS_SELECTOR, "div[data-bazooka='BaseHeader']"):
        logging.error("Login failed.")
        return False

    # Save cookies to file for future sessions
    cookies = driver.get_cookies()
    with open(cookies_file, 'wb') as f:
        pickle.dump(cookies, f)
    logging.info("Logged in and cookies saved for session persistence.")
    return True

def check_for_new_posts(driver, wait, seen_posts):
    """Reload the category page once and announce posts not seen before.

    A post counts as new when its title does not match any title in
    ``seen_posts``. Each new post is logged, sent through
    message_on_telegram() and then appended to ``seen_posts``.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        wait: WebDriverWait bound to ``driver``.
        seen_posts: List of post dicts (as returned by get_posts); mutated in place.
    """
    driver.get('https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii')  # Reload the page
    wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'kb-dashboard-performer')))

    # Get the updated posts
    current_posts = get_posts(driver.page_source)
    if not current_posts:
        # get_posts returns [] when the dashboard container is missing;
        # indexing [0] here used to raise IndexError and end the session.
        logging.warning('No posts could be parsed from the page.')
        return
    logging.info(f"First post's title: {current_posts[0]['title']}")

    # Check for new posts
    logging.info('Checking for new posts...')
    # Build the title lookup once instead of rebuilding a list for every post.
    seen_titles = {p['title'] for p in seen_posts}
    for post in current_posts:
        if post['title'] in seen_titles:
            continue
        logging.info(f"New post found: {post['title']} {post['link']}")
        message_on_telegram(f"New post! {post['title']} {post['link']}")
        seen_posts.append(post)
        seen_titles.add(post['title'])

def main():
    """Run one watcher session until an error ends it.

    Starts a headless Chrome, restores saved cookies or logs in, loads the
    category page, records the posts currently listed and then polls for new
    ones every 10 seconds. Any exception is logged and the browser is closed;
    the function then returns so the caller can start a new session.
    """
    # Local import: only this function needs the exception type.
    from selenium.common.exceptions import TimeoutException

    dashboard_url = 'https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii'
    dashboard_ready = EC.presence_of_all_elements_located((By.CLASS_NAME, 'kb-dashboard-performer'))

    # Bound before the try so the finally block is safe even when Chrome
    # fails to start (previously that raised UnboundLocalError).
    driver = None
    try:
        driver = webdriver.Chrome(options=chrome_options)
        wait = WebDriverWait(driver, 30)  # Max wait time on the wait.until commands
        driver.get('https://kabanchik.ua')  # Navigate to the base domain to set the correct context for cookies
        cookies_file = 'cookies.pkl'

        cookies_restored = load_cookies(driver, cookies_file)
        if not cookies_restored:
            login(driver, wait)

        # Navigate to the desired page
        driver.get(dashboard_url)
        try:
            wait.until(dashboard_ready)
        except TimeoutException:
            if not cookies_restored:
                raise
            # Saved cookies no longer give access: log in once and retry,
            # otherwise every restart would skip the login and fail again.
            logging.warning('Page did not load with saved cookies; logging in again...')
            login(driver, wait)
            driver.get(dashboard_url)
            wait.until(dashboard_ready)
        logging.info(f'Fetched {driver.title}')

        # Initial extraction of posts; these are the baseline and are not announced.
        seen_posts = get_posts(driver.page_source)
        if seen_posts:
            logging.info(f"First post's title: {seen_posts[0]['title']}")
        else:
            logging.warning('No posts found on the initial page load.')

        # Periodically check for new posts
        while True:
            time.sleep(10)  # Reload every 10 seconds
            check_for_new_posts(driver, wait, seen_posts)

    except Exception as e:
        # exc_info keeps the traceback in the log file for later diagnosis.
        logging.error(f"An error occurred: {e}", exc_info=True)

    finally:
        # Close the browser, if one was started
        if driver is not None:
            driver.quit()

# Supervisor loop: main() only returns after an error has been logged and the
# browser closed, so pause 10 seconds and start a fresh session. This runs
# until the process (or the notebook cell) is interrupted.
if __name__ == "__main__":
    while True:
        main()
        time.sleep(10)


In [None]:
# Full script 

# TODO: investigate why the login sometimes fails
# NOTE: login() already writes the cookie file only when the logged-in header is found

import os
import pickle  # For saving and loading cookies
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import time
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
import datetime

# Set up Chrome options for headless browsing
chrome_options = Options()
chrome_options.add_argument("--headless")  # Runs Chrome in headless mode
# Chrome's --window-size switch takes "width,height"; the previous "1920x1080"
# value is not in that format.
chrome_options.add_argument("--window-size=1920,1080")
chrome_options.add_argument('--disable-blink-features=AutomationControlled')  # Optional: To help avoid detection by some websites

# Initialize the WebDriver (module-level: the functions below use this instance)
driver = webdriver.Chrome(options=chrome_options)
# Pickle file used to persist the session cookies between runs
cookies_file = 'cookies.pkl'

def message_on_telegram(message="Hello from Python on Telegram!"):
    """Send ``message`` to both configured Telegram chats via the Bot API.

    The text is posted to CHAT_ID first and MOM_CHAT_ID second. Delivery is
    best-effort: a failure for one recipient is printed and does not stop the
    attempt for the other, and no exception reaches the caller.

    Args:
        message: Text of the message to send.

    Returns:
        True if every recipient's request got a successful HTTP response,
        False otherwise.
    """
    url = f'https://api.telegram.org/bot{BOT_TOKEN}/sendMessage'
    all_delivered = True
    for chat_id in (CHAT_ID, MOM_CHAT_ID):
        payload = {'chat_id': chat_id, 'text': message}
        try:
            # Without a timeout a stalled connection would block the watcher forever.
            response = requests.post(url, data=payload, timeout=10)
        except requests.RequestException as exc:
            # Print only the exception type: its message can embed the request
            # URL, which contains the bot token.
            print(f"Telegram request for chat {chat_id} failed: {type(exc).__name__}")
            all_delivered = False
            continue
        if not response.ok:
            print(f"Telegram rejected the message for chat {chat_id}: HTTP {response.status_code}")
            all_delivered = False
    return all_delivered

def get_posts(page_source):
    """Extract the listed posts from the page HTML.

    Returns a list with one ``{'title', 'link'}`` dict per post card found
    inside the ``data-bazooka="Dashboard"`` container. Cards without a title
    anchor yield empty strings; a missing container yields an empty list.
    """
    parsed = BeautifulSoup(page_source, 'html.parser')
    container = parsed.find('div', {'data-bazooka': 'Dashboard'})
    cards = container.find_all('div', class_='kb-dashboard-performer') if container else []

    results = []
    for card in cards:
        link_tag = card.find('a', class_='kb-dashboard-performer__title')
        if link_tag:
            entry = {'title': link_tag.get_text(strip=True), 'link': link_tag['href']}
        else:
            entry = {'title': '', 'link': ''}
        results.append(entry)
    return results

def load_cookies(driver, cookies_file):
    """Restore previously saved session cookies into ``driver``.

    Args:
        driver: Object exposing ``add_cookie(dict)``; the browser must already
            be on the cookies' domain (the caller navigates there first).
        cookies_file: Path of the pickle file written after a successful login.

    Returns:
        True if at least one cookie was added to the driver. False if the file
        is missing, unreadable, corrupt or holds no cookies, so the caller
        falls back to a fresh login.
    """
    if not os.path.exists(cookies_file):
        return False

    try:
        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # use a cookie file this script wrote itself.
        with open(cookies_file, 'rb') as f:
            cookies = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError, IndexError) as exc:
        # A truncated or corrupt file would otherwise raise on every run.
        print(f"Could not read cookies from {cookies_file}: {exc!r}")
        return False

    if not cookies:
        return False

    for cookie in cookies:
        # Drop the expiry field before handing the cookie to the driver
        # (kept from the original code; presumably add_cookie rejected it).
        cookie.pop('expiry', None)
        driver.add_cookie(cookie)
    print("Cookies loaded. Session is now persistent.")
    return True

def login(driver, wait):
    """Log in through the web form and save the session cookies on success.

    Cookies are pickled to the module-level ``cookies_file`` only when the
    logged-in header is found. The page reached after the attempt is always
    dumped to 'current_page_content.html' for debugging.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        wait: WebDriverWait bound to ``driver``; used to wait for the form fields.

    Returns:
        True if the logged-in header was found and the cookies were saved,
        False otherwise.
    """
    print('No cookies found. Logging in...')
    driver.get('https://kabanchik.ua/ua/auth/login')  # Navigate to the login page
    # Wait for the username and password fields to be present
    username_field = wait.until(EC.presence_of_element_located((By.NAME, 'phoneEmail')))
    password_field = wait.until(EC.element_to_be_clickable((By.NAME, 'password')))

    # Fill in the credentials and submit the form
    username_field.send_keys(PHONE_NUMBER)
    password_field.send_keys(PASSWORD)
    submit_button = driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
    submit_button.click()

    # Give the site a fixed 10 seconds to finish logging in
    print('Waiting for login to complete...')
    time.sleep(10)

    # The BaseHeader element is treated as the sign of a logged-in session.
    logged_in = bool(driver.find_elements(By.CSS_SELECTOR, "div[data-bazooka='BaseHeader']"))
    if logged_in:
        # Save cookies to file for future sessions
        cookies = driver.get_cookies()
        with open(cookies_file, 'wb') as f:
            pickle.dump(cookies, f)

    # Record where the browser ended up, whether or not the login worked
    print('After login we are at ' + driver.title)
    with open('current_page_content.html', 'w', encoding='utf-8') as f:
        f.write(driver.page_source)

    # Report the real outcome; success used to be printed unconditionally.
    if logged_in:
        print("Logged in and cookies saved for session persistence.")
    else:
        print("Login failed.")
    return logged_in

def check_for_new_posts(driver, wait, seen_posts):
    """Poll the category page every 10 seconds and announce new posts.

    Loops until an exception is raised. A post counts as new when its title
    does not match any title in ``seen_posts``; each new post is printed, sent
    through message_on_telegram() and appended to ``seen_posts``.

    Args:
        driver: Selenium WebDriver with a logged-in session.
        wait: WebDriverWait bound to ``driver``.
        seen_posts: List of post dicts (as returned by get_posts); mutated in place.
    """
    while True:
        time.sleep(10)  # Reload every 10 seconds
        driver.get('https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii')  # Reload the page
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'kb-dashboard-performer')))

        # Get the updated posts
        current_posts = get_posts(driver.page_source)
        if not current_posts:
            # get_posts returns [] when the dashboard container is missing;
            # indexing [0] here used to raise IndexError and stop the loop.
            print('No posts could be parsed from the page.')
            continue
        print(current_posts[0]['title'])

        # Check for new posts
        print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. Checking for new posts...')
        # Build the title lookup once per poll instead of once per post.
        seen_titles = {p['title'] for p in seen_posts}
        for post in current_posts:
            if post['title'] in seen_titles:
                continue
            print("New post found!")
            print(post['title'] + " " + post['link'])
            # A space after "New post!" keeps the title from running into it.
            message_on_telegram('New post! ' + post['title'] + " " + post['link'])
            seen_posts.append(post)
            seen_titles.add(post['title'])

def main():
    """Run the watcher with the module-level ``driver`` until an error ends it.

    Restores saved cookies or logs in, loads the category page, records the
    posts currently listed and hands over to check_for_new_posts(), which
    polls indefinitely. Any exception is printed; the browser is always
    closed on the way out, so ``driver`` cannot be reused afterwards.
    """
    try:
        wait = WebDriverWait(driver, 30)  # Max wait time on the wait.until commands
        driver.get('https://kabanchik.ua')  # Navigate to the base domain to set the correct context for cookies

        if not load_cookies(driver, cookies_file):
            login(driver, wait)

        # Navigate to the desired page
        driver.get('https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii')
        wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'kb-dashboard-performer')))
        print('Fetched ' + driver.title)

        # Initial extraction of posts; these are the baseline and are not announced.
        seen_posts = get_posts(driver.page_source)
        if seen_posts:
            print(f"First post's title: {seen_posts[0]['title']}")
        else:
            # An empty baseline used to raise IndexError here and abort
            # before monitoring even started.
            print('No posts found on the initial page load.')

        # Periodically check for new posts
        check_for_new_posts(driver, wait, seen_posts)

    except Exception as e:
        print(f"An error occurred: {e}")

    finally:
        # Close the browser
        driver.quit()

# Start the watcher when this cell/script is executed directly.
if __name__ == "__main__":
    main()

In [117]:
# selenium version
import requests

def message_on_telegram(message="Hello from Python on Telegram!"):
    """Send ``message`` to the chat identified by CHAT_ID via the Telegram Bot API.

    Delivery is best-effort: failures are printed instead of raised.

    Args:
        message: Text of the message to send.

    Returns:
        True if the request got a successful HTTP response, False otherwise.
    """
    url = f'https://api.telegram.org/bot{BOT_TOKEN}/sendMessage'
    payload = {'chat_id': CHAT_ID, 'text': message}
    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.post(url, data=payload, timeout=10)
    except requests.RequestException as exc:
        # Print only the exception type: its message can embed the request
        # URL, which contains the bot token.
        print(f"Telegram request failed: {type(exc).__name__}")
        return False
    if not response.ok:
        print(f"Telegram rejected the message: HTTP {response.status_code}")
        return False
    return True


from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import time
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from config import BOT_TOKEN, CHAT_ID, MOM_CHAT_ID, PHONE_NUMBER, PASSWORD

# Set up Chrome options for headless browsing
chrome_options = Options()
chrome_options.add_argument("--headless")  # Runs Chrome in headless mode
# Chrome's --window-size switch takes "width,height"; the previous "1920x1080"
# value is not in that format.
chrome_options.add_argument("--window-size=1920,1080")

# Initialize the WebDriver
driver = webdriver.Chrome(options=chrome_options)

try:
    # Navigate to the login page
    driver.get('https://kabanchik.ua/ua/auth/login')

    # The explicit waits already return the located elements, so the form
    # fields are not looked up a second time.
    wait = WebDriverWait(driver, 10)
    username_field = wait.until(EC.presence_of_element_located((By.NAME, 'phoneEmail')))
    password_field = wait.until(EC.element_to_be_clickable((By.NAME, 'password')))

    # Fill the login form
    username_field.send_keys(PHONE_NUMBER)
    password_field.send_keys(PASSWORD)

    # Submit the form
    submit_button = driver.find_element(By.CSS_SELECTOR, 'button[type="submit"]')
    submit_button.click()

    # Wait for the login to complete
    time.sleep(10)  # Adjust time as needed
    # Keep a snapshot of the page reached after login for debugging
    page_source = driver.page_source
    with open('current_page_content.html', 'w', encoding='utf-8') as f:
        f.write(page_source)

    # Navigate to the desired page
    driver.get('https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii')

    # Wait for the page to load
    time.sleep(5)  # Adjust time as needed

    # Parse the page source after JavaScript has executed.
    # NOTE: get_last_post is defined in a later cell of this notebook, so that
    # cell must be run first. It returns the first post's title.
    description = get_last_post(driver.page_source)
    print(description)

finally:
    # Close the browser
    driver.quit()


KeyboardInterrupt: 

In [90]:
# Scratch cell: write the HTML held in `page_source` to disk for inspection.
# `page_source` is not defined in this cell; it must already exist in the
# kernel from an earlier cell.
with open('current_page_content.html', 'w', encoding='utf-8') as f:
    f.write(page_source)



In [98]:
def get_last_post(page_source):
    """Return the title of the first post listed on the page.

    Prints the document title, then collects title, link and description for
    every post card inside the ``data-bazooka="Dashboard"`` container.

    Args:
        page_source: HTML of the page as a string.

    Returns:
        The stripped title text of the first post card, or '' when the
        container or the post cards are missing (these cases used to raise
        AttributeError / IndexError).
    """
    # Parse the page source with BeautifulSoup
    soup = BeautifulSoup(page_source, 'html.parser')
    # soup.title is None when the document has no <title> element
    print(soup.title.string if soup.title else None)

    # Find the main dashboard div that contains all the posts
    dashboard_div = soup.find('div', {'data-bazooka': 'Dashboard'})
    if dashboard_div is None:
        return ''

    posts = []
    for post_div in dashboard_div.find_all('div', class_='kb-dashboard-performer'):
        # Title text and link both come from the card's title anchor
        title_tag = post_div.find('a', class_='kb-dashboard-performer__title')
        # First "line" element of the card (e.g., execution date)
        description_tag = post_div.find('div', class_='kb-dashboard-performer__line')
        posts.append({
            'title': title_tag.get_text(strip=True) if title_tag else '',
            'link': title_tag.get('href', '') if title_tag else '',
            'description': description_tag.get_text(strip=True) if description_tag else '',
        })

    return posts[0]['title'] if posts else ''

In [104]:
# Scratch cell: pickle the `cookies` object to `cookies_file`.
# Neither name is defined in this cell; both must already exist in the kernel.
with open(cookies_file, 'wb') as f:
    pickle.dump(cookies, f)

    

In [122]:
import requests

# Scratch cell: send a fixed test message to CHAT_ID through the Bot API
# sendMessage endpoint. BOT_TOKEN and CHAT_ID must already be in the kernel.
message="Hello from Python on Telegram!"

url = f'https://api.telegram.org/bot{BOT_TOKEN}/sendMessage'
payload = {'chat_id': CHAT_ID, 'text': message}
# As the last expression of the cell, the Response object is shown as the cell output.
requests.post(url, data=payload)

<Response [200]>

In [121]:
import requests

# Scratch cell: fetch the bot's pending updates and decode the JSON body.
# BOT_TOKEN must already be in the kernel.
url = f'https://api.telegram.org/bot{BOT_TOKEN}/getUpdates'
response = requests.get(url).json()

# NOTE(review): the printed payload contains user names, usernames and chat
# IDs; clear this cell's output before sharing the notebook.
print(response)

{'ok': True, 'result': [{'update_id': 716390475, 'message': {'message_id': 3, 'from': {'id': 261234088, 'is_bot': False, 'first_name': 'Oleg', 'last_name': 'Rybkin', 'username': 'StrawberryMatcha', 'language_code': 'en'}, 'chat': {'id': 261234088, 'first_name': 'Oleg', 'last_name': 'Rybkin', 'username': 'StrawberryMatcha', 'type': 'private'}, 'date': 1730105242, 'text': 'hi'}}, {'update_id': 716390476, 'message': {'message_id': 5, 'from': {'id': 5611210509, 'is_bot': False, 'first_name': '1539', 'username': 'VilhelmDnipro', 'language_code': 'en'}, 'chat': {'id': 5611210509, 'first_name': '1539', 'username': 'VilhelmDnipro', 'type': 'private'}, 'date': 1730110961, 'text': '/start', 'entities': [{'offset': 0, 'length': 6, 'type': 'bot_command'}]}}, {'update_id': 716390477, 'message': {'message_id': 6, 'from': {'id': 5611210509, 'is_bot': False, 'first_name': '1539', 'username': 'VilhelmDnipro', 'language_code': 'en'}, 'chat': {'id': 5611210509, 'first_name': '1539', 'username': 'VilhelmD

In [None]:
# cloudscraper version
import cloudscraper
import json

# Experiment: log in through the JSON API with cloudscraper instead of a browser.
# One scraper object is used for every request below so cookies carry over.
scraper = cloudscraper.create_scraper()

# Load the login page first, before posting credentials.
login_page_url = 'https://kabanchik.ua/ua/auth/login'
login_page_response = scraper.get(login_page_url)

# Check if the response is successful
# NOTE(review): a failure is only printed; the cell keeps running afterwards.
if login_page_response.status_code != 200:
    print(f"Failed to load login page. Status code: {login_page_response.status_code}")

# Headers sent with the login POST and with the later page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
    'Referer': 'https://kabanchik.ua/ua/auth/login',
    'Origin': 'https://kabanchik.ua',
    'Content-Type': 'application/json; charset=utf-8',
    'Accept-Language': 'uk',
    'X-Requested-With': 'XMLHttpRequest',
}

# Credentials; PHONE_NUMBER and PASSWORD are not defined in this cell and must
# already exist in the kernel.
login_data = {
    'phone_email': PHONE_NUMBER,  # Phone number or email used to sign in
    'password': PASSWORD,      # Account password
}

login_url = 'https://kabanchik.ua/api/v3/auth/login'

# Convert login data to JSON
login_payload = json.dumps(login_data)

# Send the POST request
login_response = scraper.post(login_url, data=login_payload, headers=headers)

if login_response.status_code == 200:
    print("Login request successful.")
else:
    print(f"Login request failed with status code {login_response.status_code}")
    print(f"Response: {login_response.text}")
    # NOTE(review): nothing stops execution here; the cell continues after a failed login.

# An 'auth' cookie on the session is treated as the sign of a successful login.
if 'auth' in scraper.cookies.get_dict():
    print("Login successful! 'auth' cookie has been set.")
else:
    print("Login failed. 'auth' cookie not found.")
    # NOTE(review): the protected page is still requested below in this case.

# Request the category page with the same session and headers.
protected_url = 'https://kabanchik.ua/ua/cabinet/dnipro/category/maliunky-ta-iliustratsii'
response = scraper.get(protected_url, headers=headers)

# Save the page content to a file
with open('current_page_content.html', 'w', encoding='utf-8') as f:
    f.write(response.text)

# Optional: Parse the page and print the title
from bs4 import BeautifulSoup

soup = BeautifulSoup(response.text, 'html.parser')
# NOTE(review): soup.title is None when the response has no <title>, which
# would make the next line raise AttributeError.
print(f"Page title: {soup.title.string}")
