### Automating repetitive tasks

1. Using loops and list comprehensions
2. Writing custom functions
3. Utilizing Python libraries for scheduling (e.g., schedule, cron-like job scheduling)

### Web scraping and data extraction
1. Introduction to web scraping with Beautiful Soup
2. Extracting data from web pages
3. Handling pagination and dynamic content (with Selenium, if needed)

In [None]:
!pip install schedule beautifulsoup4 selenium

In [1]:
import requests # for making HTTP requests to web pages
import schedule # for scheduling tasks to run periodically
import time # for sleeping the program for a specified amount of time
from bs4 import BeautifulSoup
from datetime import datetime # for getting the current date and time
from tqdm import tqdm # for displaying a progress bar for a loop
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Example 1: Automating repetitive tasks

#def job():
    #print("I'm working...")

#schedule.every(5).seconds.do(job)

In [3]:
#while True:
    #schedule.run_pending()
    #time.sleep(1)

In [4]:
def get_bitcoin_price():
    url = 'https://www.coingecko.com/en/coins/bitcoin'
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    price_address = soup.find('span', class_='no-wrap')
    price = price_address.text.strip()
    return price


def log_bitcoin_price():
    current_price = get_bitcoin_price()
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log_message = f"{timestamp} - Current Bitcoin price: {current_price}\n"
    # Write the log message to a file
    with open('bitcoin_price_log.txt', 'a') as log_file:
        # mode 'a' is for appending to the file
        # mode 'w' is for writing to the file
        # mode 'r' is for reading from the file
        log_file.write(log_message)

    # Print the log message to the console
    print(log_message.strip())

In [5]:
schedule.every(1).second.do(log_bitcoin_price)

Every 1 second do log_bitcoin_price() (last run: [never], next run: 2023-03-19 13:45:10)

In [6]:
while True:
    schedule.run_pending()
    time.sleep(1)

2023-03-19 13:45:21 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:22 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:25 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:26 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:28 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:29 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:31 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:33 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:34 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:36 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:38 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:39 - Current Bitcoin price: $27,454.17
2023-03-19 13:45:41 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:42 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:44 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:46 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:47 - Current Bitcoin price: $27,451.84
2023-03-19 13:45:49 - Current Bitcoin price: $27

KeyboardInterrupt: 

In [7]:
URL = 'https://github.com/trending'

response = requests.get(URL)
soup = BeautifulSoup(response.content, 'html.parser')

# Find the top 10 trending repositories
trending_repositories = soup.find_all('article', class_='Box-row', limit=10)

for index, repo in enumerate(trending_repositories):
    name = repo.h1.text.strip().replace('\n', '').replace(' ', '')
    description = repo.find('p', class_='col-9').text.strip() if repo.find('p', class_='col-9') else 'No description provided'
    language = repo.find('span', itemprop='programmingLanguage').text.strip() if repo.find('span', itemprop='programmingLanguage') else 'Not specified'
    print(f"{index + 1}. Repository: {name}\nDescription: {description}\nLanguage: {language}\n")

1. Repository: microsoft/semantic-kernel
Description: Integrate cutting-edge LLM technology quickly and easily into your apps
Language: C#

2. Repository: tloen/alpaca-lora
Description: Instruct-tune LLaMA on consumer hardware
Language: Jupyter Notebook

3. Repository: cocktailpeanut/dalai
Description: The simplest way to run LLaMA on your local machine
Language: JavaScript

4. Repository: mckaywrigley/chatbot-ui
Description: A ChatGPT clone for running locally in your browser.
Language: TypeScript

5. Repository: gencay/vscode-chatgpt
Description: Your best AI pair programmer - open source only
Language: TypeScript

6. Repository: setzer22/llama-rs
Description: Run LLaMA inference on CPU, with Rust 🦀🚀🦙
Language: Rust

7. Repository: massgravel/Microsoft-Activation-Scripts
Description: A Windows and Office activator using HWID / KMS38 / Online KMS activation methods, with a focus on open-source code and fewer antivirus detections.
Language: Batchfile

8. Repository: GaiZhenbiao/Chuanhu

In [11]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Create a new instance of the Chrome driver
driver = webdriver.Chrome()

# Go to the Wikipedia home page
driver.get('https://www.wikipedia.org')

# Find the search box
search_box = driver.find_element(By.ID, 'searchInput')

# Enter search query
search_box.send_keys('France')

# Submit the form (like hitting return)
search_box.submit()

# Wait for the page to load
time.sleep(5)

# Get the main content of the page
main_content = driver.find_element(By.ID, 'content').text

# Write the main content to a file
with open('wikipedia_page.txt', 'w', encoding='utf-8') as file:
    file.write(main_content)

# Close the browser
driver.quit()

In [None]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set the path to the ChromeDriver executable
chromedriver_path = '/path/to/chromedriver'

# Create a WebDriver instance
driver = webdriver.Chrome(executable_path=chromedriver_path)

# Navigate to the GitHub user's profile page
github_user = 'qlinhta'
driver.get(f'https://github.com/{github_user}?tab=repositories')

# Wait for the repositories to load
wait = WebDriverWait(driver, 10)
repositories = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="wb-break-all"]//a')))

# Scrape the repository names
repo_names = [repo.text for repo in repositories]

# Close the WebDriver instance
driver.quit()

# Print the repository names
print(f"Repositories of {github_user}:")
for name in repo_names:
    print(name)