# BEAUTIFULSOUP 

In [15]:
pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [16]:
from bs4 import BeautifulSoup

In [17]:
html = """
        <!DOCTYPE html><html><head><title>Example HTML</title></head><body><h1>Hello, World!</h1><p>A simple HTML page for testing web scraping with BeautifulSoup.</p>
                <a c
                lass='link' href='www.miuul.com' target='blank' aria-label='Miuul (Opens Miuul Page)'>Click</a>
                <li>Outsider</li>
                <ul>
                    <li>Item 1</li>
                    <li>Item 2</li>
                </ul>
            </body>
            </html>

"""

In [18]:
soup = BeautifulSoup(html, "html.parser")

In [19]:
soup


<!DOCTYPE html>
<html><head><title>Example HTML</title></head><body><h1>Hello, World!</h1><p>A simple HTML page for testing web scraping with BeautifulSoup.</p>
<a aria-label="Miuul (Opens Miuul Page)" c="" href="www.miuul.com" lass="link" target="blank">Click</a>
<li>Outsider</li>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</body>
</html>

In [20]:
soup.title

<title>Example HTML</title>

In [21]:
title = soup.title

In [22]:
type(title)

bs4.element.Tag

In [23]:
title.text

'Example HTML'

In [24]:
title.string

'Example HTML'

In [26]:
print(soup.prettify())  # daha düzenli görürüz prettify ile 

<!DOCTYPE html>
<html>
 <head>
  <title>
   Example HTML
  </title>
 </head>
 <body>
  <h1>
   Hello, World!
  </h1>
  <p>
   A simple HTML page for testing web scraping with BeautifulSoup.
  </p>
  <a aria-label="Miuul (Opens Miuul Page)" c="" href="www.miuul.com" lass="link" target="blank">
   Click
  </a>
  <li>
   Outsider
  </li>
  <ul>
   <li>
    Item 1
   </li>
   <li>
    Item 2
   </li>
  </ul>
 </body>
</html>



In [27]:
soup.ul

<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>

In [28]:
soup.li

<li>Outsider</li>

In [30]:
ul = soup.ul
type(ul)

bs4.element.Tag

In [31]:
ul.li

<li>Item 1</li>

In [33]:
ul.li.last

# Navigating and Searching HTML

In [36]:
from bs4 import BeautifulSoup

html = """
        <!DOCTYPE html>
        <html>
            <head>
                <title>Example HTML</title>
            </head>
            <body>
                <h1>Hello, World!</h1>
                <p id="paragraph" >A simple HTML page for testing web scraping with BeautifulSoup.</p>
                <a class='link' href='www.miuul.com' target='blank' aria-label='Miuul (Opens Miuul Page)'>Click</a>
                <li>Outsider</li>
                <ul>
                    <li class="list-item">Item 1</li>
                    <li class="list-item">Item 2</li>
                </ul>
                <li>Outsider 2</li>
            </body>
            </html>
"""


In [37]:
soup = BeautifulSoup(html , "html.parser") 

In [38]:
soup.a

<a aria-label="Miuul (Opens Miuul Page)" class="link" href="www.miuul.com" target="blank">Click</a>

In [45]:
soup.find("a", attrs={'class': 'link',  'target': 'blank'})

<a aria-label="Miuul (Opens Miuul Page)" class="link" href="www.miuul.com" target="blank">Click</a>

In [47]:
soup.find("li")

<li>Outsider</li>

In [49]:
soup.find_all("li")   #all tüm li getirdi ! 

[<li>Outsider</li>,
 <li class="list-item">Item 1</li>,
 <li class="list-item">Item 2</li>,
 <li>Outsider 2</li>]

In [51]:
soup.find_all ("li" , attrs={ "class" : "list-item"})

[<li class="list-item">Item 1</li>, <li class="list-item">Item 2</li>]

In [54]:
li_elements = soup.find_all ("li" , attrs={ "class" : "list-item"})
li_elements

[<li class="list-item">Item 1</li>, <li class="list-item">Item 2</li>]

In [56]:
li_elements[-1]    # li elements sonuncusunu getir 

<li class="list-item">Item 2</li>

# Extracting Data from HTML Elements

In [87]:
from bs4 import BeautifulSoup 

In [88]:
html = """<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Animal Table</title>
  <style>
    table {
      width: 80%;
      border-collapse: collapse;
      margin: 20px;
    }

    th, td {
      border: 1px solid #dddddd;
      text-align: left;
      padding: 8px;
    }

    th {
      background-color: #f2f2f2;
    }

    img {
object-fit: cover;
      max-width: 50px;
      max-height: 50px;
    }
  </style>
</head>
<body>

  <h2>Animal Table</h2>

  <table>
    <thead><tr>
      <th>Image</th>
      <th>Animal</th>
      <th>Description</th>
      <th>Nickname</th>
    </tr></thead>
    <tbody>
    <tr>
      <td><img src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" alt="Lion"></td>
      <td><a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a></td>
      <td>The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."</td>
      <td> Majestic<br>King  </td>
    </tr>
    <tr>
      <td><img src="https://images.unsplash.com/photo-1551316679-9c6ae9dec224?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" alt="Elephant"></td>
      <td><a href="https://en.wikipedia.org/wiki/Elephant" target="_blank">Elephant</a></td>
      <td>Elephants are the largest land animals. They are known for their long trunks and large ears.</td>
      <td> Trunked<br>  Giant</td>
    </tr>
    <tr>
      <td><img src="https://images.unsplash.com/photo-1570481662006-a3a1374699e8?q=80&w=1965&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" alt="Dolphin"></td>
      <td><a href="https://en.wikipedia.org/wiki/Dolphin" target="_blank">Dolphin</a></td>
      <td>Dolphins are highly intelligent marine mammals known for their playful behavior and communication skills.</td>
      <td> Playful<br>Communicator</td>
    </tr>
    <tr>
      <td><img src="https://images.unsplash.com/photo-1599631438215-75bc2640feb8?q=80&w=2127&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" alt="Butterfly"></td>
      <td><a href="https://en.wikipedia.org/wiki/Butterfly" target="_blank">Butterfly</a></td>
      <td>Butterflies are beautiful insects with colorful wings. They undergo a process called metamorphosis from caterpillar to butterfly.</td>
      <td> Colorful<br>Metamorphosis</td>
    </tr>
    <tr>
      <td><img src="https://images.unsplash.com/photo-1552633832-4f5a1b110980?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" alt="Penguin"></td>
      <td><a href="https://en.wikipedia.org/wiki/Penguin" target="_blank">Penguin</a></td>
      <td>Penguins are flightless birds that are well-adapted to life in the water. They are known for their tuxedo-like black and white plumage.</td>
      <td> Tuxedoed     <br>Adaptation  </td>
    </tr>
  </tbody>
  </table>
</body>
</html>"""

In [89]:
soup = BeautifulSoup (html, "html.parser")

In [90]:
tbody_tag = soup.find("tbody")

In [94]:
tr_tag_list =tbody_tag.find_all("tr")


In [95]:
print(tr_tag_list)

[<tr>
<td><img alt="Lion" src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/></td>
<td><a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a></td>
<td>The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."</td>
<td> Majestic<br/>King  </td>
</tr>, <tr>
<td><img alt="Elephant" src="https://images.unsplash.com/photo-1551316679-9c6ae9dec224?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/></td>
<td><a href="https://en.wikipedia.org/wiki/Elephant" target="_blank">Elephant</a></td>
<td>Elephants are the largest land animals. They are known for their long trunks and large ears.</td>
<td> Trunked<br/>  Giant</td>
</tr>, <tr>
<td><img alt="Dolphin" src="https://images.unsplash.com/ph

In [98]:
tr_tag =tr_tag_list [0]  #Lion
tr_tag

<tr>
<td><img alt="Lion" src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/></td>
<td><a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a></td>
<td>The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."</td>
<td> Majestic<br/>King  </td>
</tr>

In [99]:
tr_tag =tr_tag_list [1] #Elephant
tr_tag

<tr>
<td><img alt="Elephant" src="https://images.unsplash.com/photo-1551316679-9c6ae9dec224?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/></td>
<td><a href="https://en.wikipedia.org/wiki/Elephant" target="_blank">Elephant</a></td>
<td>Elephants are the largest land animals. They are known for their long trunks and large ears.</td>
<td> Trunked<br/>  Giant</td>
</tr>

In [None]:
img_tag = tr_tag.find("img")
a_tag = tr_tag.find("a")

In [104]:
img_tag =tr_tag.find("img")
a_tag =tr_tag.find("a")


In [105]:
img_tag


<img alt="Elephant" src="https://images.unsplash.com/photo-1551316679-9c6ae9dec224?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/>

In [106]:
a_tag

<a href="https://en.wikipedia.org/wiki/Elephant" target="_blank">Elephant</a>

In [108]:
nickname_td = tr_tag.find_all("td")[-1]
nickname_td

<td> Trunked<br/>  Giant</td>

In [109]:
desc_td =tr_tag.find_all("td")[-2]
desc_td 

<td>Elephants are the largest land animals. They are known for their long trunks and large ears.</td>

In [110]:
desc_td.text

'Elephants are the largest land animals. They are known for their long trunks and large ears.'

In [111]:
nickname_td.text

' Trunked  Giant'

In [112]:
cleaned_text = nickname_td.text.strip()   #metnin başındaki ve sonundaki gereksiz boşlukları kaldırır !
cleaned_text

'Trunked  Giant'

## Lion

In [113]:
tr_tag =tr_tag_list [0]  #Lion
tr_tag

<tr>
<td><img alt="Lion" src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/></td>
<td><a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a></td>
<td>The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."</td>
<td> Majestic<br/>King  </td>
</tr>

In [114]:
img_tag = tr_tag.find("img")
a_tag = tr_tag.find("a")

In [115]:
img_tag

<img alt="Lion" src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/>

In [116]:
a_tag

<a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a>

In [117]:
nickname_td = tr_tag.find_all("td")[-1]
nickname_td

<td> Majestic<br/>King  </td>

In [118]:
desc_td =tr_tag.find_all("td")[-2]
desc_td

<td>The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."</td>

In [119]:
desc_td.text

'The lion is a large carnivorous mammal. It is known for its majestic appearance and is often referred to as the "king of the jungle."'

In [120]:
nickname_td.text

' MajesticKing  '

In [123]:
nickname_td.get_text(separator=" ", strip=True)    #Önce sprip ile boşlukları halletti, sonra seperator ile araya boşluk koyduk!!!


'Majestic King'

In [124]:
img_tag

<img alt="Lion" src="https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&amp;w=1974&amp;auto=format&amp;fit=crop&amp;ixlib=rb-4.0.3&amp;ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"/>

In [125]:
img_tag["alt"]

'Lion'

In [126]:
alt_attribute = img_tag["alt"]
alt_attribute

'Lion'

In [127]:
img_tag["src"]

'https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'

In [128]:
src_attribute = img_tag["src"]
src_attribute

'https://images.unsplash.com/photo-1534188753412-3e26d0d618d6?q=80&w=1974&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D'

In [129]:
a_tag

<a href="https://en.wikipedia.org/wiki/Lion" target="_blank">Lion</a>

In [130]:
a_tag["href"]

'https://en.wikipedia.org/wiki/Lion'

In [131]:
a_tag["target"]

'_blank'

## Scraping a Web Page

In [132]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [133]:
import requests

In [135]:
from bs4 import BeautifulSoup

In [136]:
requests.get("https://www.example.com")   #get() metodu ile istediğimiz herhangi web sayfaya gidebiliriz !!

<Response [200]>

In [None]:
#bu <Response [200]>  demek başarılı istek demektir 

In [138]:
result =requests.get("https://www.example.com")
result.status_code  

200

In [140]:
result.content  #sayfa içeriği olan HTML içeri almak için contentkullandık

b'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    

In [142]:
html =result.content

In [143]:
soup = BeautifulSoup(html , "html.parser")

In [149]:
soup.find("h1")

<h1>Example Domain</h1>

In [150]:
soup.find("h1").text

'Example Domain'

In [147]:
print(soup.prettify())   # DÜZENLİ GÖRÜNÜM prettify

<!DOCTYPE html>
<html>
 <head>
  <title>
   Example Domain
  </title>
  <meta charset="utf-8"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <style type="text/css">
   body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
  </style>
 </head>
 <body>
  <div>
   <h1>
    Example Domain
   </h1>
   <p>
    This dom

# SELENIUM 

In [152]:
pip install selenium   #Selenium kütüphanesini yükledik

Note: you may need to restart the kernel to use updated packages.


In [159]:
from selenium import webdriver

In [167]:
driver = webdriver.Chrome()
driver.get("https://www.example.com")   # example chorome da açıldı ! 

In [171]:
driver.title    #tarayıcımızın başlığı

'Example Domain'

In [172]:
driver.current_url #tarayıcı linki 

'https://www.example.com/'

In [174]:
driver.quit() #tarayıcı kapatmak

In [None]:
#miuul örneği 

In [181]:
from selenium import webdriver

In [182]:
driver1 = webdriver.Chrome()
driver1.get("https://www.miuul.com")   # miuul chorome da açıldı ! 

In [183]:
driver1.title

'Bootcamps, Kariyer Yolculukları ve Kurslar | Miuul'

In [186]:
driver1.current_url

'https://miuul.com/'

In [187]:
driver1.quit()

#  Finding Elements and Extracting Data

In [188]:
from selenium import webdriver

In [194]:
from selenium.webdriver.common.by import By

In [193]:
driver = webdriver.Chrome()
driver.get("https://www.example.com")

In [None]:
#(//a elementini bul incelediğin sayfadan)

In [195]:
element = driver.find_element(By.XPATH , "//a")
element

<selenium.webdriver.remote.webelement.WebElement (session="829f8543138773825b8f809b6dff873c", element="f.D21A0F568E58EDD5CA7C944B0749BD87.d.EAAEF000C75AD3A03E82ADE8B6490000.e.3")>

In [197]:
element.text  # more information diyorsa Element doğru !!

'More information...'

In [200]:
element.get_attribute("href")

'https://www.iana.org/domains/example'

In [202]:
element.get_attribute("innerHTML") #Element içinde yer alan HTML bize verecek 

'More information...'

# Finding Elements (Better Approach)

In [215]:
from selenium import webdriver
from selenium.webdriver.common.by import By


In [209]:
import time

In [216]:
driver = webdriver.Chrome()
driver.get("https://www.example.com")
time.sleep(2)

In [225]:
p_elements = driver.find_elements(By.XPATH, "//p")
p_elements 



[<selenium.webdriver.remote.webelement.WebElement (session="aba5011038f91a14a1fe68c4e121480d", element="f.32D870CAD6E6F9800DA1299FFDE46E3E.d.A95DFF68782BEDBDEC3D548393B52C84.e.3")>,
 <selenium.webdriver.remote.webelement.WebElement (session="aba5011038f91a14a1fe68c4e121480d", element="f.32D870CAD6E6F9800DA1299FFDE46E3E.d.A95DFF68782BEDBDEC3D548393B52C84.e.4")>]

In [222]:
elem = None
if p_elements:
    elem = p_elements[0]
else:
    print("Element not found")

print(elem)

Element not found
None


# Interacting with Elements

In [254]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


In [271]:
driver = webdriver.Chrome()
driver.get("https://www.miuul.com")
time.sleep(2)


In [252]:
btn_elements = driver.find_elements(By.XPATH, "//a[@id='login']")
btn = btn_elements[0]


In [246]:
btn.click()

In [256]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys


In [257]:
driver = webdriver.Chrome()
driver.get("https://www.miuul.com")
time.sleep(2)

In [None]:
#arama butonunu elementi bulma!sağclick incele yaptık !

In [None]:
#  //input[@name='arama']

In [259]:
inputs = driver.find_elements(By.XPATH, "//input[@name='arama']")
input = inputs[0]


In [260]:
input.send_keys("Data Science", Keys.ENTER)

# Scrolling and Scrolling Inside Dropdown

In [261]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [309]:
options = webdriver.ChromeOptions () #WebDriver’a tarayıcı ile nasıl çalışması gerektiğini söylemek için bu seçenekleri kullanırız
options.add_argument("--start-maximized") #tarayıcı penceresinin tam ekran modunda açılmasını sağlar. 
driver = webdriver.Chrome(options)  #Chrome tarayıcı kullanılarak WebDriver başlatılır. 
driver.get("https://miuul.com/katalog") 
time.sleep(2) #program 2 saniye bekler. Bu bekleme süresi, sayfanın tam olarak yüklenmesi veya diğer işlemler için zaman tanır.

a_element = driver.find_elements(By.XPATH, "//a[contains(@href,'deep-learning-path')]")[1]
driver.execute_script("arguments[0].focus();", a_element)
#driver.execute_script("arguments[0].scrollIntoView(true);", a_element)  # iki script de kullanılabilir yukarıdaki ile 
time.sleep(2)
a_element.click()  # Bağlantıyı tıkla






##### DROP-DOWN SCROOL

In [302]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [317]:
options = webdriver.ChromeOptions () #WebDriver’a tarayıcı ile nasıl çalışması gerektiğini söylemek için bu seçenekleri kullanırız
options.add_argument("--start-maximized") #tarayıcı penceresinin tam ekran modunda açılmasını sağlar. 
driver = webdriver.Chrome(options)  #Chrome tarayıcı kullanılarak WebDriver başlatılır. 
driver.get("https://miuul.com/katalog") 
time.sleep(2) #program 2 saniye bekler. Bu bekleme süresi, sayfanın tam olarak yüklenmesi veya diğer işlemler için zaman tanır.


dropdown_button = driver.find_elements(By.XPATH, "//a[@data-bs-toggle='dropdown']")[1]
dropdown_button.click()
time.sleep(0.5)
ul_element = driver.find_elements(By.XPATH, "//ul[@aria-labelledby='navbarDropdown']")[1]
driver.execute_script("arguments[0].setAttribute('style', arguments[1]);", ul_element, "overflow: scroll; height:80px;") 
#JavaScript kullanılarak, açılır menüye scroll eklenir ve yüksekliği 80 piksel olarak ayarlanır

driver.execute_script("arguments[0].focus();", ul_element)
#JavaScript ile bu kod liste öğesine odaklanır yani liste seçilip aktif hale getirilir.

from selenium.webdriver.common.action_chains import ActionChains
#Selenium’un ActionChains sınıfını projeye dahil eder. Bu sınıf, klavye ve fare hareketlerini simüle etmek için kullanılır.

action = ActionChains(driver)
#ActionChains nesnesi oluşturulur. Bu nesneyle tarayıcıda fare ve klavye eylemleri yapılır.

action.send_keys(Keys.ARROW_DOWN).perform()
#ActionChains kullanarak klavyeden aşağı ok tuşuna basma işlemi gerçekleştirilir.

time.sleep(0.25)  #programı 0.25 saniye duraklatır

action.send_keys(Keys.ARROW_DOWN).perform()

time.sleep(0.25)

action.send_keys(Keys.ARROW_DOWN).perform()



#.perform() metodu, Selenium’daki ActionChains sınıfı ile yapılan bir veya birden fazla eylemin 
#(klavye veya fare hareketlerinin) tarayıcıda gerçekleştirilmesini sağlar. 

# Pagination

In [323]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Initialize Driver
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options)
driver.get("https://learning.miuul.com/enrollments")

course_titles = []
for i in range(1,999):  #Bir döngü başlatılıyor. 1’den 999’a kadar olan sayfaları tarayacak.
    
    driver.get(f"https://learning.miuul.com/enrollments?page={i}")    
    #Döngüde her sayfa için ilgili URL’yi açıyor ve her seferinde farklı bir sayfayı hedefliyor.
   
    # Get Course Titles Per Page
    course_elements = driver.find_elements(By.XPATH, "//ul//h3")  #kurs başlıklarını h3 etiketleri kullanarak buluyor.
    if not course_elements: # len(course_elements) <= 0    #Eğer sayfada kurs başlığı yoksa (yani boşsa), döngüyü durduruyor.
        break
   
    
    for course in course_elements:   #Bulunan her kurs başlığı üzerinde tek tek işlem yapmak için bir alt döngü başlatılıyor.
        title = course.get_attribute("innerText")  #innerText özelliği kullanılarak başlık bilgisi alınıyor.
        course_titles.append(title)   #Alınan kurs başlığı, daha önce oluşturulan course_titles listesine ekleniyor.

print(course_titles)
print(len(course_titles))



[]
0


In [None]:
--------- burda birr hata var çözümleyeceğim !!!!


# Scraping a Web Page

In [325]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd

# Initialize Driver
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = webdriver.Chrome(options)
driver.get("https://miuul.com/katalog")
time.sleep(2)

#NOT   
#input = driver.find_elements(By.XPATH , "//label[contains(text(),'İleri')]/preciding-sibling::input")
#input = driver.find_elements(By.XPATH , "//label[contains(text(),'İleri')]/parent::div/input")  iki kod aynı


input = driver.find_elements(By.XPATH, "//label[contains(text(),'İleri')]/preceding-sibling::input")
input[0].click() if input else None




In [327]:
course_blocks = driver.find_elements(By.XPATH, "//div[contains(@class,'card catalog'))]")

data= []
for block in course_blocks:
    course_title = block.find_elements(By.XPATH, ".//h6")
    course_desc = block.find_elements(By.XPATH, ".//p")

    course_title = course_title[0].get_attribute("innerText") if course_title else None
    course_desc = course_desc[0].get_attribute("innerText") if course_desc else None

    print(course_title)
    print(course_desc)

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=129.0.6668.90)
Stacktrace:
0   chromedriver                        0x0000000102898248 cxxbridge1$str$ptr + 1907280
1   chromedriver                        0x0000000102890730 cxxbridge1$str$ptr + 1875768
2   chromedriver                        0x00000001024a4260 cxxbridge1$string$len + 89488
3   chromedriver                        0x000000010247f728 core::str::slice_error_fail::hbaf5d05fe3921cd2 + 3776
4   chromedriver                        0x000000010250ee8c cxxbridge1$string$len + 526780
5   chromedriver                        0x0000000102522218 cxxbridge1$string$len + 605512
6   chromedriver                        0x00000001024dd12c cxxbridge1$string$len + 322652
7   chromedriver                        0x00000001024ddd7c cxxbridge1$string$len + 325804
8   chromedriver                        0x00000001028604d8 cxxbridge1$str$ptr + 1678560
9   chromedriver                        0x0000000102864e40 cxxbridge1$str$ptr + 1697352
10  chromedriver                        0x00000001028455ec cxxbridge1$str$ptr + 1568244
11  chromedriver                        0x0000000102865710 cxxbridge1$str$ptr + 1699608
12  chromedriver                        0x0000000102836b90 cxxbridge1$str$ptr + 1508248
13  chromedriver                        0x0000000102881828 cxxbridge1$str$ptr + 1814576
14  chromedriver                        0x0000000102881980 cxxbridge1$str$ptr + 1814920
15  chromedriver                        0x00000001028903d0 cxxbridge1$str$ptr + 1874904
16  libsystem_pthread.dylib             0x0000000199d4ef94 _pthread_start + 136
17  libsystem_pthread.dylib             0x0000000199d49d34 thread_start + 8


# Using Proxy with Selenium

In [328]:
import requests
from bs4 import BeautifulSoup

# Proxy bilgileri
proxies = {
    'http': 'http://47.241.43.44:7777',
    'https': 'http://47.241.43.44:7777',
}

# Proxy kullanarak istek yapma
response = requests.get('https://example.com', proxies=proxies)

# Beautiful Soup ile içeriği işleme
soup = BeautifulSoup(response.content, 'html.parser')

print(soup.title.text)

Example Domain


# Using undetected-chromedriver to Pass Bot Tests

In [None]:
pip install undetected_chromedriver

In [None]:
import undetected_chromedriver as uc
# pip install undetected_chromedriver
from selenium import webdriver

url = "https://bot.sannysoft.com"

driver = uc.Chrome()
with driver:
    driver.get(url)

In [None]:
pip install undetected_chromedriver --timeout=100

In [None]:
python -m pip install --upgrade pip