# Chapter 1

In [14]:
# Absolute path: start at the document root and walk html -> body
xpath = "/html/body"

# Double slash: match every div element anywhere in the document
xpath = "//div"

# Attribute predicate: every span whose class attribute equals 'span-class'
xpath = "//span[@class='span-class']"

# Chapter 2

In [13]:
from scrapy import Selector
import requests

# * is the wildcard: here it matches every child element of body
xpath = '/html/body/*'

# /*  selects all children, i.e. elements exactly one generation below
# //* selects all descendants, i.e. elements from every later generation

# every child element of a div that is itself the child of another div
xpath = '//div/div/*'

# every p whose class attribute equals 'class-1'
xpath = '//p[@class="class-1"]'

# every element (of any tag) whose id equals 'uid'
xpath = '//*[@id="uid"]'

# inside each div whose id equals 'uid', the second p child
xpath = '//div[@id="uid"]/p[2]'

# every element whose class attribute contains the substring 'class-1'
# (the leading // is required: a single /* would only test the root element)
xpath = '//*[contains(@class, "class-1")]'

# the class attribute of the second p child of each div directly under body
xpath = '/html/body/div/p[2]/@class'

# Scrapy Part

# Sample page: one classed div, one loose paragraph, and five numbered divs
html = '''
<html>
    <body>
        <div class="hello datacamp">
            <p>Hello World</p>
        </div>
        <p>Enjoy Datacamp!</p>
        <div>Div 1: <p>paragraph 1</p></div>
        <div>Div 2: <p>paragraph 2</p> <p>paragraph 3</p> </div>
        <div>Div 3: <p>paragraph 4</p> <p>paragraph 5</p> <p>paragraph 6</p></div>
        <div>Div 4: <p>paragraph 7</p></div>
        <div>Div 5: <p>paragraph 8</p></div>
    </body>
</html>
'''

# Parse the HTML string into a Selector
sel = Selector(text=html)

# Select every div in the document; the result is a SelectorList
divs = sel.xpath("//div")

# Each call prints a banner and then the value; sep='\n' puts the value on its
# own line, so the output is the same as printing banner and value separately.
print('\n####divs####\n', divs, sep='\n')
print('\n####divs.extract()####\n', divs.extract(), sep='\n')
print('\n####divs.extract_first()####\n', divs.extract_first(), sep='\n')

# xpath() calls can be chained: the second path is relative (./) to each div
sel.xpath("//div").xpath("./span/p[3]")

# HTML text to Selector


# Download a live page and build a Selector from its HTML
url = 'https://en.wikipedia.org/wiki/Web_scraping'

# A timeout keeps the request from hanging forever on an unresponsive server
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing and counting an error page
response.raise_for_status()

html = response.content
sel = Selector(text=html)

# print number of elements in page ('//*' matches every element)
print("You have found: ", len(sel.xpath('//*')))



####divs####

[<Selector xpath='//div' data='<div class="hello datacamp">\n        ...'>, <Selector xpath='//div' data='<div>Div 1: <p>paragraph 1</p></div>'>, <Selector xpath='//div' data='<div>Div 2: <p>paragraph 2</p> <p>par...'>, <Selector xpath='//div' data='<div>Div 3: <p>paragraph 4</p> <p>par...'>, <Selector xpath='//div' data='<div>Div 4: <p>paragraph 7</p></div>'>, <Selector xpath='//div' data='<div>Div 5: <p>paragraph 8</p></div>'>]

####divs.extract()####

['<div class="hello datacamp">\n            <p>Hello World</p>\n        </div>', '<div>Div 1: <p>paragraph 1</p></div>', '<div>Div 2: <p>paragraph 2</p> <p>paragraph 3</p> </div>', '<div>Div 3: <p>paragraph 4</p> <p>paragraph 5</p> <p>paragraph 6</p></div>', '<div>Div 4: <p>paragraph 7</p></div>', '<div>Div 5: <p>paragraph 8</p></div>']

####divs.extract_first()####

<div class="hello datacamp">
            <p>Hello World</p>
        </div>


# Chapter 3

### CSS Locators

- replace / by > (except first character)
```
- XPath: /html/body/div
- CSS Locator: html > body > div
```

- replace // by a blank space (except first character)
```
- XPath: //div/span//p
- CSS Locator: div > span p
```

- replace [N] by :nth-of-type(N)
```
- XPath: //div/p[2]
- CSS Locator: div > p:nth-of-type(2)
```

- find an element by class, use a period .
```
- p.class-1
```

- find an element by id, use a pound sign #
```
- div#uid
```


# Create the XPath string equivalent to the CSS Locator 
```
xpath = '//div[@id="uid"]/span//h4'
```
# Create the CSS Locator string equivalent to the XPath
```
css_locator = 'div#uid > span h4'
```

```
css_locator = '#uid > *'
```