# Spider Class Skeleton

```
import scrapy

class YourSpider(scrapy.Spider):
  """Minimal spider template: send one request and pass its response to `parse`."""
  name = "your_spider"

  def start_requests(self):
    """Yield the initial request(s) for the spider to download."""
    # Placeholder address: replace with the page you want to scrape.
    url = "https://example.com"
    yield scrapy.Request(url=url, callback=self.parse)

  def parse(self, response):
    """Handle the response of a request yielded by `start_requests`."""
    # Template only: the body is read here but nothing is done with it yet.
    result = response.body
```

# `start_requests` method

```
  def start_requests(self):
    """Yield one request per start URL, sending each response to `parse`."""
    urls = ["https://www.datacamp.com", "https://scrapy.org"]
    for url in urls:
      # The real request: one is created for every URL in the list.
      yield scrapy.Request(url=url, callback=self.parse)
```

# `parse` method

```
def parse(self, response):
    """Save the body of `response` to the local file `some_file.html`."""
    html_file = 'some_file.html'
    # Binary mode: the response body is written to disk unchanged.
    with open(html_file, 'wb') as f:
        f.write(response.body)
```

# Self Referencing

```
import scrapy

class YourSpider(scrapy.Spider):
  """Spider that shows how a method calls another method of its class via `self`."""
  name = "your_spider"

  def start_requests(self):
    """Print a greeting by calling this class's own `print_msg` method."""
    # Calling this class's method with a parameter.
    self.print_msg("Hello World!")

  def parse(self, response):
    """Unused in this example."""
    pass

  def print_msg(self, msg):
    """Print `msg` after a fixed prefix.

    A method of the class that takes `msg` as its argument.
    """
    print("Calling start_requests in YourSpider prints out:", msg)
```

# Inspecting a Spider

```
import scrapy

class DCspider(scrapy.Spider):
  """Spider that collects the author names shown in course blocks."""
  name = 'dcspider'

  def start_requests(self):
    """Yield the request whose response is handled by `parse`."""
    # NOTE: `url_short` is not defined in this snippet; it must exist before the spider runs.
    yield scrapy.Request(url=url_short, callback=self.parse)

  def parse(self, response):
    """Return the text of every `p.course-block__author-name` element.

    `response` is passed here for the request yielded in `start_requests`.
    """
    author_names = response.css('p.course-block__author-name::text').extract()
    return author_names
```

# A Web Crawler

```
import scrapy
from scrapy.crawler import CrawlerProcess

class Some_Spider_Class(scrapy.Spider):
  """Crawl the course directory and record each course's chapter titles.

  Results are written to the module-level `dc_dict`, keyed by course title.
  """
  name = "some_spider_class"

  def start_requests(self):
    """Request the course directory page and parse it with `parse_front`."""
    url = 'https://www.datacamp.com/courses/all'
    yield scrapy.Request(url=url, callback=self.parse_front)

  def parse_front(self, response):
    """Follow the link of every course block to its course page."""
    course_blocks = response.css('div.course-block')
    course_links = course_blocks.xpath('./a/@href')
    links_to_follow = course_links.extract()
    for url in links_to_follow:
      yield response.follow(url=url, callback=self.parse_pages)

  def parse_pages(self, response):
    """Store this page's chapter titles in `dc_dict` under the course title."""
    crs_title = response.xpath('//h1[contains(@class,"title")]/text()')
    crs_title_ext = crs_title.extract_first()
    if crs_title_ext is None:
      # No <h1> matched, so there is no title to use as a key: skip the page
      # instead of failing on `None.strip()`.
      return
    ch_titles = response.css('h4.chapter__title::text')
    ch_titles_ext = [t.strip() for t in ch_titles.extract()]
    dc_dict[crs_title_ext.strip()] = ch_titles_ext


# Shared store filled in by the spider: course title -> list of chapter titles.
dc_dict = {}

# Create the crawler process, register the spider class with it, then start it.
process = CrawlerProcess()
process.crawl(Some_Spider_Class)
process.start()
```