#### Building a Movie Recommendation Using Scrapy

In [1]:
import scrapy
from scrapy.crawler import CrawlerProcess

class MovieRecommendation(scrapy.Spider):
    """Crawl the IMDb Top chart, then each movie page, then its director page.

    One item is yielded per chart entry that can be fully resolved, with the
    keys ``MovieName``, ``DirectorName`` and ``Top Four Movies``. The movie
    name is the chart text as-is, so it carries the rank prefix
    (e.g. ``"6. Schindler's List"``).
    """

    name = 'IMDB_Movies'
    start_urls = ['https://www.imdb.com/chart/top/']
    # A browser-like User-Agent is sent with every request.
    custom_settings = {
        'DEFAULT_REQUEST_HEADERS': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        },
    }

    def parse(self, response):
        """Follow the title link of every chart entry.

        Args:
            response: The downloaded chart page.

        Yields:
            A request for each movie page, carrying ``movieName`` in its meta.
        """
        for movie in response.css('ul.ipc-metadata-list li'):
            movieName = movie.css('div.ipc-title a h3::text').get()
            movieUrl = movie.css('div.ipc-title a::attr(href)').get()
            if movieUrl is None:
                # The list selector is broad and can match <li> elements that
                # are not chart entries; response.follow() raises ValueError
                # on a None url, which would abort the rest of this loop.
                continue
            yield response.follow(
                movieUrl,
                callback=self.movieParseInfo,
                meta={'movieName': movieName},
                dont_filter=True,
            )

    def movieParseInfo(self, response):
        """Find the first director link on a movie page and follow it.

        Args:
            response: The downloaded movie page; ``response.meta['movieName']``
                must have been set by ``parse``.

        Yields:
            A request for the director page, carrying ``movieName`` and
            ``directorName`` in its meta. Nothing is yielded (a warning is
            logged instead) when no director link is found.
        """
        movieName = response.meta['movieName']
        # NOTE(review): 'sc-bfec09a1-8' / 'bHYmJY' look like generated class
        # names and may change between site builds -- confirm before relying
        # on this selector long-term.
        dire = response.css('ul.ipc-metadata-list.ipc-metadata-list--dividers-all.sc-bfec09a1-8.bHYmJY.ipc-metadata-list--base:first-of-type li:first-of-type span[aria-label="See full cast and crew"] + div a.ipc-metadata-list-item__list-content-item--link')
        if not dire:
            # Indexing an empty SelectorList would raise IndexError.
            self.logger.warning('No director link found on %s; skipping %r', response.url, movieName)
            return
        first_director = dire[0]
        directorurl = first_director.css('::attr(href)').get()
        directorname = first_director.css('::text').get()
        if directorurl is None:
            self.logger.warning('Director link without href on %s; skipping %r', response.url, movieName)
            return
        yield response.follow(
            directorurl,
            callback=self.directorparseinfo,
            meta={'movieName': movieName, 'directorName': directorname},
            # The same director page is requested once per movie, so the
            # duplicate filter must stay off here.
            dont_filter=True,
        )

    def directorparseinfo(self, response):
        """Build the final item from a director page.

        Args:
            response: The downloaded director page; its meta must contain
                ``movieName`` and ``directorName``.

        Yields:
            A dict with ``MovieName``, ``DirectorName`` and
            ``Top Four Movies`` (titles joined with commas).
        """
        movieName = response.meta['movieName']
        directorName = response.meta['directorName']
        top4movies = response.css('div[data-testid="nm_flmg_kwn_for"] div.ipc-primary-image-list-card__content a::text').getall()
        # NOTE: titles that themselves contain a comma cannot be told apart
        # from the separator in the joined string.
        top4movies = ','.join(top4movies)
        yield {'MovieName': movieName, 'DirectorName': directorName, 'Top Four Movies': top4movies}
            
# Run the spider in-process and export every scraped item to a CSV feed
# named MovieRecommendation_using_scrapy.csv in the working directory.
process = CrawlerProcess(
    settings={
        'FEEDS': {
            'MovieRecommendation_using_scrapy.csv': {'format': 'csv'},
        },
    },
)
# Schedule the spider class, then start the crawl.
process.crawl(MovieRecommendation)
process.start()

2024-04-06 00:17:15 [scrapy.utils.log] INFO: Scrapy 2.6.2 started (bot: scrapybot)
2024-04-06 00:17:15 [scrapy.utils.log] INFO: Versions: lxml 4.9.1.0, libxml2 2.9.14, cssselect 1.1.0, parsel 1.6.0, w3lib 1.21.0, Twisted 22.2.0, Python 3.9.13 (main, Aug 25 2022, 23:51:50) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 22.0.0 (OpenSSL 1.1.1q  5 Jul 2022), cryptography 37.0.1, Platform Windows-10-10.0.19045-SP0
2024-04-06 00:17:15 [scrapy.crawler] INFO: Overridden settings:
{}
2024-04-06 00:17:15 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2024-04-06 00:17:15 [scrapy.extensions.telnet] INFO: Telnet Password: 1fe803a084af3543
2024-04-06 00:17:15 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2024-04-06 00:17:15 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloaderm

2024-04-06 00:17:20 [filelock] DEBUG: Attempting to acquire lock 2656977320256 on C:\Users\pduseja\Anaconda3\lib\site-packages\tldextract\.suffix_cache/urls\2857a861fad3e342b67f126bcac61993.tldextract.json.lock
2024-04-06 00:17:20 [filelock] DEBUG: Lock 2656977320256 acquired on C:\Users\pduseja\Anaconda3\lib\site-packages\tldextract\.suffix_cache/urls\2857a861fad3e342b67f126bcac61993.tldextract.json.lock
2024-04-06 00:17:20 [urllib3.connectionpool] DEBUG: Starting new HTTPS connection (1): raw.githubusercontent.com:443
2024-04-06 00:17:20 [filelock] DEBUG: Attempting to release lock 2656977320256 on C:\Users\pduseja\Anaconda3\lib\site-packages\tldextract\.suffix_cache/urls\2857a861fad3e342b67f126bcac61993.tldextract.json.lock
2024-04-06 00:17:20 [filelock] DEBUG: Lock 2656977320256 released on C:\Users\pduseja\Anaconda3\lib\site-packages\tldextract\.suffix_cache/urls\2857a861fad3e342b67f126bcac61993.tldextract.json.lock
2024-04-06 00:17:20 [tldextract] ERROR: Exception reading Public 

2024-04-06 00:17:24 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0060196/?ref_=chttp_t_10> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:17:24 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0108052/?ref_=chttp_t_6> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:17:24 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0071562/?ref_=chttp_t_4> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:17:25 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0167260/?ref_=chttp_t_7> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:17:25 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0050083/?ref_=chttp_t_5> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:17:26 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0064116/?ref_=chttp_t_52> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:

2024-04-06 00:17:32 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000229/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0108052/?ref_=chttp_t_6)
2024-04-06 00:17:32 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000229/?ref_=tt_cl_dr_1>
{'MovieName': "6. Schindler's List", 'DirectorName': 'Steven Spielberg', 'Top Four Movies': "Schindler's List,E.T. The Extra-Terrestrial,Saving Private Ryan,Raiders of the Lost Ark"}
2024-04-06 00:17:33 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0166256/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0103639/?ref_=chttp_t_248)
2024-04-06 00:17:33 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0166256/?ref_=tt_cl_dr_1>
{'MovieName': '248. Aladdin', 'DirectorName': 'Ron Clements', 'Top Four Movies': 'Hercules,Aladdin,The Princess and the Frog,Moana'}
2024-04-06 00:17:34 [scrapy.core.engine] DEBUG: Crawled (200) <GET htt

2024-04-06 00:17:43 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0936404/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0059742/?ref_=chttp_t_237)
2024-04-06 00:17:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0936404/?ref_=tt_cl_dr_1>
{'MovieName': '237. The Sound of Music', 'DirectorName': 'Robert Wise', 'Top Four Movies': 'West Side Story,The Sound of Music,The Andromeda Strain,The Haunting'}
2024-04-06 00:17:43 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000406/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0032551/?ref_=chttp_t_239)
2024-04-06 00:17:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000406/?ref_=tt_cl_dr_1>
{'MovieName': '239. The Grapes of Wrath', 'DirectorName': 'John Ford', 'Top Four Movies': "The Man Who Didn't Talk Too Much,The Grapes of Wrath,How Green Was My Valley,Stagecoach"}
2024-04-06 00:17:44 [scrapy.core.engine]

2024-04-06 00:17:58 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0003433/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0019254/?ref_=chttp_t_221)
2024-04-06 00:17:58 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0003433/?ref_=tt_cl_dr_1>
{'MovieName': "221. La passion de Jeanne d'Arc", 'DirectorName': 'Carl Theodor Dreyer', 'Top Four Movies': "Gertrud,Vampyr,Ordet,La passion de Jeanne d'Arc"}
2024-04-06 00:17:58 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000116/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0088247/?ref_=chttp_t_220)
2024-04-06 00:17:59 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000116/?ref_=tt_cl_dr_1>
{'MovieName': '220. The Terminator', 'DirectorName': 'James Cameron', 'Top Four Movies': 'Avatar: The Way of Water,Avatar,Alien 2,Titanic'}
2024-04-06 00:18:00 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.co

2024-04-06 00:18:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0001008/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0031679/?ref_=chttp_t_208)
2024-04-06 00:18:17 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0001008/?ref_=tt_cl_dr_1>
{'MovieName': '208. Mr. Smith Goes to Washington', 'DirectorName': 'Frank Capra', 'Top Four Movies': "It's a Wonderful Life,It Happened One Night,You Can't Take It with You,Mr. Smith Goes to Washington"}
2024-04-06 00:18:19 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0073195/?ref_=chttp_t_205> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:18:19 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0046268/?ref_=chttp_t_204> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:18:20 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0213450/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/titl

2024-04-06 00:18:35 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0001415/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0047296/?ref_=chttp_t_195)
2024-04-06 00:18:36 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0001415/?ref_=tt_cl_dr_1>
{'MovieName': '195. On the Waterfront', 'DirectorName': 'Elia Kazan', 'Top Four Movies': 'On the Waterfront,East of Eden,A Streetcar Named Desire,Anatolian Smile'}
2024-04-06 00:18:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0015864/?ref_=chttp_t_191> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:18:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0001837/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0097165/?ref_=chttp_t_194)
2024-04-06 00:18:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt2119532/?ref_=chttp_t_190> (referer: https://www.imdb.com/chart/top/)
2024-04-0

2024-04-06 00:18:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0006498/?ref_=tt_cl_dr_1>
{'MovieName': '182. Bacheha-Ye Aseman', 'DirectorName': 'Majid Majidi', 'Top Four Movies': 'Bacheha-Ye Aseman,Baran,Khorshid,Rang-e khoda'}
2024-04-06 00:18:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000631/?ref_=tt_cl_dr_1>
{'MovieName': '184. Blade Runner', 'DirectorName': 'Ridley Scott', 'Top Four Movies': 'The Martian,Blade Runner,Thelma & Louise,Gladiator'}
2024-04-06 00:18:46 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt23289160/?ref_=chttp_t_177> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:18:46 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0946734/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt1201607/?ref_=chttp_t_181)
2024-04-06 00:18:46 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0946734/?ref_=tt_cl_dr_1>
{'M

2024-04-06 00:18:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm1218281/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt10872600/?ref_=chttp_t_172)
2024-04-06 00:18:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm1218281/?ref_=tt_cl_dr_1>
{'MovieName': '172. Spider-Man: No Way Home', 'DirectorName': 'Jon Watts', 'Top Four Movies': 'Cop Car,Spider-Man: No Way Home,The Old Man,The Onion News Network'}
2024-04-06 00:18:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000180/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0050212/?ref_=chttp_t_171)
2024-04-06 00:18:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0230032/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt2096673/?ref_=chttp_t_167)
2024-04-06 00:18:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000180/?ref_=tt_cl_dr_1>
{'MovieName': '171. The Bridge o

2024-04-06 00:19:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0457430/?ref_=chttp_t_146> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000041/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0042876/?ref_=chttp_t_157)
2024-04-06 00:19:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000041/?ref_=tt_cl_dr_1>
{'MovieName': '157. Rashōmon', 'DirectorName': 'Akira Kurosawa', 'Top Four Movies': 'The Hidden Fortress,Kagemusha,Ran,Shichinin No Samurai'}
2024-04-06 00:19:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0898288/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt1392214/?ref_=chttp_t_158)
2024-04-06 00:19:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0836328/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0057115/?ref_=chttp_t_155)
2024-04-06

2024-04-06 00:19:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0055031/?ref_=chttp_t_136> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt10272386/?ref_=chttp_t_135> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0042192/?ref_=chttp_t_137> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:09 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000041/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0089881/?ref_=chttp_t_143)
2024-04-06 00:19:09 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000041/?ref_=tt_cl_dr_1>
{'MovieName': '143. Ran', 'DirectorName': 'Akira Kurosawa', 'Top Four Movies': 'The Hidden Fortress,Kagemusha,Ran,Shichinin No Samurai'}
2024-04-06 00:19:09 [scrapy.core.engine] DEBUG: Crawled 

2024-04-06 00:19:15 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000217/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0993846/?ref_=chttp_t_131)
2024-04-06 00:19:15 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000217/?ref_=tt_cl_dr_1>
{'MovieName': '131. The Wolf of Wall Street', 'DirectorName': 'Martin Scorsese', 'Top Four Movies': 'Killers of the Flower Moon,Taxi Driver,Mean Streets,GoodFellas'}
2024-04-06 00:19:15 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt8579674/?ref_=chttp_t_125> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:16 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0040522/?ref_=chttp_t_124> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:16 [scrapy.extensions.logstats] INFO: Crawled 270 pages (at 160 pages/min), scraped 130 items (at 78 items/min)
2024-04-06 00:19:16 [scrapy.core.engine] DEBUG: Crawled (200) <GE

2024-04-06 00:19:22 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0005363/?ref_=tt_cl_dr_1>
{'MovieName': '121. Snatch', 'DirectorName': 'Guy Ritchie', 'Top Four Movies': 'Sherlock Holmes,Lock, Stock and Two Smoking Barrels,Snatch,The Man from U.N.C.L.E.'}
2024-04-06 00:19:22 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0093058/?ref_=chttp_t_110> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:23 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0612322/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0056592/?ref_=chttp_t_114)
2024-04-06 00:19:23 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0612322/?ref_=tt_cl_dr_1>
{'MovieName': '114. To Kill a Mockingbird', 'DirectorName': 'Robert Mulligan', 'Top Four Movies': "To Kill a Mockingbird,Summer of '42,The Other,The Nickel Ride"}
2024-04-06 00:19:23 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://ww

2024-04-06 00:19:29 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0898288/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt1255953/?ref_=chttp_t_104)
2024-04-06 00:19:30 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0898288/?ref_=tt_cl_dr_1>
{'MovieName': '104. Incendies', 'DirectorName': 'Denis Villeneuve', 'Top Four Movies': 'Dune,Blade Runner 2049,Arrival,Incendies'}
2024-04-06 00:19:30 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000033/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0052357/?ref_=chttp_t_105)
2024-04-06 00:19:30 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000033/?ref_=tt_cl_dr_1>
{'MovieName': '105. Vertigo', 'DirectorName': 'Alfred Hitchcock', 'Top Four Movies': 'Psycho,North by Northwest,Vertigo,The Birds'}
2024-04-06 00:19:30 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt2106476/?ref_=chttp_t_96>

2024-04-06 00:19:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0459552/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0091251/?ref_=chttp_t_92)
2024-04-06 00:19:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt7286456/?ref_=chttp_t_81> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:37 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0459552/?ref_=tt_cl_dr_1>
{'MovieName': '92. Idi I Smotri', 'DirectorName': 'Elem Klimov', 'Top Four Movies': 'Idi I Smotri,Agoniya,Pokhozhdeniya zubnogo vracha,Dobro pozhalovat, ili Postoronnim vkhod vospreshchen'}
2024-04-06 00:19:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0119698/?ref_=chttp_t_82> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:37 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt4154796/?ref_=chttp_t_80> (referer: https://www.imdb.com/chart/top/)
2024-04-

2024-04-06 00:19:44 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0361748/?ref_=chttp_t_70> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:44 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0090605/?ref_=chttp_t_69> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:44 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0032553/?ref_=chttp_t_68> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:19:45 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000583/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0082096/?ref_=chttp_t_78)
2024-04-06 00:19:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000583/?ref_=tt_cl_dr_1>
{'MovieName': '78. Das Boot', 'DirectorName': 'Wolfgang Petersen', 'Top Four Movies': 'Das Boot,Enemy Mine,The Neverending Story,Outbreak'}
2024-04-06 00:19:45 [scrapy.core.engine] DEBUG: Crawled (2

2024-04-06 00:19:52 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000040/?ref_=tt_cl_dr_1>
{'MovieName': '63. Paths of Glory', 'DirectorName': 'Stanley Kubrick', 'Top Four Movies': '2001: A Space Odyssey,Barry Lyndon,A Clockwork Orange,Full Metal Jacket'}
2024-04-06 00:19:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm2130108/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt4633694/?ref_=chttp_t_65)
2024-04-06 00:19:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm2130108/?ref_=tt_cl_dr_1>
{'MovieName': '65. Spider-Man: Into the Spider-Verse', 'DirectorName': 'Bob Persichetti', 'Top Four Movies': 'Spider-Man: Into the Spider-Verse,Spider-man: Across the Spider-verse,Le Petit Prince,Puss in Boots: The Last Wish'}
2024-04-06 00:19:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0209144/?ref_=chttp_t_57> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:1

2024-04-06 00:20:00 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000033/?ref_=tt_cl_dr_1>
{'MovieName': '51. Rear Window', 'DirectorName': 'Alfred Hitchcock', 'Top Four Movies': 'Psycho,North by Northwest,Vertigo,The Birds'}
2024-04-06 00:20:00 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0482571/?ref_=chttp_t_43> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:20:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt2582802/?ref_=chttp_t_42> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:20:01 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0868153/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0095765/?ref_=chttp_t_49)
2024-04-06 00:20:01 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0868153/?ref_=tt_cl_dr_1>
{'MovieName': '49. Cinema Paradiso', 'DirectorName': 'Giuseppe Tornatore', 'Top Four Movies': "La Migliore 

2024-04-06 00:20:09 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm1690966/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt9362722/?ref_=chttp_t_36)
2024-04-06 00:20:09 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0076759/?ref_=chttp_t_30> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:20:09 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm1690966/?ref_=tt_cl_dr_1>
{'MovieName': '36. Spider-man: Across the Spider-verse', 'DirectorName': 'Joaquim Dos Santos', 'Top Four Movies': 'Spider-man: Across the Spider-verse,The Legend of Korra,Avatar: The Last Airbender,Voltron: Legendary Defender'}
2024-04-06 00:20:10 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0118799/?ref_=chttp_t_27> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:20:10 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0317248/?ref_=chttp_t_26> (referer: htt

2024-04-06 00:20:16 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0001129/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0102926/?ref_=chttp_t_24)
2024-04-06 00:20:16 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0000229/?ref_=tt_cl_dr_1>
{'MovieName': '25. Saving Private Ryan', 'DirectorName': 'Steven Spielberg', 'Top Four Movies': "Schindler's List,E.T. The Extra-Terrestrial,Saving Private Ryan,Raiders of the Lost Ark"}
2024-04-06 00:20:16 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0001129/?ref_=tt_cl_dr_1>
{'MovieName': '24. The Silence of the Lambs', 'DirectorName': 'Jonathan Demme', 'Top Four Movies': 'The Silence of the Lambs,Rachel Getting Married,Philadelphia,Adaptation.'}
2024-04-06 00:20:16 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0000041/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0047478/?ref_=chttp_t_23)
2024-04-06 00:20:16 [scr

2024-04-06 00:20:43 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/title/tt0169547/?ref_=chttp_t_73> (referer: https://www.imdb.com/chart/top/)
2024-04-06 00:20:45 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.imdb.com/name/nm0005222/?ref_=tt_cl_dr_1> (referer: https://www.imdb.com/title/tt0169547/?ref_=chttp_t_73)
2024-04-06 00:20:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.imdb.com/name/nm0005222/?ref_=tt_cl_dr_1>
{'MovieName': '73. American Beauty', 'DirectorName': 'Sam Mendes', 'Top Four Movies': '1917,Road to Perdition,Skyfall,Revolutionary Road'}
2024-04-06 00:20:45 [scrapy.core.engine] INFO: Closing spider (finished)
2024-04-06 00:20:45 [scrapy.extensions.feedexport] INFO: Stored csv feed (250 items) in: MovieRecommendation_using_scrapy.csv
2024-04-06 00:20:45 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 370350,
 'downloader/request_count': 509,
 'downloader/request_method_count/GET': 509,
 