In [1]:
# Import dependencies
import scrapy
from scrapy.crawler import CrawlerProcess
import pandas as pd

In [2]:
# Scrapper
# ============================================
# li.list-group.item = Conference list Item
# a[title] = Conference title
# time = Conference date & time
# place = Conference venue
# ============================================
class MediConferencesSpider(scrapy.Spider):
    name = 'MediConferencesSpider'
    start_urls = ['https://www.omicsonline.org/medical-conferences.php']

    def parse(self, response):
        for item in response.css('li.list-group-item'):
            yield {
                'title': item.css('a[title] ::text').get(default='not-found'),
                'time': item.css('time ::attr("datetime")').get(default='not-found'),                
                'place': item.css('em ::text').get(default='not-found')
            }                                

In [3]:
# to get a json output
process = CrawlerProcess(settings={
    'FEED_FORMAT': 'json',
    'FEED_URI': 'conferences.json'
})

# do the actual run
process.crawl(MediConferencesSpider)
process.start()
process.stop()

2020-02-16 11:53:06 [scrapy.utils.log] INFO: Scrapy 1.8.0 started (bot: scrapybot)
2020-02-16 11:53:06 [scrapy.utils.log] INFO: Versions: lxml 4.5.0.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.20.0, Twisted 19.10.0, Python 3.8.1 (default, Jan  8 2020, 15:55:49) [MSC v.1916 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d  10 Sep 2019), cryptography 2.8, Platform Windows-10-10.0.17134-SP0
2020-02-16 11:53:06 [scrapy.crawler] INFO: Overridden settings: {'FEED_FORMAT': 'json', 'FEED_URI': 'conferences.json'}
2020-02-16 11:53:06 [scrapy.extensions.telnet] INFO: Telnet Password: a3c850d50c1dd003
2020-02-16 11:53:06 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-02-16 11:53:06 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.do

In [4]:
# Display the data using pandas
conferences_json = pd.read_json('conferences.json')
conferences_json

Unnamed: 0,title,time,place
0,4th International conference on Diabetic compi...,2019-11-25,"Osaka, Japan"
1,"9th International Conference on Probiotics, Fu...",2019-11-25,"Osaka, Japan"
2,"3rd International conference on Diabetes, Hype...",2019-11-25,"Tokyo, Japan"
3,16th Global Summit on Hematology and Infectiou...,2019-11-25,"London, UK"
4,5th Global Pediatric Ophthalmology Congress,2019-11-25,"Rome, Italy"
...,...,...,...
428,World Congress on Surgery and Anesthesia,2019-11-25,"Osaka, Japan"
429,International Conference on Cancer Treatment a...,2019-11-25,"Tokyo, Japan"
430,New frontiers on Neurology and Neural Disorders,2019-11-25,"Tokyo, Japan"
431,10th International Conference on Chronic Obstr...,2019-11-25,"Dubai, UAE"


# References

https://www.omicsonline.org/medical-conferences.php

https://docs.scrapy.org/en/latest/topics/selectors.html

In [3]:
# to get a json output
process = CrawlerProcess(settings={
    'FEED_FORMAT': 'csv',
    'FEED_URI': 'conferences.csv'
})

# do the actual run
process.crawl(MediConferencesSpider)
process.start()
process.stop()

2020-02-16 15:56:03 [scrapy.utils.log] INFO: Scrapy 1.8.0 started (bot: scrapybot)
2020-02-16 15:56:03 [scrapy.utils.log] INFO: Versions: lxml 4.4.2.0, libxml2 2.9.9, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 19.10.0, Python 3.7.1 (default, Dec 10 2018, 22:54:23) [MSC v.1915 64 bit (AMD64)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d  10 Sep 2019), cryptography 2.8, Platform Windows-10-10.0.18362-SP0
2020-02-16 15:56:03 [scrapy.crawler] INFO: Overridden settings: {'FEED_FORMAT': 'csv', 'FEED_URI': 'conferences.csv'}
2020-02-16 15:56:03 [scrapy.extensions.telnet] INFO: Telnet Password: 38dffc96daca96a9
2020-02-16 15:56:03 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-02-16 15:56:04 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downl

2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '41st World Dental Science and Oral Health Congress', 'time': '2019-11-25', 'place': 'Singapore, Singapore'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '29th World Diabetes & Heart Congress', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '4th International Conference On Hypertension and Cardiac Health', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Annual meeting on Cardiology and Heart Failure', 'time': '2019-11-25', 'place': 'Kuala Lumpur, Malaysia'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <

2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '28th International Conference on Clinical Pediatrics', 'time': '2019-11-25', 'place': 'London, UK'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd International Conference on Pediatrics, Neonatology and Healthcare', 'time': '2019-11-25', 'place': 'Amsterdam, Netherlands'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd European Cardiology Congress', 'time': '2019-11-25', 'place': 'Florence, Italy'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '29th International Conference on Pediatrics, Neonatology and Pediatric Nursing', 'time': '2019-11-25', 'place': 'Kyoto, Japan'}
2020-02-16 15:56:05 [scrapy.core.

2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '12th International Conference on Surgery and Anesthesia', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Global summit on Cosmetic & Reconstructive Surgery', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd World Congress on Advancements in Tuberculosis and Lung Diseases', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '25th World Congress & Expo on Dermatology', 'time': '2019-11-25', 'place': 'Osaka, Japan'}
2020-02-16 15:56:05 [scrapy.core.scraper] DEBUG: Scraped from 

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '3rd International Conference on Advances in Neonatal and Pediatric Nutrition', 'time': '2019-11-25', 'place': 'Zurich, Switzerland'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd Middle East Heart Congress', 'time': '2019-11-25', 'place': 'Abu Dhabi, UAE'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '26th International Conference on Human Metabolic Health: Diabetes, Obesity, and Metabolism', 'time': '2019-11-25', 'place': 'Abu Dhabi, UAE'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '16th International Conference on Nephrology and Hypertension', 'time': '2019-11-25', 'place': 'Abu Dhabi, UAE'}
2020-02

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'International Conference on Digestive and Gastroenterology Health Conditions', 'time': '2019-11-25', 'place': 'Barcelona, Spain'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '20th World Gastroenterologists Summit', 'time': '2019-11-25', 'place': 'Osaka, Japan'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '25th International Conference on Dental Education', 'time': '2019-11-25', 'place': 'Florence, Italy'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '9th Annual Congress on Primary Healthcare, Nursing & Neonatal Screening', 'time': '2019-11-25', 'place': 'Vancouver, Canada'}
2020-02-16 15:56:06 [scrapy.cor

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'World Congress on Heart and Pediatric Cardiology', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '34th International Conference on Neonatology and Perinatology', 'time': '2019-11-25', 'place': 'Paris, France'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '33rd World Pediatrics Conference', 'time': '2019-11-25', 'place': 'Paris, France'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '27th Euro Congress and Expo on Dental & Oral Health', 'time': '2019-11-25', 'place': 'Paris, France'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.om

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '5th World Congress on Nephrology and Renal Care', 'time': '2019-11-25', 'place': 'Barcelona, Spain'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '5th World Congress on Cardiology and Cardio care', 'time': '2019-11-25', 'place': 'Barcelona, Spain'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'International Conference on Cardiac Imaging and Diagnotics', 'time': '2019-11-25', 'place': 'Dubai, United Arab Emirates'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd Annual Conference on Vascular Medicine', 'time': '2019-11-25', 'place': 'Dubai, United Arab Emirates'}
2020-02-16 15:56:06 [scrapy.core.scraper] D

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '27th International Conference & Exhibition on Cardiovascular and Thoracic Surgery', 'time': '2019-11-25', 'place': 'Beijing, China'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '4th Annual Conference on Orthopedics, Rheumatology and Osteoporosis', 'time': '2019-11-25', 'place': 'Prague, Czech Republic'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '12th International Chronic Obstructive Pulmonary Disease Conference', 'time': '2019-11-25', 'place': 'Barcelona, Spain'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '31st European Pediatrics Conference', 'time': '2019-11-25', 'place': 'Barcelona, Spain'}
2020-0

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '26th International Conference on Skin Care and Cosmetology', 'time': '2019-11-25', 'place': 'Prague, Czech Republic'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'International Conference on Lung and Respiratory Diseases', 'time': '2019-11-25', 'place': 'Paris, France'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '24th World Congress on Pediatrics, Neonatology and Primary Care', 'time': '2019-11-25', 'place': 'Helsinki, Finland'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '19th Asia Pacific Ophthalmologists Annual Meeting', 'time': '2019-11-25', 'place': 'Singapore, Singapore'}
2020-02-16 15:56:06 [scra

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Annual Congress on Dental Health and Oral Care', 'time': '2019-11-25', 'place': 'Vancouver, Canada'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Annual World Dentists Summit', 'time': '2019-11-25', 'place': 'Vancouver, Canada'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '3rd Annual Conference on Hematologists', 'time': '2019-11-25', 'place': 'Dubai, United Arab Emirates'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '6th Annual Summit on Pediatric Cardiology 2020', 'time': '2019-11-25', 'place': 'Vancouver, Canada'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonli

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'International Conference on Asthma and Allergy', 'time': '2019-11-25', 'place': 'Lisbon, Portugal'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'International Structural Heart Disease Conference', 'time': '2019-11-25', 'place': 'Bali, Indonesia'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '3rd Annual Congress on Orthodontics and Endodontics', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd International Conference on Case Reports', 'time': '2019-11-25', 'place': 'Berlin, Germany'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://w

2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '2nd Annual Congress on Pediatric Cardiology', 'time': '2019-11-25', 'place': 'Montreal, Canada'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Global Cardiovascular Research and Clinical Cardiology', 'time': '2019-11-25', 'place': 'Montreal, Canada'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': 'Annual Congress on Perinatology & Child Care', 'time': '2019-11-25', 'place': 'Tokyo, Japan'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonline.org/medical-conferences.php>
{'title': '3rd World Heart and Brain Conference', 'time': '2019-11-25', 'place': 'Abu Dhabi, UAE'}
2020-02-16 15:56:06 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.omicsonlin

<DeferredList at 0x15ff5647710 current result: []>