### Commands for the Bot:

1. ?joba: lists 10 latest jobs

2. ?joba 15: lists 15 latest jobs
3. ?joba h: display help
4. ?joba i: display info

5. ?joba t 2021: query year 2021
6. ?joba t 2021 03: query year 2021 & month 03
7. ?joba 15 t 2021: query year 2021 and list latest 15 jobs
8. ?joba 15 t 2021 03: query year 2021 & month 03 and list latest 15 jobs
9. ?joba e 1: display the full post at position 1 of the latest posts
10. ?joba e 1 t 2021: display the full post at position 1 of the latest 2021 posts
11. ?joba e 1 t 2021 03: display the full post at position 1 of the latest 03/2021 posts



In [1]:
# Importing required dependencies
from bs4 import BeautifulSoup as soup
import requests as req
import discord as dc
import os
import nest_asyncio

In [11]:
# Crawler class for getting the job data
class SXCPCCrawler:
    """Scraper for the job posts published on https://sxcpc.blogspot.com/.

    ``crawl_archive`` lists the posts shown in the blog's archive widget and
    ``crawl_post`` extracts the date, title and body of a single post.
    """

    base_url = ''  # Base url of the site to be crawled
    request_timeout = 10  # Seconds to wait for the site before giving up

    # Constructor to initialize with the default values
    def __init__(self):
        self.base_url = 'https://sxcpc.blogspot.com/'

    def _build_url(self, year='', month='', page_name=''):
        """Return the absolute URL for an archive page or a single post.

        Each part may be given with or without surrounding slashes
        (``'2021'`` and ``'2021/'`` are equivalent). Archive URLs keep their
        trailing slash; a post URL ends with the page name itself.
        """
        segments = [part.strip('/') for part in (year, month, page_name)]
        path = '/'.join(segment for segment in segments if segment)
        if path and not segments[2]:
            # Year/month archive pages are addressed as directories.
            path += '/'
        return self.base_url + path

    # Function to get the page content and convert it into
    # BeautifulSoup object to be parsed later
    def _get_page_content(self, year='', month='', page_name=''):
        """Download the requested page and return it parsed with lxml."""
        url = self._build_url(year, month, page_name)

        # Without a timeout a stalled connection would hang the caller forever.
        response = req.get(url, timeout=self.request_timeout)
        return soup(response.text, 'lxml')

    # Method to extract the required info
    def crawl_archive(self, year='', month=''):
        """Return ``(href, title)`` tuples for the posts in the archive widget.

        ``year`` and ``month`` optionally narrow the archive page that is
        fetched. An empty list is returned when the page has no archive
        widget or the widget lists no posts.
        """
        page_html = self._get_page_content(year, month)

        archive_list = page_html.find('div', {'id': 'ArchiveList'})
        if archive_list is None:
            return []
        posts = archive_list.find('ul', {'class': 'posts'})
        if posts is None:
            return []

        return [(item['href'], item.text) for item in posts.find_all('a')]

    def crawl_post(self, year='', month='', page_name=''):
        """Return the date, title and body text of one post.

        The result is a dict with the keys ``date_of_post``, ``post_name``
        and ``post_body``; the values are the raw text of the matching
        elements inside the page's ``blog-posts`` container.
        """
        page_html = self._get_page_content(year, month, page_name)
        full_post = page_html.find('div', {'class': 'blog-posts'})
        post_body = full_post.find('div', {'class': 'post-body'})
        return {
            'date_of_post': full_post.h2.span.text,
            'post_name': full_post.h3.text,
            'post_body': post_body.text,
        }
        

In [12]:
# Shared instances used by the on_message handler further down.
crawler = SXCPCCrawler()  # Crawler object
# NOTE(review): discord.py 2.0+ requires an `intents=` argument here --
# confirm against the installed discord.py version.
client = dc.Client()  # Discord object

In [14]:
# Manual check: fetch the archive list, then crawl its first entry.
archive = crawler.crawl_archive()
# Each archive entry is (href, title); the last three path segments of the
# href are used as the year, month and page name arguments.
query_tokens = archive[0][0].split('/')[-3:]
crawler.crawl_post(
    query_tokens[0] + '/',
    query_tokens[1] + '/',
    query_tokens[2] + '/'
)

{'date_of_post': 'Thursday, March 25, 2021',
 'post_name': '\nDIGIT INSURANCE\n',
 'post_body': '\n\xa0URL: www.godigit.com\xa0We are a young organization that has proved a lot in a short span of time. We are redefining how insurance is perceived in India. We have launched multiple insurance products and wish to establish the gold standard of how Insurance companies work. We are looking for people who would help us achieve that vision. We have openings in the below function.Location: BangalorePosition: Trainee Executive (Actuarial)\xa0Required:\xa0The final year 2021 batch - B.Com, BMS, Economics, Statistics, Mathematics(Only those who are doing Actuarial...they can apply)We are looking for people who would help us achieve that vision.We have openings in the below function:Team: Strategy & Business PlanningRole: The team is involved in working on various aspects which is geared\xa0towards increase the Top Line and improving the bottom line. As a\xa0business analyst you would work on an

In [None]:
# Patch asyncio to allow nested event loops -- presumably so client.run()
# below can be started from inside the notebook's already-running loop.
nest_asyncio.apply()

# Limits enforced by Discord on embeds; exceeding them makes send() fail.
_MAX_EMBED_FIELDS = 25
_MAX_FIELD_CHARS = 1024
_MAX_TITLE_CHARS = 256

_DEFAULT_COUNT = 10  # Posts listed when the user gives no count
_EMBED_COLOR = 0x03f8fc

_HELP_LINES = (
    "> **?joba**: Displays latest 10 job post.",
    "> **?joba [m]**: Displays latest 'm' job post.",
    "> **?joba h**: Displays help.",
    "> **?joba i**: Displays bot information.",
    "> **?joba t [yyyy]**: Displays latest 10 job post of the provided year.",
    "> **?joba t [yyyy] [mm]**: Displays latest 10 job post of the provided year and month.",
    "> **?joba [m] t [yyyy]**: Displays latest 'm' job post of the provided year.",
    "> **?joba [m] t [yyyy] [mm]**: Displays latest 'm' job post of the provided year and month.",
    "> **?joba e [n]**: Displays the full job post at position 'n' of the latest posts.",
    "> **?joba e [n] t [yyyy] [mm]**: Displays the full job post at position 'n' "
    "of the provided year (and optional month).",
)

_INFO_TEXT = (
    '> **@developer**: thenocturnalguy\n'
    '> **@description**: A discord bot to display latest job post on https://sxcpc.blogspot.com\n'
    '> **@version**: v1.0.3\n'
    '> **@updated_at**: 24.03.2021'
)


class _UsageError(Exception):
    """Raised for a malformed command; the message is sent back to the user."""


def _parse_period(tokens):
    """Parse the tokens following ``t`` into ``(year, month, label)``.

    ``year`` and ``month`` are returned in the ``'2021/'`` / ``'03/'`` form
    passed to the crawler (``month`` is ``''`` when omitted); ``label`` is
    the text shown in the embed title. Raises ``_UsageError`` on bad input.
    """
    if len(tokens) == 1:
        (yr,) = tokens
        if not yr.isdecimal():
            raise _UsageError('Wrong year provided!')
        return yr + '/', '', yr
    if len(tokens) == 2:
        yr, mn = tokens
        if not (yr.isdecimal() and mn.isdecimal()):
            raise _UsageError('Wrong year or month provided!')
        return yr + '/', mn + '/', f'{mn}/{yr}'
    raise _UsageError('Wrong arguments provided!')


async def _send_post_list(channel, requested, year='', month='', label=''):
    """Send an embed listing up to ``requested`` posts of the given period."""
    postlist = crawler.crawl_archive(year, month)
    # An embed cannot hold more than _MAX_EMBED_FIELDS fields.
    count = min(requested, len(postlist), _MAX_EMBED_FIELDS)
    period = f' of {label}' if label else ''

    embed = dc.Embed(
        title=f'__**Latest {count} Job Post Results{period}:**__',
        color=_EMBED_COLOR)
    for href, title in postlist[:count]:
        embed.add_field(
            name='\u200b',
            value=f'> ** {title} ** \t\t\t \n > ** [Go to Post]({href}) **',
            inline=True)
    await channel.send(embed=embed)


async def _send_full_post(channel, position, year='', month=''):
    """Send the full text of the post at 1-based ``position`` (1 = first listed).

    Positions outside the list are clamped to the nearest valid entry.
    """
    postlist = crawler.crawl_archive(year, month)
    if not postlist:
        await channel.send('No job posts found!')
        return

    index = min(max(position, 1), len(postlist)) - 1
    # The post URL ends in .../<year>/<month>/<page name>.
    yr, mn, page_name = postlist[index][0].split('/')[-3:]
    post_meta = crawler.crawl_post(yr + '/', mn + '/', page_name)

    post_name = post_meta['post_name'].strip()
    title = f"__** {post_name} **__ * ({post_meta['date_of_post']}) *"
    body = f"> {post_meta['post_body'].strip()}"
    if len(body) > _MAX_FIELD_CHARS:
        body = body[:_MAX_FIELD_CHARS - 3] + '...'

    embed = dc.Embed(title=title[:_MAX_TITLE_CHARS], color=_EMBED_COLOR)
    embed.add_field(name='\u200b', value=body, inline=False)
    await channel.send(embed=embed)


async def _send_help(channel):
    """Send the usage embed, one field per supported command."""
    embed = dc.Embed(title='__**Usage:**__', color=_EMBED_COLOR)
    for line in _HELP_LINES:
        embed.add_field(name='\u200b', value=line, inline=False)
    await channel.send(embed=embed)


async def _send_info(channel):
    """Send the bot information embed."""
    embed = dc.Embed(title='__**Bot Info:**__', color=_EMBED_COLOR)
    embed.add_field(name='\u200b', value=_INFO_TEXT)
    await channel.send(embed=embed)


async def _dispatch(channel, opts):
    """Run the command described by ``opts`` (the words after ``?joba``).

    Raises ``_UsageError`` when the options do not form a valid command.
    """
    if not opts:
        await _send_post_list(channel, _DEFAULT_COUNT)
    elif len(opts) == 1:
        (opt,) = opts
        if opt.isdecimal():
            await _send_post_list(channel, int(opt))
        elif opt == 'h':
            await _send_help(channel)
        elif opt == 'i':
            await _send_info(channel)
        else:
            raise _UsageError('Wrong count provided!')
    elif opts[0] == 't':
        # ?joba t yyyy [mm]
        year, month, label = _parse_period(opts[1:])
        await _send_post_list(channel, _DEFAULT_COUNT, year, month, label)
    elif opts[0] == 'e':
        # ?joba e n [t yyyy [mm]]
        if not opts[1].isdecimal():
            raise _UsageError('Wrong arguments provided!')
        year = month = ''
        if len(opts) > 2:
            if opts[2] != 't':
                raise _UsageError('Wrong arguments provided!')
            year, month, _ = _parse_period(opts[3:])
        await _send_full_post(channel, int(opts[1]), year, month)
    elif opts[1] == 't':
        # ?joba m t yyyy [mm]
        if not opts[0].isdecimal():
            raise _UsageError('Wrong count provided!')
        year, month, label = _parse_period(opts[2:])
        await _send_post_list(channel, int(opts[0]), year, month, label)
    else:
        raise _UsageError('Wrong arguments provided!')


@client.event
async def on_message(message):
    """Handle ``?joba`` commands; every other message is ignored.

    Messages written by the bot itself are skipped. Malformed commands are
    answered with a short error message in the same channel.
    """
    if message.author == client.user:
        return

    args = message.content.strip().split()
    # Match the command word exactly so that e.g. '?jobabc' is not handled.
    if not args or args[0] != '?joba':
        return

    try:
        await _dispatch(message.channel, args[1:])
    except _UsageError as err:
        await message.channel.send(str(err))


# The bot token is a secret, so it is read from the DISCORD_TOKEN environment
# variable rather than stored in the source (KeyError if it is not set).
# NOTE: the token that used to be hard-coded on this line has been exposed
# and must be regenerated in the Discord developer portal.
client.run(os.environ['DISCORD_TOKEN'])
