<a href="https://colab.research.google.com/github/mratanusarkar/Web-Scraping-tickertapeIN/blob/basic-scraper-colab/scraping_tickertapeIN_stockNames.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Web Scraping all stock names from tickertape.in

**Input**: None <br>
**Output**: Two lists of strings containing the "Top" stock names and "All" stock names listed on "https://www.tickertape.in/stocks"

## Import Packages

In [10]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import time
from datetime import timedelta

## Request and Fetch the Webpage (for one sample page)

In [12]:
# hit "https://www.tickertape.in/stocks?filter=<filter-value>"
# A timeout is passed so the cell cannot hang forever if the server never
# answers; without it requests waits indefinitely.
requests.get("https://www.tickertape.in/stocks?filter=a", timeout=10)

<Response [200]>

In [13]:
# wow! no restriction for bots! no need of any headers!
# timeout: without one, requests waits indefinitely on an unresponsive server
response = requests.get("https://www.tickertape.in/stocks?filter=a", timeout=10)

# fail loudly on a 4xx/5xx reply instead of previewing an error page
response.raise_for_status()

# preview the first 500 characters of the returned HTML
response.text[0:500]

'<!DOCTYPE html><html lang="en-US"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link rel="shortcut icon" href="/favicon/favicon.png"/><link rel="apple-touch-icon" href="/favicon/favicon-192x192.png"/><link rel="manifest" href="/manifest/manifest.json"/><style type="text/css">:root {--white: #ffffff; --font_primary: #535B62; --font_dark: #2f363f; --font_light: #81878c; --font_blue: #0088ea; --font_lighter: #a2a8ae; --brand_primary: #151e28; --brand_success: #28c39a; --brand_danger:'

In [14]:
# not required for this webpage, use if bot restrictions are added in future.

# google chrome browser's request header (to make it look like, we are making this request from a browser)
header = {
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

# hit using the header
# timeout: without one, requests waits indefinitely on an unresponsive server
response = requests.get("https://www.tickertape.in/stocks?filter=a", headers=header, timeout=10)

# this response is parsed in the next section, so stop here on a 4xx/5xx
# reply rather than feeding an error page to the parser
response.raise_for_status()

# preview the first 500 characters of the returned HTML
response.text[0:500]

'<!DOCTYPE html><html lang="en-US"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link rel="shortcut icon" href="/favicon/favicon.png"/><link rel="apple-touch-icon" href="/favicon/favicon-192x192.png"/><link rel="manifest" href="/manifest/manifest.json"/><style type="text/css">:root {--white: #ffffff; --font_primary: #535B62; --font_dark: #2f363f; --font_light: #81878c; --font_blue: #0088ea; --font_lighter: #a2a8ae; --brand_primary: #151e28; --brand_success: #28c39a; --brand_danger:'

## Pass the fetched webpage response to Beautiful Soup

In [15]:
# Parse the fetched HTML into a BeautifulSoup tree.
# Parser choices noted for this step: "html.parser" or "lxml"; "lxml" is used.
page_html = response.text
soup = BeautifulSoup(page_html, features='lxml')

## Let us try and extract data (from one sample page)

- extract one company name from the page
- extract all company names from the page

### Extracting one company name

In [18]:
# first <li> tag in the parsed page; attribute-style access returns the
# first tag with that name, the same element soup.find("li") would give
htmlBlock = soup.li
print(htmlBlock)

<li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/ab-cotspin-india-ABCO">A B Cotspin India Ltd</a></li>


In [23]:
# the href looks like "/stocks/<slug>"; the slug is the third "/"-separated piece
href = htmlBlock.a['href']
href.split('/')[2]

'ab-cotspin-india-ABCO'

In [24]:
# visible text of the anchor inside the <li>, i.e. the company name
htmlBlock.a.get_text()

'A B Cotspin India Ltd'

### Extracting all the company names

In [28]:
# every <li> tag found in the parsed page (one per listed company link)
# NOTE: htmlBlock now holds a list of tags, not a single tag as in the
# previous section.
htmlBlock = soup.find_all(name="li")
print(htmlBlock)

[<li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/ab-cotspin-india-ABCO">A B Cotspin India Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a-b-infrabuild-ABIN">A B Infrabuild Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a-f-enterprises-AFE">A F Enterprises Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a-infrastructure-AIN">A Infrastructure Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/akcapital-services-AKC">A K Capital Services Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a-and-m-jumbo-bags-AMJU">A and M Jumbo Bags Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a-1-acid-AAL">A-1 Acid Ltd</a></li>, <li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/a2z-infra-engineering-A2ZI">A2z Infra Engineering Ltd</a></li>, <li class="jsx-1528870203"><a class=

In [29]:
# first <li> element from the find_all result
htmlBlock[0]

<li class="jsx-1528870203"><a class="jsx-1528870203" href="/stocks/ab-cotspin-india-ABCO">A B Cotspin India Ltd</a></li>

In [31]:
# slug of the first entry: third "/"-separated piece of its "/stocks/<slug>" href
firstHref = htmlBlock[0].a['href']
firstHref.split('/')[2]

'ab-cotspin-india-ABCO'

In [42]:
# Collect the href of the anchor inside each <li>.
# An <li> without an <a href=...> child is skipped: `element.a` is None for
# such an item, and subscripting None would raise TypeError.
fullList = [
    element.a['href']
    for element in htmlBlock
    if element.a is not None and element.a.has_attr('href')
]
fullList

['/stocks/ab-cotspin-india-ABCO',
 '/stocks/a-b-infrabuild-ABIN',
 '/stocks/a-f-enterprises-AFE',
 '/stocks/a-infrastructure-AIN',
 '/stocks/akcapital-services-AKC',
 '/stocks/a-and-m-jumbo-bags-AMJU',
 '/stocks/a-1-acid-AAL',
 '/stocks/a2z-infra-engineering-A2ZI',
 '/stocks/aa-plus-tradelink-AAP',
 '/stocks/aaa-technologies-AAA',
 '/stocks/aar-commercial-company-AARC',
 '/stocks/aar-shyam-india-investment-company-AARS',
 '/stocks/aarv-infratel-AARV',
 '/stocks/abb-india-ABB',
 '/stocks/abc-india-ABC',
 '/stocks/abm-knowledgeware-ABM',
 '/stocks/acc-ACC',
 '/stocks/ace-software-exports-ACES',
 '/stocks/aci-infocom-ACII',
 '/stocks/ad-manum-finance-ADM',
 '/stocks/adc-india-communications-KRO',
 '/stocks/adf-foods-AMRN',
 '/stocks/agi-infra-AGI',
 '/stocks/ags-transact-technologies-AGS',
 '/stocks/aia-engineering-AIAE',
 '/stocks/gammon-infrastructure-projects-GAIN',
 '/stocks/a-k-spintex-AKSP',
 '/stocks/akg-exim-AKGE',
 '/stocks/aki-india-AKI',
 '/stocks/akm-lace-and-embrotex-AKM',
 '

In [43]:
# Keep only links of the form "/stocks/<slug>" and take the slug (the third
# "/"-separated piece).
# A prefix test is used rather than a substring test: a link from another
# section whose path merely contains "stocks" must not be picked up, and the
# "/stocks/" prefix guarantees that index 2 exists.
stocksList = [href.split('/')[2] for href in fullList if href.startswith('/stocks/')]
stocksList

['ab-cotspin-india-ABCO',
 'a-b-infrabuild-ABIN',
 'a-f-enterprises-AFE',
 'a-infrastructure-AIN',
 'akcapital-services-AKC',
 'a-and-m-jumbo-bags-AMJU',
 'a-1-acid-AAL',
 'a2z-infra-engineering-A2ZI',
 'aa-plus-tradelink-AAP',
 'aaa-technologies-AAA',
 'aar-commercial-company-AARC',
 'aar-shyam-india-investment-company-AARS',
 'aarv-infratel-AARV',
 'abb-india-ABB',
 'abc-india-ABC',
 'abm-knowledgeware-ABM',
 'acc-ACC',
 'ace-software-exports-ACES',
 'aci-infocom-ACII',
 'ad-manum-finance-ADM',
 'adc-india-communications-KRO',
 'adf-foods-AMRN',
 'agi-infra-AGI',
 'ags-transact-technologies-AGS',
 'aia-engineering-AIAE',
 'gammon-infrastructure-projects-GAIN',
 'a-k-spintex-AKSP',
 'akg-exim-AKGE',
 'aki-india-AKI',
 'akm-lace-and-embrotex-AKM',
 'amd-industries-AMDM',
 'amj-land-holdings-AMJL',
 'ams-polymers-AMM',
 'ang-industries-ANGI',
 'ang-lifesciences-india-ANA',
 'ani-integrated-services-ANII',
 'ans-industries-ANS',
 'apl-apollo-tubes-APLA',
 'apm-finvest-APM',
 'apm-industr

In [44]:
# Keep only links of the form "/etfs/<slug>" and take the slug (the third
# "/"-separated piece).
# A prefix test is used rather than a substring test so that a link from
# another section whose path merely contains "etfs" is not picked up, and so
# that index 2 is guaranteed to exist.
# NOTE(review): assumes ETF links start with "/etfs/" — confirm against fullList.
etfsList = [href.split('/')[2] for href in fullList if href.startswith('/etfs/')]
etfsList

['aditya-bsl-gold-etf-AITY',
 'aditya-bsl-nifty-50-etf-ADIY',
 'aditya-bsl-sensex-30-etf-BSL',
 'aditya-birla-sun-life-nifty-healthcare-etf-HEALT',
 'aditya-birla-sun-life-nifty-bank-etf-ADIL',
 'aditya-birla-sun-life-nifty-it-etf-TECT',
 'aditya-birla-sun-life-nifty-next-50-etf-ADIB',
 'aditya-birla-sun-life-silver-etf-SILVR',
 'axis-aaa-bond-plus-sdl-etf-2026-matur-reg-growth-AXISB',
 'axis-banking-etf-AXIS',
 'axis-consumption-etf-AXISC',
 'axis-gold-etf-AXGF',
 'axis-healthcare-etf-AXISH',
 'axis-nifty-50-etf-AISN',
 'axis-technology-etf-regular-growth-AXIST']

## Let us try and extract data (from all the pages)

Hit "https://www.tickertape.in/stocks?filter={filter}" with filter = "Top", "A"-"Z" and "Others"

In [45]:
# declare the filter values for all the pages
top = ["top"]
a_z = list("abcdefghijklmnopqrstuvwxyz")
others = ["others"]

In [46]:
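# TODO: request "https://www.tickertape.in/stocks?filter=<value>" for every value in
# top + a_z + others and collect the stock names from each page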