<a href="https://colab.research.google.com/github/mratanusarkar/Web-Scraping-tickertapeIN/blob/feature%2Fscrape-all-sections/scraping_tickertapeIN_stocks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Web Scraping stock data from tickertape.in

**Input**: stock name in "https://www.tickertape.in/stocks/{stock-name}" <br>
**Output**: full stock data & predictions from tickertape in JSON/Py Dictionary format


## Import Packages

In [26]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import json
import time
from datetime import timedelta

## Request and Fetch the Webpage

Let's try with a sample stock, e.g. "TCS" (the cell below currently uses "Adani Green Energy"; swap the commented line to switch)

In [27]:
# company subdirectory: the last path segment of the page URL,
# i.e. "https://www.tickertape.in/stocks/{subdir}"
# subdir = "tata-consultancy-services-TCS"
subdir = "adani-green-energy-ADNA"

In [28]:
# hit "https://www.tickertape.in/stocks/{subdir}" and display the response status
# timeout: without one, requests may wait indefinitely on an unresponsive server
requests.get("https://www.tickertape.in/stocks/" + subdir, timeout=30)

<Response [200]>

In [29]:
# wow! no restriction for bots! no need of any headers!
# timeout: without one, requests may wait indefinitely on an unresponsive server
response = requests.get("https://www.tickertape.in/stocks/" + subdir, timeout=30)

# preview the first 500 characters of the returned HTML
response.text[:500]

'<!DOCTYPE html><html lang="en-US"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link rel="shortcut icon" href="/favicon/favicon.png"/><link rel="apple-touch-icon" href="/favicon/favicon-192x192.png"/><link rel="manifest" href="/manifest/manifest.json"/><style type="text/css">:root {--white: #ffffff; --font_primary: #535B62; --font_dark: #2f363f; --font_light: #81878c; --font_blue: #0088ea; --font_lighter: #a2a8ae; --brand_primary: #151e28; --brand_success: #28c39a; --brand_danger:'

In [30]:
# not required for this webpage, use if bot restrictions are added in future.

# google chrome browser's request header (to make it look like we are making this request from a browser)
header = {
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

# hit using the header
# timeout: without one, requests may wait indefinitely on an unresponsive server
response = requests.get("https://www.tickertape.in/stocks/" + subdir, headers=header, timeout=30)

# preview the first 500 characters of the returned HTML
response.text[:500]

'<!DOCTYPE html><html lang="en-US"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link rel="shortcut icon" href="/favicon/favicon.png"/><link rel="apple-touch-icon" href="/favicon/favicon-192x192.png"/><link rel="manifest" href="/manifest/manifest.json"/><style type="text/css">:root {--white: #ffffff; --font_primary: #535B62; --font_dark: #2f363f; --font_light: #81878c; --font_blue: #0088ea; --font_lighter: #a2a8ae; --brand_primary: #151e28; --brand_success: #28c39a; --brand_danger:'

## Pass the fetched webpage response to Beautiful Soup

In [31]:
# build the parse tree from the fetched HTML; "lxml" is the parser used here
# ("html.parser" is the other parser option for this step)
soup = BeautifulSoup(markup=response.text, features='lxml')

## Let us try and extract some data from the soup

- We can see the whole webpage and how the HTML DOM is structured.
- On inspecting it, we see that most of the required info is inside div blocks with unique class names.
- A few values are inside span or heading tags, but every block has a class.
- Let us extract a few important HTML DOM blocks and inspect them.

### [1] Basic Company Information

In [32]:
# company name
htmlBlock = soup.find("h3", class_="security-name")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())

value = htmlBlock.text if htmlBlock is not None else None
print(value)

<h3 class="jsx-2903438179 security-name">
 Adani Green Energy Ltd
</h3>
Adani Green Energy Ltd


In [33]:
# ticker name
htmlBlock = soup.find("span", class_="ticker")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())

value = htmlBlock.text if htmlBlock is not None else None
print(value)

<span class="jsx-2903438179 ticker text-teritiary font-medium">
 ADANIGREEN
</span>
ADANIGREEN


In [34]:
# current price
htmlBlock = soup.find("span", class_="current-price")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())

value = htmlBlock.text if htmlBlock is not None else None
print(value)

<span class="jsx-3168773259 current-price typography-h1 text-primary">
 2,864.30
</span>

2,864.30


In [35]:
# change absolute-value
htmlBlock = soup.find("span", class_="absolute-value")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())

value = htmlBlock.text if htmlBlock is not None else None
print(value)

<span class="jsx-3168773259 change absolute-value text-14 typography-body-medium-l up">
 <i class="jsx-3168773259 icon-Green-up">
 </i>
 0.00
 <!-- -->
 %
</span>

0.00%


In [36]:
# change percentage-value
htmlBlock = soup.find("span", class_="percentage-value")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())

value = htmlBlock.text if htmlBlock is not None else None
print(value)

<span class="jsx-3168773259 change percentage-value text-14 up">
 (
 <!-- -->
 +
 <!-- -->
 0.00
 <!-- -->
 )
</span>
 (+0.00)


### [2] Investment Checklist

In [37]:
# checklist-item carousel-item
htmlBlock = soup.find("div", class_="carousel-item")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())
else:
    print(None)

<div class="jsx-3083281824 checklist-item carousel-item selected">
 <div class="jsx-3228946760 jsx-482152645 commentary-item-root d-flex-row align-start justify-start " eventlabel="Clicked Checklist Item">
  <i class="jsx-3228946760 jsx-482152645 icon-mood icon-neutral-comment text-24 mr12">
  </i>
  <div class="jsx-3228946760 jsx-482152645 content">
   <h4 class="jsx-3228946760 jsx-482152645 typography-body-medium-m text-primary">
    <span class="jsx-3228946760 jsx-482152645 relative no-select tooltip-holder">
     Intrinsic Value
     <div class="jsx-1503855875 tooltip-root sh-tooltip font-regular">
      Intrinsic value is the calculated value of the company and may differ from current stock price. If intrinsic value &gt; current price, price increase is expected in the future to reduce the gap and vice-versa
     </div>
    </span>
   </h4>
   <p class="jsx-3228946760 jsx-482152645 lh-138 text-13 text-secondary typography-body-regular-m commentary-desc">
    Insufficient Data
   <

In [38]:
htmlBlock = soup.find("div", class_="carousel-item")

# get all keys and values
# find_all(True, recursive=False) returns only the direct child tags; it replaces the
# deprecated childGenerator(), which also yields bare text nodes
checklistItems = htmlBlock.find_all(True, recursive=False) if htmlBlock is not None else []
for item in checklistItems:
    label = item.find("span", class_="tooltip-holder")
    icon = item.find("i")
    if label is None or not label.contents or icon is None:
        # not a checklist entry (or its markup changed): skip it instead of crashing
        continue

    key = label.contents[0]
    key = key.title().replace(" ", "")

    # the verdict is encoded in a class such as "icon-neutral-comment"; look it up by
    # pattern rather than by position, since the count of leading "jsx-..." classes
    # varies between elements
    moodClass = next(
        (c for c in icon.get("class", []) if c.startswith("icon-") and c.endswith("-comment")),
        None,
    )
    value = moodClass.split("-")[1] if moodClass is not None else None

    print(key, value)

IntrinsicValue neutral
RoeVsFdRates negative
DividendReturns negative
EntryPoint negative
NoRedFlags positive


### [3] Price Chart

### [4] Key Metrics

### [5] Forecast & Ratings

In [39]:
# Forecast
htmlBlock = soup.find("div", class_="forecast-radial")

# find() returns None when nothing matches, so guard before calling prettify()
if htmlBlock is not None:
    print(htmlBlock.prettify())
else:
    print(None)

<div class="jsx-3770717616 forecast-radial">
 <div class="jsx-3770717616 radial-holder">
  <div class="rv-xy-plot rv-radial-chart" style="width:64px;height:64px">
   <svg class="rv-xy-plot__inner" height="64" width="64">
    <g class="rv-xy-plot__series rv-xy-plot__series--arc " opacity="1" pointer-events="all" transform="translate(32,32)">
     <path class="rv-xy-plot__series rv-xy-plot__series--arc-path rv-radial-chart__series--pie__slice " d="M1.959434878635765e-15,-32A32,32,0,1,1,-1.959434878635765e-15,32A32,32,0,1,1,1.959434878635765e-15,-32M1.6907553595872534e-14,-24A24,24,0,1,0,-1.6907553595872534e-14,24A24,24,0,1,0,1.6907553595872534e-14,-24Z" style="opacity:1;stroke:transparent;fill:rgba(129, 135, 140, 0.22)">
     </path>
     <path class="rv-xy-plot__series rv-xy-plot__series--arc-path rv-radial-chart__series--pie__slice " d="M1.959434878635765e-15,-32L1.4695761589768238e-15,-24Z" style="opacity:1;stroke:transparent;fill:#07d459">
     </path>
    </g>
   </svg>
  </div>
  <

In [40]:
# get forecast percentage

# walk down to the inner <span> one step at a time so that a missing node
# produces an empty value instead of an AttributeError / IndexError
percentSpan = htmlBlock.div.span if (htmlBlock is not None and htmlBlock.div is not None) else None
first = percentSpan.contents[0] if (percentSpan is not None and percentSpan.contents) else None

# "—" is treated as "no value"; anything that is not plain text is ignored as well
value = str(first) if (isinstance(first, str) and first != "—") else ""
print(value)

symbol = percentSpan.span.text if (percentSpan is not None and percentSpan.span is not None) else ""
print(symbol)

print(value + symbol if (value + symbol) != "" else None)



None


In [41]:
# scratch check: concatenating two empty strings gives '', the case the forecast cell maps to None
"" + ""

''

In [42]:
# get forecast text (None when the block has no <h4>)
value = None if htmlBlock.h4 is None else htmlBlock.h4.text
print(value)

None


# Let us try and extract data (from all the pages)

Load the previously scraped lists of stock/ETF names and subdirectories, <br>
then hit "https://www.tickertape.in/{type}/{subdirectory}" for each entry

In [43]:
# JSON text is UTF-8; name the encoding explicitly instead of relying on the
# platform default used by open()
with open('top-company-list.json', 'r', encoding='utf-8') as fp:
    topcompanies = json.load(fp)

# peek at the first record
topcompanies[0]

{'name': 'Adani Enterprises Ltd',
 'subdirectory': 'adani-enterprises-ADEL',
 'type': 'stocks'}

In [44]:
# JSON text is UTF-8; name the encoding explicitly instead of relying on the
# platform default used by open()
with open('full-company-list.json', 'r', encoding='utf-8') as fp:
    allcompanies = json.load(fp)

# peek at the first record
allcompanies[0]

{'name': 'A & M Febcon Ltd',
 'subdirectory': 'a-and-m-febcon-AMF',
 'type': 'stocks'}

In [45]:
def _checklistMood(iconClasses):
    """Return the verdict word encoded in a checklist icon's CSS classes.

    The icon carries a class shaped like "icon-<mood>-comment"
    (e.g. "icon-neutral-comment"); "<mood>" is returned. When no class has that
    shape, the fourth class is split on "-" (the original positional lookup).
    Returns None when neither lookup yields a value.
    """
    for _cls in iconClasses:
        _parts = _cls.split("-")
        if len(_parts) == 3 and _parts[0] == "icon" and _parts[2] == "comment":
            return _parts[1]

    # positional fallback, kept so pages the old lookup handled still work
    if len(iconClasses) > 3:
        _parts = iconClasses[3].split("-")
        if len(_parts) > 1:
            return _parts[1]
    return None


def scrapeTickertape(name, stocktype, subdirectory, timeout=30):
    """Scrape summary data for one security from its tickertape.in page.

    Requests "https://www.tickertape.in/{stocktype}/{subdirectory}" and extracts
    the company name, ticker, current price, the investment-checklist verdicts
    and the analyst forecast.

    Args:
        name: Display name of the company. Not used by the scraper; kept so
            existing callers keep working.
        stocktype: URL section of the security, e.g. "stocks".
        subdirectory: Last path segment of the page, e.g. "adani-enterprises-ADEL".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict with the keys "companyName", "ticker", "currentPrice",
        "buyRecommendation", "forecast" and one key per checklist item
        (e.g. "IntrinsicValue"). A value is None when its element is missing
        from the page. If the request or parsing fails, the error is printed
        and an empty list is returned, so callers can test the result for
        truthiness.
    """
    _url = "https://www.tickertape.in/" + stocktype + "/" + subdirectory
    _data = {}

    try:
        # hit the page and get html
        _header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
        }
        # without a timeout requests may wait indefinitely on an unresponsive server
        _response = requests.get(_url, headers=_header, timeout=timeout)
        # treat 4xx/5xx as a failure instead of parsing an error page
        _response.raise_for_status()

        # give the webpage to Beautiful Soup using parsers: "html.parser" or "lxml"
        _soup = BeautifulSoup(_response.text, 'lxml')

        ######### extract html data from webpage and form output data #########

        # company name
        _htmlBlock = _soup.find("h3", class_="security-name")
        _data["companyName"] = _htmlBlock.text if _htmlBlock is not None else None

        # ticker name
        _htmlBlock = _soup.find("span", class_="ticker")
        _data["ticker"] = _htmlBlock.text if _htmlBlock is not None else None

        # current price
        _htmlBlock = _soup.find("span", class_="current-price")
        _data["currentPrice"] = _htmlBlock.text if _htmlBlock is not None else None

        # checklist carousel-item get all keys and values
        # find_all(True, recursive=False) returns only the direct child tags; it
        # replaces the deprecated childGenerator(), which also yields text nodes
        _htmlBlock = _soup.find("div", class_="carousel-item")
        _items = _htmlBlock.find_all(True, recursive=False) if _htmlBlock is not None else []
        for _item in _items:
            _label = _item.find("span", class_="tooltip-holder")
            _icon = _item.find("i")
            if _label is None or _icon is None or not _label.contents:
                continue
            _title = _label.contents[0]
            if not isinstance(_title, str):
                # first child is a nested tag, not the label text
                continue
            _key = _title.title().replace(" ", "")
            _data[_key] = _checklistMood(_icon.get("class") or [])

        # Forecast: walk down one node at a time so a missing node yields None
        # for the two forecast fields instead of discarding everything above
        _htmlBlock = _soup.find("div", class_="forecast-radial")
        _holder = _htmlBlock.div if _htmlBlock is not None else None
        _percent = _holder.span if _holder is not None else None

        _value = ""
        _symbol = ""
        if _percent is not None:
            _first = _percent.contents[0] if _percent.contents else None
            # "—" is treated as "no value"
            if isinstance(_first, str) and _first != "—":
                _value = str(_first)
            if _percent.span is not None:
                _symbol = _percent.span.text
        _data["buyRecommendation"] = (_value + _symbol) or None

        _headline = _htmlBlock.h4 if _htmlBlock is not None else None
        _data["forecast"] = _headline.text if _headline is not None else None

        return _data
    except Exception as _e:
        # best-effort scraper: report the problem and hand back a falsy result
        print(_e)
        return []

In [46]:
# try the scraper on the first entry of the top-company list
scrapeTickertape(
    name=topcompanies[0]["name"],
    stocktype=topcompanies[0]["type"],
    subdirectory=topcompanies[0]["subdirectory"],
)

{'DividendReturns': 'negative',
 'EntryPoint': 'negative',
 'IntrinsicValue': 'negative',
 'NoRedFlags': 'positive',
 'RoeVsFdRates': 'negative',
 'buyRecommendation': '0%',
 'companyName': 'Adani Enterprises Ltd',
 'currentPrice': '2,199.45',
 'forecast': 'Analysts have suggested that investors can buy this stock',
 'ticker': 'ADANIENT'}

In [47]:
# # let's scrape the top companies (only entries whose type is "stocks" are fetched)
# # NOTE: scrapeTickertape() catches its own exceptions and returns [] on failure, so
# # "successful!" below only means the call returned - check for an empty result too
# topStocksData = []
# count = 0

# for companies in topcompanies:
#     companyName = companies["name"]
#     companyType = companies["type"]
#     companyDir = companies["subdirectory"]
#     print(companyName, "url: https://www.tickertape.in/" + companyType + "/" + companyDir)
#     try:
#         # get data from each page and append to data list
#         if companyType == "stocks":
#             topStocksData.append(scrapeTickertape(companyName, companyType, companyDir))
#             print("successful!")
#             count += 1
#     except Exception as _e:
#         # some issue occurred, catch exception
#         print("failed!")
#         print(_e)

# print(count, "/", len(topcompanies), "completed")

In [48]:
# # let's scrape every company in the full list (only entries whose type is "stocks" are fetched)
# # NOTE: scrapeTickertape() catches its own exceptions and returns [] on failure, so
# # "successful!" below only means the call returned - check for an empty result too
# allStocksData = []
# count = 0

# for companies in allcompanies:
#     companyName = companies["name"]
#     companyType = companies["type"]
#     companyDir = companies["subdirectory"]
#     print(companyName, "url: https://www.tickertape.in/" + companyType + "/" + companyDir)
#     try:
#         # get data from each page and append to data list
#         if companyType == "stocks":
#             allStocksData.append(scrapeTickertape(companyName, companyType, companyDir))
#             print("successful!")
#             count += 1
#     except Exception as _e:
#         # some issue occurred, catch exception
#         print("failed!")
#         print(_e)

# print(count, "/", len(allcompanies), "completed")

## Exporting the data

In [49]:
# # write the scraped top-company records to disk as JSON
# # (needs topStocksData, which only exists after the scraping loop has been run)
# with open("top-company-tickertape-data.json", "w") as outfile:
#     json.dump(topStocksData, outfile)

In [50]:
# # write the scraped full-list records to disk as JSON
# # (needs allStocksData, which only exists after the scraping loop has been run)
# with open("all-company-tickertape-data.json", "w") as outfile:
#     json.dump(allStocksData, outfile)