-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrap.py
65 lines (56 loc) · 2 KB
/
scrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import re
import redis
import json
import os
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
def scrap():
"""
Scraps latest copy of equity bhaav from bseindia websites
and parse csv file scrapped to store result in redis DB
"""
URL = "https://www.bseindia.com/markets/MarketInfo/BhavCopy.aspx"
print("Fetching Data ...")
try:
db = redis.from_url(os.environ.get("REDIS_URL"))
# db = redis.Redis()
stock_count = 0
website = urlopen(URL)
if website.status == 200:
# Read HTML content of weblink
html = website.read().decode('utf-8')
# Search for given pattern URL
pattern = '"(http://www.bseindia.com/download/BhavCopy/Equity/.*?)"'
links = re.findall(pattern, html)
# Download and extract zip file
response = urlopen(links[0])
zipfile = ZipFile(BytesIO(response.read()))
# Reset db to avoid multiple writes
db.delete("data");
for csvfilename in zipfile.namelist():
# Read csv file and store stock data to db as json string
for line in zipfile.open(csvfilename).readlines()[1:]:
arr = line.decode('utf-8').split(",")
dic = {
"CODE": arr[0],
"NAME": arr[1],
"OPEN": arr[4],
"HIGH": arr[5],
"LOW": arr[6],
"CLOSE": arr[7],
}
stock_count += 1
json_string = json.dumps(dic)
db.sadd("data", json_string)
print("Data write Complete");
return stock_count
else:
print("Website not reachable")
return stock_count
except Exception as e:
print(e)
print("Error while fetching data")
return 0
if __name__ == "__main__":
scrap()