/
updater.py
224 lines (154 loc) · 7.34 KB
/
updater.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
""" Update package data """
import os
import re
import time
from urllib.parse import urljoin
import bs4
import requests
from pyhelpers.ops import confirmed
from pyhelpers.store import load_pickle, save_pickle
from . import LineData, OtherAssets
from .utils import cd_dat, fake_requests_headers, homepage_url
def collect_site_map(confirmation_required=True):
    """
    Collect data of the site map.

    The page at ``/misc/sitemap.shtm`` (resolved against the homepage URL) is downloaded and
    parsed. The text of every ``<h3>`` heading becomes a key of the result, and its value is
    built from the ``<ol>`` list that follows the heading:

    - a single URL (``str``) if the list has no nested ``<ol>`` and exactly one ``<li>``;
    - a list of URLs if the list has no nested ``<ol>`` and any other number of ``<li>``;
    - a dictionary keyed by category name if the list contains nested ``<ol>`` lists.

    :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True``
    :type confirmation_required: bool
    :return: dictionary of site map data, or ``None`` if the confirmation is declined
    :rtype: dict or None
    :raises requests.exceptions.RequestException: if the request times out or
        the server answers with an HTTP error status
    """
    if not confirmed("To collect the site map?", confirmation_required=confirmation_required):
        return None

    def absolute(href):
        # Resolve a link found on the page against the homepage URL.
        return urljoin(homepage_url(), href)

    url = absolute('/misc/sitemap.shtm')
    # A request without a timeout can block forever on an unresponsive server.
    source = requests.get(url, headers=fake_requests_headers(), timeout=30)
    # Fail early on an HTTP error instead of trying to parse an error page.
    source.raise_for_status()
    soup = bs4.BeautifulSoup(source.text, 'lxml')

    # <h3> headings: the top-level keys of the site map
    headings = [x.get_text(strip=True) for x in soup.find_all('h3')]

    # Strips the trailing " (the thousands of mileage files ..." remark from a category name
    category_pat = re.compile(r'.+(?= \(the thousands of mileage files)')

    site_map = {}

    # The <ol> that follows the first <h3>; advanced to the next heading's <ol> at the end of
    # every iteration below
    next_ol = soup.find('h3').find_next('ol')
    last = len(headings) - 1

    for i, heading in enumerate(headings):
        # find_all() searches all descendants, so nested items and lists are included
        li_tag, ol_tag = next_ol.find_all('li'), next_ol.find_all('ol')

        if not ol_tag:
            # Flat list: one link per <li>
            links = [absolute(x.find('a').get('href')) for x in li_tag]
            site_map[heading] = links[0] if len(links) == 1 else links

        else:
            sub_map = {}

            for ol in ol_tag:
                # Text of the <li> that precedes this list's parent <ol>. The list is skipped
                # when that text is already a key of sub_map - presumably because its links
                # were gathered while an enclosing list was processed (confirm on the live page).
                k = ol.find_parent('ol').find_previous('li').get_text(strip=True)
                if k in sub_map:
                    continue

                sub_li, sub_ol = ol.find_all('li'), ol.find_all('ol')

                if sub_ol:
                    # <li> items without a link act as sub-category labels
                    cat0 = [x.get_text(strip=True) for x in sub_li if not x.find('a')]
                    dat0 = [[absolute(a.get('href')) for a in x.find_all('a')] for x in sub_ol]
                    cat_name = ol.find_previous('li').get_text(strip=True)
                    if cat0:
                        sub_map[cat_name] = dict(zip(cat0, dat0))
                    else:
                        # No labels: flatten the per-list link groups into a single list
                        sub_map[cat_name] = [x_ for x in dat0 for x_ in x]

                else:
                    cat_name_ = ol.find_previous('li').get_text(strip=True)
                    matched = category_pat.match(cat_name_)
                    cat_name = matched.group(0) if matched else cat_name_
                    sub_map[cat_name] = [absolute(x.a.get('href')) for x in sub_li]

            site_map[heading] = sub_map

        if i < last:
            # Move on to the <ol> that follows the next <h3>
            next_ol = next_ol.find_next('h3').find_next('ol')

    return site_map
def fetch_site_map(update=False, confirmation_required=True, verbose=False):
    """
    Fetch the site map from the package data.

    The pickled site map is loaded when it exists and ``update`` is false; otherwise the
    site map is collected afresh and, if the collection produced data, saved to the pickle
    file. Nothing is written when the collection returns ``None`` (e.g. the confirmation
    was declined), so an existing pickle file is not overwritten with ``None``.

    :param update: whether to check on update and proceed to update the package data, defaults to ``False``
    :type update: bool
    :param confirmation_required: whether to prompt a message for confirmation to proceed, defaults to ``True``
    :type confirmation_required: bool
    :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``;
        the progress messages of this function itself are printed only when ``verbose == 2``
    :type verbose: bool, int
    :return: dictionary of site map data, or ``None`` if it could not be collected
    :rtype: dict or None

    **Examples**::

        from pyrcs.updater import fetch_site_map

        update = False
        site_map = fetch_site_map(update)

        update = True
        site_map = fetch_site_map(update)
    """
    path_to_pickle = cd_dat("site-map.pickle")
    chatty = verbose == 2

    if chatty:
        print("Getting site map", end=" ... ")

    if os.path.isfile(path_to_pickle) and not update:
        return load_pickle(path_to_pickle)

    try:
        if chatty:
            print("The package data is unavailable or needs to be updated ... ")

        site_map = collect_site_map(confirmation_required=confirmation_required)

        if site_map is not None:
            if chatty:
                print("Done.")
            save_pickle(site_map, path_to_pickle, verbose=verbose)

    except Exception as e:
        # Best-effort by design: report the problem and hand back None instead of raising
        site_map = None
        print("Failed. {}".format(e))

    return site_map
def update_backup_data(verbose=False, time_gap=5):
    """
    Update package data.

    Once confirmed, the site map is refreshed first, followed by every data category of
    ``LineData`` and then of ``OtherAssets``. The function sleeps for ``time_gap`` seconds
    between consecutive categories.

    :param verbose: whether to print relevant information in console as the function runs, defaults to ``False``
    :type verbose: bool
    :param time_gap: time gap (in seconds) between the updating of different classes
    :type time_gap: int

    **Example**::

        from pyrcs.updater import update_backup_data

        verbose = True
        time_gap = 5

        update_backup_data(verbose, time_gap)
    """
    if not confirmed("To update resources? "):
        return

    def pause():
        # Wait between two data categories
        time.sleep(time_gap)

    # Site map
    fetch_site_map(update=True, confirmation_required=False, verbose=verbose)

    lines = LineData(update=True)

    # ELR and mileages
    lines.ELRMileages.fetch_elr(update=True, verbose=verbose)
    pause()

    # Electrification
    lines.Electrification.fetch_electrification_codes(update=True, verbose=verbose)
    pause()

    # Location
    lines.LocationIdentifiers.fetch_location_codes(update=True, verbose=verbose)
    pause()

    # Line of routes
    for prefixes_only in (True, False):
        lines.LOR.get_keys_to_prefixes(prefixes_only=prefixes_only, update=True, verbose=verbose)
    lines.LOR.get_lor_page_urls(update=True, verbose=verbose)
    lines.LOR.fetch_lor_codes(update=True, verbose=verbose)
    lines.LOR.fetch_elr_lor_converter(update=True, verbose=verbose)
    pause()

    # Line names
    lines.LineNames.fetch_line_names(update=True, verbose=verbose)
    pause()

    # Track diagrams
    lines.TrackDiagrams.fetch_sample_track_diagrams_catalogue(update=True, verbose=verbose)
    pause()

    assets = OtherAssets(update=True)

    # Signal boxes
    assets.SignalBoxes.fetch_signal_box_prefix_codes(update=True, verbose=verbose)
    assets.SignalBoxes.fetch_non_national_rail_codes(update=True, verbose=verbose)
    pause()

    # Tunnels
    assets.Tunnels.fetch_railway_tunnel_lengths(update=True, verbose=verbose)
    pause()

    # Viaducts
    assets.Viaducts.fetch_railway_viaducts(update=True, verbose=verbose)
    pause()

    # Stations
    assets.Stations.fetch_railway_station_data(update=True, verbose=verbose)
    pause()

    # Depots
    assets.Depots.fetch_depot_codes(update=True, verbose=verbose)
    pause()

    # Features (last category, so no pause afterwards)
    assets.Features.fetch_features_codes(update=True, verbose=verbose)

    if verbose:
        print("\nUpdate finished.")