-
Notifications
You must be signed in to change notification settings - Fork 3
/
dinkylinkbackup
230 lines (173 loc) · 7.25 KB
/
dinkylinkbackup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import os
import urllib
from google.appengine.api import users
from google.appengine.ext import ndb
import jinja2
import webapp2
from sys import argv
import datetime
import pickle
import sys
sys.path.insert(0, 'libs')
import BeautifulSoup
from bs4 import BeautifulSoup
import requests
import json
# Shared Jinja2 environment. Templates are looked up in the directory that
# contains this module, and autoescaping is switched on.
_TEMPLATE_DIR = os.path.dirname(__file__)
_JINJA_EXTENSIONS = ['jinja2.ext.autoescape', 'jinja2.ext.loopcontrols']
JINJA_ENVIRONMENT = jinja2.Environment(
    loader=jinja2.FileSystemLoader(_TEMPLATE_DIR),
    extensions=_JINJA_EXTENSIONS,
    autoescape=True,
)
def getPost():
    """Fetch today's schedule result pages for both travel directions.

    The trip-search form is posted once per direction through a single
    ``requests`` session, using the date reported by ``datetime.date.today()``
    formatted as MM/DD/YYYY.

    Returns:
        A two-item list ``[toPUhtml, toNYhtml]``: the response text of the
        New York -> Princeton search followed by the response text of the
        Princeton -> New York search.
    """
    url = 'http://www.njtransit.com/sf/sf_servlet.srv?hdnPageAction=TrainSchedulesFrom'
    # (station code, station description) pairs used to fill the form.
    princeton = ("124_PRIN", "Princeton")
    new_york = ("105_BNTN", "New York Penn Station")
    # NOTE(review): this is the server's local date -- confirm it matches the
    # timezone the schedule site expects.
    str_date = datetime.date.today().strftime("%m/%d/%Y")

    def trip_form(start, end):
        # Form fields for a one-way search from `start` to `end`.
        return {
            'selOrigin': start[0],
            'selDestination': end[0],
            'datepicker': str_date,
            'OriginDescription': start[1],
            'DestDescription': end[1],
        }

    with requests.Session() as session:
        toNY = session.post(url, data=trip_form(princeton, new_york))
        toPU = session.post(url, data=trip_form(new_york, princeton))
    return [toPU.text, toNY.text]
## NOTE: scrape() below reads the schedule HTML and returns a dict of time lists; it does not write a CSV file.
def _parse_departure(str_time):
    """Convert a 12-hour clock string such as '5:42 P.M.' to ``datetime.time``.

    Accepts the meridiem with or without dots and in any case ('P.M.', 'PM',
    'p.m', ...). Returns None when the text is not a recognisable
    'H:MM <AM|PM>' value, so callers can skip malformed entries.
    """
    from datetime import time

    parts = str_time.split()
    if len(parts) < 2:
        return None
    hrmin = parts[0].split(':')
    try:
        hr = int(hrmin[0])
        minute = int(hrmin[1])
    except (ValueError, IndexError):
        return None
    meridiem = parts[1].replace('.', '').upper()
    if meridiem not in ('AM', 'PM'):
        return None
    if not (1 <= hr <= 12 and 0 <= minute <= 59):
        return None
    # 12 A.M. is hour 0 and 12 P.M. is hour 12; every other P.M. hour is +12.
    hr %= 12
    if meridiem == 'PM':
        hr += 12
    return time(hr, minute)


def getSoonestTrain(times, now=None):
    """Return the first entry of *times* that departs after the current time.

    Args:
        times: Iterable of 12-hour clock strings ('H:MM A.M.' / 'H:MM PM'
            style), scanned in the order given. Entries that cannot be
            parsed are skipped.
        now: Optional ``datetime.time`` to compare against. Defaults to the
            server's local wall-clock time.
            NOTE(review): the server clock may not be in the schedule's
            timezone -- confirm before relying on the default.

    Returns:
        The matching string exactly as it appears in *times*, or
        'No more trains today!' when no entry is later than *now*.
    """
    from datetime import datetime

    noTrain = 'No more trains today!'
    # datetime.time() with no arguments is midnight, not "now"; ask the clock.
    current = datetime.now().time() if now is None else now
    for str_time in times:
        departure = _parse_departure(str_time)
        if departure is not None and departure > current:
            return str_time
    return noTrain
def scrape(html, destination):
    """Extract departure, transfer and arrival times from a results page.

    Args:
        html: Markup of a schedule results page.
        destination: Direction label passed by callers. It is accepted for
            interface compatibility but is not used by the parser.

    Returns:
        A dict with the keys 'origin', 'transferarrive', 'transferdepart'
        and 'destination'. Each value is a list of strings holding one entry
        per trip found on the page.

    Raises:
        IndexError: if the page contains fewer than eleven <tr> elements.
    """
    soup = BeautifulSoup(html)
    # HACK: the schedule is located purely by position -- the eleventh <tr>
    # on the page is taken to be the one that wraps the results table.
    rows = soup.find_all("tr")
    spans = rows[10].find_all('span')

    origin = []          # departure time at the origin
    transferarrive = []  # arrival time at the transfer stop
    transferdepart = []  # departure time from the transfer stop
    arrival = []         # arrival text at the destination

    # The first four spans are skipped (presumably column headings -- TODO
    # confirm). After that each trip occupies four consecutive spans:
    # origin, transfer, destination, total travel time.
    for i in range(4, len(spans) - 3, 4):
        origin_text = str(spans[i].text)
        transfer_text = str(spans[i + 1].text)
        # Fixed character offsets into the cell text; they depend on the
        # exact layout of the page -- TODO confirm if the site changes.
        origin.append(origin_text[0:8])
        transferarrive.append(transfer_text[7:15])
        transferdepart.append(transfer_text[39:48])
        arrival.append(str(spans[i + 2].text))

    return {
        'origin': origin,
        'transferarrive': transferarrive,
        'transferdepart': transferdepart,
        'destination': arrival,
    }
# Datastore model for the scraped schedules to Princeton and to New York (no CSV files are written).
class njdata(ndb.Model):
    """Datastore record holding one direction's scraped schedule."""

    # Lists of time strings, one list per schedule column.
    originstring = ndb.StringProperty(repeated=True)
    transferarrivestring = ndb.StringProperty(repeated=True)
    transferdepartstring = ndb.StringProperty(repeated=True)
    destinationstring = ndb.StringProperty(repeated=True)
    # Stamped automatically when the entity is first stored; used to find
    # the most recent data.
    date = ndb.DateTimeProperty(auto_now_add=True)
    # Direction tag for the record.
    identifier = ndb.StringProperty()
# Module-level datastore entities, one per travel direction.
toPUdata = njdata()
toNYdata = njdata()
# Module-level schedule dicts read by the page handlers; they start empty.
globalPUDict = {}
globalNYDict = {}
class Test123(webapp2.RequestHandler):
    """Handler that reloads the in-memory schedule dicts from the datastore."""

    @staticmethod
    def _as_schedule(entity):
        # Flatten a stored record into the dict shape the templates consume.
        return {
            'origin': entity.originstring,
            'transferarrive': entity.transferarrivestring,
            'transferdepart': entity.transferdepartstring,
            'destination': entity.destinationstring,
        }

    def get(self):
        """Load the two newest records into globalPUDict / globalNYDict.

        The two most recently dated ``njdata`` entities are fetched and
        matched to a direction by their ``identifier``. If a 'PU' and an
        'NY' record are not both present (for example when nothing has been
        stored yet), the handler answers 503 and leaves the dicts untouched
        instead of failing with an IndexError.
        """
        global globalPUDict, globalNYDict
        # Query through the model class; no particular instance is needed.
        newest = njdata.query().order(-njdata.date).fetch(2)
        by_direction = dict((entity.identifier, entity) for entity in newest)
        if 'PU' not in by_direction or 'NY' not in by_direction:
            self.response.set_status(503)
            self.response.write('Schedule data is not available yet.')
            return
        globalPUDict = self._as_schedule(by_direction['PU'])
        globalNYDict = self._as_schedule(by_direction['NY'])
class MainPage(webapp2.RequestHandler):
    """Handler for the landing page showing the next train each way."""

    def get(self):
        """Render index.html with the values 'puSoon' and 'nySoon'.

        Each value is the result of getSoonestTrain() applied to the 'origin'
        list of the matching module-level schedule dict. Those dicts start
        out empty, so a KeyError is raised until they have been filled.
        """
        context = {
            'puSoon': getSoonestTrain(globalPUDict['origin']),
            'nySoon': getSoonestTrain(globalNYDict['origin']),
        }
        page = JINJA_ENVIRONMENT.get_template('index.html').render(context)
        self.response.write(page)
class TimedScraper(webapp2.RequestHandler):
    """Handler that scrapes both directions and stores them in the datastore."""

    @staticmethod
    def _build_entity(schedule, identifier):
        # Wrap one scraped schedule dict in a fresh datastore record.
        return njdata(
            originstring=schedule['origin'],
            transferarrivestring=schedule['transferarrive'],
            transferdepartstring=schedule['transferdepart'],
            destinationstring=schedule['destination'],
            identifier=identifier,
        )

    def get(self):
        """Fetch, parse and persist today's schedules, then echo the records.

        The pages are downloaded with a single getPost() call; calling it
        once per direction would double the number of HTTP requests. A new
        entity is created for each direction on every run so that its
        ``date`` (set only when an entity is first stored) reflects this
        scrape, instead of re-saving earlier records with their old date.
        """
        global toPUdata, toNYdata
        toPUhtml, toNYhtml = getPost()
        toPUdata = self._build_entity(scrape(toPUhtml, 'PU'), "PU")
        toNYdata = self._build_entity(scrape(toNYhtml, 'NY'), "NY")
        # Save data into the datastore.
        toPUdata.put()
        toNYdata.put()
        self.response.write(toPUdata)
        self.response.write(toNYdata)
class ToNY(webapp2.RequestHandler):
    """Handler that renders the schedule page for trips to New York."""

    def get(self):
        """Render toNY.html using globalNYDict as the template context."""
        page = JINJA_ENVIRONMENT.get_template('toNY.html').render(globalNYDict)
        self.response.write(page)
class ToPU(webapp2.RequestHandler):
    """Handler that renders the schedule page for trips to Princeton."""

    def get(self):
        """Render toPU.html using globalPUDict as the template context."""
        page = JINJA_ENVIRONMENT.get_template('toPU.html').render(globalPUDict)
        self.response.write(page)
# URL path -> request handler table for the WSGI application.
_ROUTES = [
    ('/', MainPage),
    ('/toNY', ToNY),
    ('/toPU', ToPU),
    ('/test', Test123),
    ('/scrape', TimedScraper),
]
# NOTE(review): debug=True is kept as-is; confirm it is intended outside of
# development.
application = webapp2.WSGIApplication(_ROUTES, debug=True)