Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

base fork: snktagarwal/IRN
base: 4ff89c9636
...
head fork: snktagarwal/IRN
compare: 3f2b13e460
  • 4 commits
  • 1 file changed
  • 0 commit comments
  • 1 contributor
Showing with 186 additions and 96 deletions.
  1. +186 −96 delay_profile/TrainModel.py
282 delay_profile/TrainModel.py
@@ -39,6 +39,28 @@ def __init__(self, tr_no, tr_nm, stn_nm, sch_arr, del_arr, act_arr, sch_dep, del
self.del_dep = del_dep
self.src_dist = src_dist
+ def flush(self):
+
+ # Flushes information about arrival and departures
+ # It is used to create a new time table containing delay information
+
+ self.instances = 0
+ self.avg_arr = self.avg_dep = -1 # That's all we really need!
+
+ def add_delay(self, stn):
+
+ """ Adds the delayed arrival and departure times """
+ self.instances = self.instances + 1
+ self.avg_arr = self.avg_arr + stn.act_arr
+ self.avg_dep = self.avg_dep + stn.act_dep
+
+ def avg(self):
+
+ """ Averages the arr and dep times, look at the above funciton """
+ if self.instances:
+ self.avg_arr = int(float(self.avg_arr)/self.instances)
+ self.avg_dep = int(float(self.avg_dep)/self.instances)
+
def _print(self):
print 'Train Name: ' + str(self.tr_nm)
print 'Train No.: ' + str(self.tr_no)
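
Editor's note: the new flush/add_delay/avg trio implements a simple incremental average — flush() zeroes the accumulators, add_delay() adds one day's observed arrival and departure, and avg() divides by the number of observations. Below is a minimal, self-contained sketch of the same pattern; the Stop class and the sample minute values are hypothetical, not part of the repository (note the -1 sentinel from flush() is left in the sum, just as in the diff above).

    class Stop(object):
        """ Hypothetical stand-in for a station entry in the time table index. """

        def flush(self):
            # Reset the accumulators before adding observed delays
            self.instances = 0
            self.avg_arr = self.avg_dep = -1

        def add_delay(self, act_arr, act_dep):
            # Accumulate one day's actual arrival/departure times (in minutes)
            self.instances = self.instances + 1
            self.avg_arr = self.avg_arr + act_arr
            self.avg_dep = self.avg_dep + act_dep

        def avg(self):
            # Turn the accumulated sums into integer averages
            if self.instances:
                self.avg_arr = int(float(self.avg_arr) / self.instances)
                self.avg_dep = int(float(self.avg_dep) / self.instances)

    s = Stop()
    s.flush()
    s.add_delay(605, 610)   # day 1: arrived 10:05, departed 10:10
    s.add_delay(615, 620)   # day 2: arrived 10:15, departed 10:20
    s.avg()                 # s.avg_arr -> 609, s.avg_dep -> 614 (the -1 sentinel skews each sum slightly)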
@@ -353,7 +375,6 @@ def processTrain(self, train):
else:
# Process for hourly stats
self.processHourly(train, lidx, ridx)
- # Process for cumulative total stats
self.daily_traffic[train.tr_no] = 1
class Indexing:
@@ -392,7 +413,6 @@ def constructTimeTableIndex(self):
# Read each station
while parts:
[stn_code, arr_time, dep_time, dist_source] = parts[0:4]
- print parts[0:4]
arr_time = util.toMin(arr_time)
dep_time = util.toMin(dep_time)
parts = parts[4:]
@@ -541,8 +561,31 @@ def hourlyAverageTrafficPS(self):
# print self.hourly_traffic_dict
+ def hourlyAverageTrafficPZ(self):
+
+ """ This function takes the day's snapshot and finds the average
+ traffic in hour slots caused in a zone """
+
+ self.hourly_traffic_dict_z = []
+
+ for zone in Segments.all_belts:
+
+ # Extract the list of segments in the zone
+ seg_zone = filter(lambda x: x.seg in zone, self.seg_set)
+
+ # Now average hours over all these segments
+ t1 = {}
+ for k in seg_zone[0].hourly_delay.keys():
+ t1[k] = sum([sum(seg.hourly_delay[k].values()) for seg in seg_zone])
+
+ self.hourly_traffic_dict_z.append(t1)
+
+
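
Editor's note: hourlyAverageTrafficPZ groups segments by zone (Segments.all_belts) and collapses each segment's per-hour delay dictionary into one number per hour slot. A self-contained sketch of that filter-and-sum pattern follows; the zone names, segment keys, and numbers are invented for illustration and do not come from the repository.

    # Hypothetical data: segment -> {hour_slot: {train_no: delay_minutes}}
    hourly_delay = {
        'SEG-A': {0: {'123': 5, '456': 7}, 1: {'123': 2}},
        'SEG-B': {0: {'789': 10},          1: {}},
        'SEG-C': {0: {'111': 3},           1: {'222': 4}},
    }
    zones = [['SEG-A', 'SEG-B'], ['SEG-C']]   # stand-in for Segments.all_belts

    zone_totals = []
    for zone in zones:
        # Extract the segments belonging to this zone (mirrors the filter() in the diff)
        seg_zone = [s for s in hourly_delay if s in zone]
        t1 = {}
        for hour in hourly_delay[seg_zone[0]]:
            # Sum every train's delay in this hour slot across all segments of the zone
            t1[hour] = sum(sum(hourly_delay[s][hour].values()) for s in seg_zone)
        zone_totals.append(t1)

    print(zone_totals)   # [{0: 22, 1: 2}, {0: 3, 1: 4}]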
def hourlyAverageDelayPS(self):
+ """ This function takes the day's snapshot and finds the average
+ delay per train in hour slots caused in a segment """
+
self.hourly_delay_dict = []
t_delay= map(lambda x: x.hourly_delay, self.seg_set)
@@ -554,6 +597,32 @@ def hourlyAverageDelayPS(self):
else: t1[k] = 0
self.hourly_delay_dict.append(t1)
+ def hourlyAverageDelayPZ(self):
+
+ """ This function takes the day's snapshot and finds the average
+ delay per train in hour slots caused in a zone """
+
+ self.hourly_delay_dict_z = []
+
+ for zone in Segments.all_belts:
+
+ # Extract the list of segments in the zone
+ seg_zone = filter(lambda x: x.seg in zone, self.seg_set)
+
+ # Now average hours over all these segments
+ t1 = {}
+ for k in seg_zone[0].hourly_delay.keys():
+ num = 0
+ den = 0
+ for seg in seg_zone:
+ num = num + sum(seg.hourly_delay[k].values())
+ den = den + len(seg.hourly_delay[k].keys())
+ if den > 0:
+ t1[k] = float(num)/den
+ else: t1[k] = 0
+
+ self.hourly_delay_dict_z.append(t1)
+
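
Editor's note: hourlyAverageDelayPZ does the same zone grouping but divides the summed delay by the number of delayed-train observations, giving a per-train average rather than a total. The core of that computation, sketched as a standalone function over the same hypothetical {segment: {hour: {train: delay}}} shape used in the note above:

    def zone_hour_average(seg_zone, hourly_delay):
        """ Per-hour average delay per train over the segments of one zone.
            seg_zone: list of segment keys; hourly_delay: {segment: {hour: {train: delay}}}. """
        t1 = {}
        for hour in hourly_delay[seg_zone[0]]:
            num = sum(sum(hourly_delay[s][hour].values()) for s in seg_zone)   # total delay in the slot
            den = sum(len(hourly_delay[s][hour]) for s in seg_zone)            # number of train observations
            t1[hour] = float(num) / den if den > 0 else 0
        return t1

    # With the hypothetical data above:
    # zone_hour_average(['SEG-A', 'SEG-B'], hourly_delay) -> {0: 7.33..., 1: 2.0}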
def constructTrainIndex(self, blk_size):
""" Construct an Index from files read everyday using running information """
@@ -639,74 +708,93 @@ def index(self):
idx = Indexing(fn)
idx.constructTrainIndex(10)
idx.constructSegmentsIndex()
- #idx.normalizeDelay()
self.idx_list.append(idx)
- def hourVsSegmentDelayMat(self, p = True):
+ def avgOfMatrices(self, mat_list, m, n):
- """ Constructs a segment vs hours matrix ( 54 x 12 ) which contains
- Delay metrics """
+ """ Averages m x n matrices specified in mat_list. For example if
+ m1 = [[1,2],[1,2]] and m2 = [[2,2],[3,3]] then avg(m1, m2) is:
+ m_avg = [[1.5, 2], [2, 2.5]] """
- self.seg_hour_delay_mat = None
+ # Initialize
+ m_avg = copy.deepcopy(mat_list[0])
+ l = len(mat_list)
- for idx in self.idx_list:
+ for mat in mat_list[1:]:
+ for i in range(m):
+ for j in range(n):
+ m_avg[i][j] += mat[i][j]
- # Create a copy if still none
- if not self.seg_hour_delay_mat:
- self.seg_hour_delay_mat = copy.deepcopy(idx.hourly_delay_dict)
- continue
+ for i in range(m):
+ for j in range(n):
+ m_avg[i][j] = float(m_avg[i][j])/l
- # Update
- for i in range(len(Segments.all_segments)):
- for j in range(len(self._hours)):
- self.seg_hour_delay_mat[i][j] += idx.hourly_delay_dict[i][j]
+ return m_avg
- # Average
- for i in range(len(Segments.all_segments)):
- for j in range(len(self._hours)):
- self.seg_hour_delay_mat[i][j] = float(self.seg_hour_delay_mat[i][j])/len(self.idx_list)
+
+ def hourVsSegmentDelayMat(self, p = False):
+
+ """ Constructs a segment vs hours matrix ( 54 x 12 ) which contains
+ Delay metrics """
+
+ mat_list = map(lambda x: x.hourly_delay_dict, self.idx_list)
+ mat_avg = self.avgOfMatrices(mat_list, len(Segments.all_segments), len(self._hours))
+
+ self.seg_hour_delay_mat = mat_avg
# If pickle is true then we save the matrix in a picked file
if p:
pickle.dump(self.seg_hour_delay_mat, open('pickled/SegHourDelayMat.p','w'))
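
Editor's note: hourVsSegmentDelayMat now just maps each day's hourly_delay_dict into mat_list and delegates to avgOfMatrices, which is a plain element-wise mean over equally sized matrices. A standalone sketch of that averaging step, shown with plain nested lists rather than the list-of-dict rows the class actually builds (function and variable names here are illustrative):

    import copy

    def avg_of_matrices(mat_list, m, n):
        """ Element-wise average of the m x n matrices in mat_list. """
        m_avg = copy.deepcopy(mat_list[0])          # copy so the first input matrix is untouched
        for mat in mat_list[1:]:
            for i in range(m):
                for j in range(n):
                    m_avg[i][j] += mat[i][j]
        for i in range(m):
            for j in range(n):
                m_avg[i][j] = float(m_avg[i][j]) / len(mat_list)
        return m_avg

    m1 = [[1, 2], [1, 2]]
    m2 = [[2, 2], [3, 3]]
    print(avg_of_matrices([m1, m2], 2, 2))   # [[1.5, 2.0], [2.0, 2.5]]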
- def hourVsSegmentTrafficMat(self, p = True):
- """ Constructs a segment vs hours matrix ( 54 x 12 ) which contains
+ def hourVsZoneDelayMat(self, p = False):
+
+ """ Constructs a zone vs hours matrix ( 6 x 12 ) which contains
Delay metrics """
- self.seg_hour_traffic_mat = None
+ mat_list = map(lambda x: x.hourly_delay_dict_z, self.idx_list)
+ mat_avg = self.avgOfMatrices(mat_list, len(Segments.all_belts), len(self._hours))
- for idx in self.idx_list:
+ self.zone_hour_delay_mat = mat_avg
+
+ # If pickle is true then we save the matrix in a pickled file
+ if p:
+ pickle.dump(self.zone_hour_delay_mat, open('pickled/ZoneHourDelayMat.p','w'))
- # Create a copy if still none
- if not self.seg_hour_traffic_mat:
- self.seg_hour_traffic_mat = copy.deepcopy(idx.hourly_traffic_dict)
- continue
- # Update
- for i in range(len(Segments.all_segments)):
- for j in range(len(self._hours)):
- self.seg_hour_traffic_mat[i][j] += idx.hourly_traffic_dict[i][j]
+ def hourVsSegmentTrafficMat(self, p = False):
- # Average
- for i in range(len(Segments.all_segments)):
- for j in range(len(self._hours)):
- self.seg_hour_traffic_mat[i][j] = float(self.seg_hour_traffic_mat[i][j])/len(self.idx_list)
+ """ Constructs a segment vs hours matrix ( 54 x 12 ) which contains
+ Traffic metrics """
+
+ mat_list = map(lambda x: x.hourly_traffic_dict, self.idx_list)
+ mat_avg = self.avgOfMatrices(mat_list, len(Segments.all_segments), len(self._hours))
+ self.seg_hour_traffic_mat = mat_avg
if p:
pickle.dump(self.seg_hour_traffic_mat ,open('pickled/SegHourTrafficMat.p','w'))
+ def hourVsZoneTrafficMat(self, p = False):
+ """ Constructs a zone vs hours matrix ( 54 x 12 ) which contains
+ Traffic metrics """
+
+ mat_list = map(lambda x: x.hourly_traffic_dict_z, self.idx_list)
+ mat_avg = self.avgOfMatrices(mat_list, len(Segments.all_belts), len(self._hours))
+ self.zone_hour_traffic_mat = mat_avg
+
+ # If pickle is true then we save the matrix in a pickled file
+ if p:
+ pickle.dump(self.zone_hour_traffic_mat, open('pickled/ZoneHourTrafficMat.p','w'))
+
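
Editor's note: each hourVs*Mat method can optionally persist its matrix with pickle.dump into the pickled/ directory. A minimal sketch of reading one of those files back for later inspection or plotting, assuming hourVsZoneDelayMat(p=True) has already been run and the path below exists:

    import pickle

    # Path written by hourVsZoneDelayMat(p=True); adjust if the layout differs
    with open('pickled/ZoneHourDelayMat.p', 'rb') as fh:
        zone_hour_delay_mat = pickle.load(fh)

    for row in zone_hour_delay_mat:
        print(row)   # one averaged hour-slot row per zone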
def averageTrafficPS(self):
- """ Total hourly traffic for each hour slot using the master matrix """
+ """ Total hourly traffic for each segment using the master matrix """
self.total_average_traffic_ps = \
map(lambda x: sum(x.itervalues()), self.seg_hour_traffic_mat)
if p:
pickle.dump(file('SegHourDelayMat.p','w'), self.seg_hour_delay_mat)
-
def averageTrafficPHS(self):
""" Total hourly traffic for each hour slot using the master matrix """
@@ -721,7 +809,7 @@ def averageTrafficPHS(self):
self.total_average_traffic_phs.append(val)
def averageDelayPS(self):
- """ Total hourly traffic for each hour slot using the master matrix """
+ """ Total hourly delay for each segment using the master matrix """
self.total_average_delay_ps = \
map(lambda x: sum(x.itervalues()), self.seg_hour_delay_mat)
@@ -739,80 +827,82 @@ def averageDelayPHS(self):
val += seg[j]
self.total_average_delay_phs.append(val)
- def plotHourlyTraffic(self):
-
- """ For a first view let us create segmentsxhours insatances of
- delay. And plot them with x axis as hours and y axis as the corresponding
- delay. """
-
- f = file('plots/HourlyTrafficOfAllSegments','w')
-
- for hs in range(len(self._hours)):
- for e1 in self.total_hourly_traffic:
- f.write(str(self._hours[hs]/self._split)[:5]+' '+str(e1[hs])[:5]+'\n')
+ def constructTimeTableFromDelay(self):
- def plotHourlyDelay(self):
+ """ It produces a time table, exactly in the format currently specified
+ in datasets/NewTimTableDetail.txt.
- """ For a first view let us create segmentsxhours insatances of
- delay. And plot them with x axis as hours and y axis as the corresponding
- delay. """
+ The way it works is to use each indexing frame to find out the
+ average time at which a train reaches a station. This gives us augmented
+ information about a train's delayed arrival time. """
- f = file('plots/HourlyDelayOfAllSegments','w')
+ # First read the existing time table to get information about trains
+ # and the corresponding stations they stop at
- for hs in range(len(self._hours)):
- for e1 in self.total_hourly_delay:
- f.write(str(self._hours[hs]/self._split)[:5]+' '+str(e1[hs])[:5]+'\n')
- def plotTrafficVsDelayHourly(self):
+ time_table = {}
- """ Construct a plot where x-axis is traffic, y-axis is delay and
- color coding is done according to the hour. The simple way to do
- this will be to, construct len(self._hours) file each containing a
- list of traffic-vs-delay statistics (len(Segments.__all_segments))
- actually . """
+ handle = Indexing('datasets/NewTrainStationDetail.txt')
+ tt_idx = handle.constructTimeTableIndex()
+ output = file('datasets/NewTrainStationDetailDelay.txt', 'w')
- base = 'traffic-vs-delay/'
+ # This is a simple index created from the actual time table
+ # Let's flush it before adding delay information
- for hs in range(len(self._hours)):
- f = file(base + str(hs) + '.dat', 'w')
- for i in range(len(Segments.all_segments)):
- f.write("%f\t%f\n" % (self.total_hourly_traffic[i][hs], self.total_hourly_delay[i][hs]))
- f.close()
-
- def plotHourlyDelayPS(self):
-
- """ For each segment create points for each bin and store in file """
-
- for i in range(len(self.total_hourly_delay)):
- f = file('plots/'+Segment.getNameStat(Segments.all_segments[i])+'.hourly_delay','w')
- for hs in range(len(self._hours)):
- f.write(str(hs)+' '+str(self.total_hourly_delay[i][hs])+'\n')
- f.close()
+ for (tr_no, train) in tt_idx.iteritems():
+ for stn in train.stn_list:
+ stn.flush()
- def plotHourlyTrafficPS(self):
+ # Now let's add the actual information from the indexed trains that we
+ # have collected.
- """ For each segment create points for each bin and store in file """
+ for idx in self.idx_list:
- for i in range(len(self.total_hourly_traffic)):
- f = file('plots/'+Segment.getNameStat(Segments.all_segments[i])+'.hourly_traffic','w')
- for hs in range(len(self._hours)):
- f.write(str(hs)+' '+str(self.total_hourly_traffic[i][hs])+'\n')
- f.close()
+ # Take the index and add information to our time table
+ for (tr_no, train) in idx.idx.iteritems():
+ if tr_no not in tt_idx.keys():
+ #print tr_no, 'not found in static rail database'
+ continue
- def corrHourlyTrafficDelayPS(self):
+ # It might happen that some trains are not collected completely,
+ # in which case the station list is incomplete,
+ # hence we need to align the stations appropriately
- """ Compute the delay and traffic vectors per hour and compute
- the associated correlation """
+ le = len(tt_idx[tr_no].stn_list)
+ align_vec = [-1] * len(train.stn_list)
- corr_vec = map(lambda x: pearsonr( \
- self.total_hourly_traffic[x], self.total_hourly_delay[x]), \
- range(len(self.total_hourly_traffic)))
+ #print tr_no, le, len(train.stn_list)
+ j = 0
+ for i in range(le):
- for i in range(len(Segments.all_segments)):
+ tt_code = Station.disambiguate(tt_idx[tr_no].stn_list[i].stn_code)
+ idx_code = Station.disambiguate(train.stn_list[j].stn_code)
+ if tt_code == idx_code:
+ align_vec[j] = i
+ j = j + 1
- print Segment.getNameStat(Segments.all_segments[i]) \
- + str(' ') + str(corr_vec[i][0])
+ if j == len(train.stn_list): break
+ # Now add the arr and dep time
+ for j in range(len(train.stn_list)):
+ if align_vec[j] != -1:
+ tt_idx[tr_no].stn_list[align_vec[j]].add_delay(train.stn_list[j])
+ # Now average the accumulated arrival and departure times
+ for (tr_no, train) in tt_idx.iteritems():
+ for stn in train.stn_list:
+ stn.avg()
+ # Let's see how the averages look :P
+ for (tr_no, train) in tt_idx.iteritems():
+ output.write(tr_no+'||')
+ for stn in train.stn_list:
+ if not (stn.avg_arr == -1 and stn.avg_dep == -1):
+ output.write(stn.stn_code+'||'+str(stn.avg_arr)+'||'+str(stn.avg_dep)+'||'+str(stn.src_dist)+'||')
+ output.write('\n')
+ output.close()
+
+ handle.augmentTimeTableWithSegments()
+ handle.printAugmentedTimeTable('datasets/NewTrainStationDetailWDelayWSegments.txt')
+ handle.printAugmentedTimeTableP('pickled/NewTrainStationDetailWDelayWSegments.p')
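
Editor's note: the alignment step inside constructTimeTableFromDelay is a two-pointer scan — walk the static timetable's station list once and, whenever the disambiguated code matches the next observed stop, record which timetable position that observation maps to. A self-contained sketch of just that matching logic, using plain station-code strings and made-up inputs; the normalize() helper stands in for Station.disambiguate and is an assumption, not repository code.

    def normalize(code):
        # Hypothetical stand-in for Station.disambiguate: strip whitespace, upper-case
        return code.strip().upper()

    def align(timetable_codes, observed_codes):
        """ Return align_vec where align_vec[j] is the index in timetable_codes matching
            observed_codes[j], or -1 if that observation could not be placed. """
        align_vec = [-1] * len(observed_codes)
        j = 0
        for i in range(len(timetable_codes)):
            if j == len(observed_codes):
                break                     # every observed stop has been placed
            if normalize(timetable_codes[i]) == normalize(observed_codes[j]):
                align_vec[j] = i          # observed stop j corresponds to timetable position i
                j += 1
        return align_vec

    # An incomplete day of data: the second timetable stop was never collected
    print(align(['NDLS', 'CNB', 'ALD', 'MGS'], ['ndls', 'ald']))   # [0, 2]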
