In [1]:
import datetime
import csv

def map(fn, seq):
    res = ()

    for ele in seq:
        res = res + (fn(ele), )
    return res

def filter(pred, seq):
    res = ()

    for ele in seq:
        if pred(ele):
            res = res + (ele, )
    return res

# Mission 4 Code
def make_station(station_code, station_name):
    """Makes a `station` ADT"""
    return (station_code, station_name)

def get_station_code(station):
    """Gets the code from `station` ADT"""
    return station[0]

def get_station_name(station):
    """Gets the name from `station` ADT"""
    return station[1]

test_station1 = make_station('CC2', 'Bras Basah')
test_station2 = make_station('CC3', 'Esplanade')
test_station3 = make_station('CC4', 'Promenade')

def make_train(train_code):
    """Makes a `train` ADT"""
    return (train_code,)

test_train = make_train('TRAIN 0-0')

def get_train_code(train):
    """Gets the code from `train` ADT"""
    return train[0]

def make_line(name, stations):
    """Makes a `line` ADT"""
    return (name, stations)

def get_line_name(line):
    """Gets the name from `line` ADT"""
    return line[0]

def get_line_stations(line):
    """Gets the stations from `line` ADT"""
    return line[1]

def get_station_by_name(line, station_name):
    """Gets station from a `line` given the station name"""
    stns = get_line_stations(line)
    for i in stns:
        if station_name == get_station_name(i):
            return i
    return None

def get_station_by_code(line, station_code):
    """Gets station from a `line` given the station code"""
    stns = get_line_stations(line)
    for i in stns:
        if station_code == get_station_code(i):
            return i
    return None

def get_station_position(line, station_code):
    """Gets the index of a station on a `line`"""
    stns = get_line_stations(line)
    n = 0
    for i in stns:
        if station_code == get_station_code(i):
            return n
        n += 1
    return -1

test_line = make_line('Circle Line', (test_station1, test_station2, test_station3))

# Setter
def make_train_position(is_moving, from_station, to_station):
    """Makes a `train_position` ADT"""
    return (is_moving, from_station, to_station)

def get_is_moving(train_position) -> bool:
    """Gets if train is moving from `train_position`"""
    return train_position[0] 

def get_direction(line, train_position) -> int:
    """Gets direction of train from `train_position`. `1` if increasing, `0` if decreasing"""
    start_stn = get_station_position(line, get_station_code(train_position[1]))
    end_stn = get_station_position(line, get_station_code(train_position[2]))
    if start_stn < end_stn:
        return 0
    else:
        return 1

def get_stopped_station(train_position):
    """Gets the station the `train` is stopped at if not moving"""
    moving_status = get_is_moving(train_position)
    if not moving_status:
        return train_position[1]
    else:
        return None

def get_previous_station(train_position):
    """Gets the previous station the `train` departed from if moving"""
    moving_status = get_is_moving(train_position)
    if moving_status:
        return train_position[1]
    else:
        return None

def get_next_station(train_position):
    """Gets the next station the `train` is going to"""
    return train_position[2]

test_train_position1 = make_train_position(False, test_station1, test_station2)
test_train_position2 = make_train_position(True, test_station3, test_station2)

def make_schedule_event(train, train_position, time):
    """Makes a `schedule_event` ADT"""
    return (train, train_position, time)

def get_train(schedule_event):
    """Gets the `train` in the `schedule_event`"""
    return schedule_event[0]

def get_train_position(schedule_event):
    """Gets the `train_position` in the `schedule_event"""
    return schedule_event[1]

def get_schedule_time(schedule_event):
    """Gets the `datetime` object from `schedule_event`"""
    return schedule_event[2]

test_bd_event1 = make_schedule_event(test_train, test_train_position2, datetime.datetime(2016, 1, 1, 9, 27))
test_bd_event2 = make_schedule_event(test_train, test_train_position1, datetime.datetime(2016, 1, 1, 2, 25))

# Mission 5
## Task 1: Data Parsing
The `read_csv` helper function is provided to help you with this task.

In [2]:
def read_csv(csvfilename):
    rows = ()
    with open(csvfilename) as csvfile:
        file_reader = csv.reader(csvfile)
        for row in file_reader:
            rows += (tuple(row), )
    return rows

### Task 1a: Train Lines
`station_info.csv` contains basic information about all the train stations in Singapore. You can assume that stations on the same line will be on consecutive rows, and that the stations are listed in their actual order on the line.

In the template file, you will find a partial implementation of the function `parse_lines` which takes in a filename, reads the CSV file and generates a tuple of lines. In other words, the output of parse_lines should look like `(line1, line2, ...)` where `line1` and `line2` are also tuples.

Complete the function by inserting your code where the comments instruct you to. Your code should only be at the indentation level of the comments. Remember to use the getters and setters defined in Task 1.

The template file also has some code which calls your function and extracts the data for the Circle Line. Uncomment those lines after you have finished this task. You will need the `CCL` global variable for the rest of the tasks in order to avoid rereading the currently evaluated `Line`.

In [3]:
def parse_lines(data_file):
    rows = read_csv(data_file)[1:]
    lines = ()
    curr_line_name = rows[0][2]
    curr_line_stations = ()
    for row in rows:
        code, station_name, line_name = row
        if line_name == curr_line_name:
            curr_line_stations += (make_station(code, station_name),)
        else:
            lines += (make_line(curr_line_name, curr_line_stations),)
            curr_line_name = line_name
            curr_line_stations = ()
            curr_line_stations += (make_station(code, station_name),)
    # Addition 3
    lines += (make_line(curr_line_name, curr_line_stations),)
    print(lines)
    return(lines)

# UNCOMMENT THE CODE BELOW WHEN YOU ARE DONE WITH TASK 1A. THIS IS NOT OPTIONAL TESTING!
LINES = parse_lines('station_info.csv')
CCL = filter(lambda line: get_line_name(line) == 'Circle Line', LINES)[0]

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 1A
print("## Task 1a ##")
print(get_line_stations(CCL)[5:8])

# Expected Output #
# (('CC6', 'Stadium'), ('CC7', 'Mountbatten'), ('CC8', 'Dakota'))

(('North South Line', (('NS1', 'Jurong East'), ('NS2', 'Bukit Batok'), ('NS3', 'Bukit Gombak'), ('NS4', 'Choa Chu Kang'), ('NS5', 'Yew Tee'), ('NS7', 'Kranji'), ('NS8', 'Marsiling'), ('NS9', 'Woodlands'), ('NS10', 'Admiralty'), ('NS11', 'Sembawang'), ('NS13', 'Yishun'), ('NS14', 'Khatib'), ('NS15', 'Yio Chu Kang'), ('NS16', 'Ang Mo Kio'), ('NS17', 'Bishan'), ('NS18', 'Braddell'), ('NS19', 'Toa Payoh'), ('NS20', 'Novena'), ('NS21', 'Newton'), ('NS22', 'Orchard'), ('NS23', 'Somerset'), ('NS24', 'Dhoby Ghaut'), ('NS25', 'City Hall'), ('NS26', 'Raffles Place'), ('NS27', 'Marina Bay'), ('NS28', 'Marina South Pier'))), ('East West Line', (('EW1', 'Pasir Ris'), ('EW2', 'Tampines'), ('EW3', 'Simei'), ('EW4', 'Tanah Merah'), ('EW5', 'Bedok'), ('EW6', 'Kembangan'), ('EW7', 'Eunos'), ('EW8', 'Paya Lebar'), ('EW9', 'Aljunied'), ('EW10', 'Kallang'), ('EW11', 'Lavender'), ('EW12', 'Bugis'), ('EW13', 'City Hall'), ('EW14', 'Raffles Place'), ('EW15', 'Tanjong Pagar'), ('EW16', 'Outram Park'), ('EW17',

### Task 1b: Schedule Events
Complete the `parse_events_in_line` function. The function takes in a filename `data_file` and the currently evaluated `Line`. It should return a tuple of `ScheduleEvents`.

Note that the data fields from a CSV file are read as strings by the `read_csv` helper function. Also make sure that you follow the ADT specifications of `Train`, `TrainPosition`, and `ScheduleEvent` correctly.

We will use the `parse_events_in_line` function to first read the breakdown events. You may check events for `CCL` by using the code `parse_event_in_line('breakdown_events.csv', CCL)` to avoid rereading the parsed `Line`. The template file already has the code to do this. Uncomment those lines once you are done with this task.

In [4]:
def parse_events_in_line(data_file, line):
    rows = read_csv(data_file)[1:]
    events = ()
    for row in rows:
        train_code, is_moving, from_code, to_code, date, time = row
        the_train = make_train(train_code)
        the_is_moving = True if is_moving == "True" else False
        the_train_position = make_train_position(the_is_moving, get_station_by_code(line, from_code), get_station_by_code(line, to_code))
        the_year = int(date[6:10])
        the_month = int(date[3:5])
        the_day = int(date[0:2])
        the_hour = int(time[0:2])
        the_minute = int(time[3:5])
        events += (make_schedule_event(the_train, the_train_position, datetime.datetime(the_year, the_month, the_day, the_hour, the_minute)),)
    return events

# UNCOMMENT THE CODE BELOW WHEN YOU ARE DONE WITH TASK 1B. THIS IS NOT OPTIONAL TESTING!
BD_EVENTS = parse_events_in_line('breakdown_events.csv', CCL)

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 1B
print("## Task 1b ##")
print(BD_EVENTS[9])

# Expected Output #
# (('TRAIN 1-11',), (False, ('CC23', 'one-north'), ('CC22', 'Buona Vista')), datetime.datetime(2017, 1, 6, 7, 9))

## Task 1b ##
(('TRAIN 1-11',), (False, ('CC23', 'one-north'), ('CC22', 'Buona Vista')), datetime.datetime(2017, 1, 6, 7, 9))


## Task 2: Data Cleaning
### Task 2a: Breakdown Events Filtering

Unfortunately, the breakdown events in Task 1 were manually keyed in by an SMRT employee who was also moonlighting as a CS1010S grader and hasn’t had enough sleep in the past few weeks.

Some of the data is invalid! The “from” and “to” stations are not even adjacent to each other on the same line.

SMRT has also been conducting their own tests outside operating hours and the breakdown events from those tests are included in the file too. We don’t want to include these tests in our analysis, so we have to remove them too.

We only want to keep breakdown events that match the following criteria:

1. “From” and “To” stations are adjacent on the given Line. Note that while station codes contain running integers, you should not rely on the integer to determine adjacency. Instead, use the Station sequence stored in the given Line argument and the given function to get the position of the Station in Line.
2. Breakdown event occurs during operating hours (7am – 11pm), inclusive of 7am and 11pm. 

This sounds like a job for `filter`! `filter` takes in a predicate and an iterable, such as a tuple. 

`get_valid_events_in_line` is provided for you. Your task is to write the predicate function `is_valid_event_in_line` such that `get_valid_events_in_line` returns a tuple of valid breakdown events, as defined in the criteria above. After you are done with your task, uncomment the code provided in the template file so that the filter can be applied and all valid breakdown events can be stored in the global variable `VALID_BD_EVENTS`` to avoid rereading valid events.

In [5]:
def is_valid_event_in_line(bd_event, line):
    """Checks if `schedule_event` is valid"""
    def is_station_beside(the_line, from_stn, to_stn):
        from_pos = get_station_position(the_line, from_stn)
        to_pos = get_station_position(the_line, to_stn)
        if from_pos - to_pos == 1 or to_pos - from_pos == 1:
            return True
        else:
            return False
    def is_during_working_hours(the_time):
        testing_cri = datetime.time(7,0,0) <= the_time.time() <= datetime.time(23,0,0)
        if testing_cri == True:
            return True
        else:
            return False

    def extract_for_beside(the_line, the_event): 
        the_train_position = get_train_position(the_event)
        if get_is_moving(the_train_position) == False:
            prev_stn = get_station_code(get_stopped_station(the_train_position))
        else:
            prev_stn = get_station_code(get_previous_station(the_train_position))
        next_stn = get_station_code(get_next_station(the_train_position))
        return is_station_beside(the_line, prev_stn, next_stn)
    
    is_beside_test = extract_for_beside(line, bd_event) 
    working_hours_test = is_during_working_hours(get_schedule_time(bd_event))
    if is_beside_test and working_hours_test:
        return True
    else:
        return False

def get_valid_events_in_line(bd_events, line):
    ''' Do NOT modify this function'''
    return filter(lambda ev: is_valid_event_in_line(ev, line), bd_events)

# UNCOMMENT THE CODE BELOW WHEN YOU ARE DONE WITH TASK 2A. THIS IS NOT OPTIONAL TESTING!
VALID_BD_EVENTS = get_valid_events_in_line(BD_EVENTS, CCL)

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 2A
print("## Task 2a ##")
print(is_valid_event_in_line(test_bd_event1, CCL))
print(is_valid_event_in_line(test_bd_event2, CCL))

# Expected Output #
# True
# False

## Task 2a ##
True
False


### Task 2b: Computing Location IDs

Implement the function `get_location_id_in_line` that takes in a `ScheduleEvent` and uses its `TrainPosition` to compute a location ID that represents the location of the train corresponding to the `ScheduleEvent`.

Here’s how we will define the location ID:

Each station on the given Line corresponds to an integer according to its sequence. In fact, we have defined this function before. For example, CC1 → 0, CC2 → 1 in `CCL` global variable defined before.

If the schedule event was recorded when the train was stationary, the location ID will be the integer which corresponds to the station the train was stopped at.

If the schedule event was recorded when the train was in between two stations, the location ID will be denoted as (0.5 + the lower of the two station numbers). For example, if the schedule event was recorded between Stadium (CC6) and Mountbatten (CC7), the location ID is 5.5

In [6]:
def get_location_id_in_line(bd_event, line):
    the_train_pos = get_train_position(bd_event)
    # print(line)

    if get_is_moving(the_train_pos): # check if train is moving
        next_stn = get_station_code(get_next_station(the_train_pos))
        next_index = get_station_position(line, next_stn)
        # print(f"The station { next_stn } index is { next_index }")
        prev_stn = get_station_code(get_previous_station(the_train_pos))
        prev_index = get_station_position(line, prev_stn)
        # print(f"The station { prev_stn } index is { prev_index }")
        if get_direction(line, the_train_pos) == 1:
            # print(f"The train moving forward")
            return next_index + 0.5
        else:
            # print(f"The train moving backward")
            return prev_index + 0.5
    else:
        # print("The train not moving") # debug print
        prev_stn = get_station_code(get_stopped_station(the_train_pos))
        return(get_station_position(line, prev_stn))

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 2B
print("## Task 2b ##")
test_loc_id1 = get_location_id_in_line(test_bd_event1, CCL)
test_loc_id2 = get_location_id_in_line(test_bd_event2, CCL)
print(test_loc_id1)
print(test_loc_id2)

# Expected Output #
# 2.5
# 1

## Task 2b ##
2.5
1


## Task 3: Data Filtering
In this task, we will write some functions to filter the train schedule. Before we do that, we would need to read the entire train schedule data. Uncomment the code in the template file so that the entire train schedule is read using the `parse_events` function from Task 1(b) and stored in the global variable `FULL_SCHEDULE`. Note that this operation might take some time.
### Task 3a: Filter by Time
Implement the function `get_schedules_at_time` that takes in a tuple of `ScheduleEvents` and a Python `datetime.datetime`. You may assume that the `ScheduleEvent` given in the `train_schedule` belongs to the correct `Line` being evaluated currently. Hence, you do not need to check if the `ScheduleEvent` belongs to the current `Line`.

Your function should return a tuple of `ScheduleEvents` which occur at the given time.

In [7]:
# UNCOMMENT the following to read the entire train schedule
FULL_SCHEDULE = parse_events_in_line('train_schedule.csv', CCL)    # this will take some time to run

def get_schedules_at_time(train_schedule, time: datetime):
    """Filters by time"""
    results = filter(
        lambda x : get_schedule_time(x) == time,
        train_schedule
    )
    return results

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 3A
print("## Task 3a ##")
test_datetime = datetime.datetime(2017, 1, 6, 6, 0)
test_schedules_at_time = get_schedules_at_time(FULL_SCHEDULE[:5], test_datetime)
print(test_schedules_at_time[1])

# Expected Output #
# (('TRAIN 1-0',), (False, ('CC29', 'HarbourFront'), ('CC28', 'Telok Blangah')), datetime.datetime(2017, 1, 6, 6, 0))

## Task 3a ##
(('TRAIN 1-0',), (False, ('CC29', 'HarbourFront'), ('CC28', 'Telok Blangah')), datetime.datetime(2017, 1, 6, 6, 0))


### Task 3b: Filter by Location
Implement the function `get_rogue_schedules_in_line` that takes in a tuple of `ScheduleEvents` and a location ID as defined in Task 2(b).

Your function should return a tuple of `ScheduleEvents` whose positions are a maximum of 0.5 away from the given position in the given `Line`.

In [8]:
def get_schedules_near_loc_id_in_line(train_schedule, line, loc_id):
    # print(train_schedule)
    # print(loc_id)
    # for i in train_schedule:
    #     print(get_location_id_in_line(i, line))
    results = filter(
        lambda x : 0 <= (get_location_id_in_line(x, line) - loc_id) <= 0.5 or 0 <= (loc_id - get_location_id_in_line(x, line)) <= 0.5,
        train_schedule
    )
    return results

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 3B
print("## Task 3b ##")
test_schedules_near_loc_id = get_schedules_near_loc_id_in_line(FULL_SCHEDULE[:10], CCL, test_loc_id1)
print(test_schedules_near_loc_id[1])

# Expected Output #
# (('TRAIN 0-0',), (True, ('CC3', 'Esplanade'), ('CC4', 'Promenade')), datetime.datetime(2017, 1, 6, 6, 5))

## Task 3b ##
(('TRAIN 0-0',), (True, ('CC3', 'Esplanade'), ('CC4', 'Promenade')), datetime.datetime(2017, 1, 6, 6, 5))


### Task 3c: Filter by Time and Location
Let’s put the two functions from Tasks 3(a) and 3(b) together. Implement the function `get_rogue_schedules_in_line` that takes in a tuple of `ScheduleEvents`, a Python `datetime.datetime` and a position.

Your function should return a tuple of the `ScheduleEvents` which occur at the given time and whose location IDs are a maximum of 0.5 away from the given location ID.

Your code must make use of the two functions written earlier. **ZERO** marks will be awarded for solutions that do not call `get_schedules_at_time` and `get_schedules_near_loc_id_in_line`, or call and discard the results without using them.

In [9]:
def get_rogue_schedules_in_line(train_schedule, line, time, loc_id):
    tup_time = get_schedules_at_time(train_schedule, time)
    tup_schedule = get_schedules_near_loc_id_in_line(train_schedule, line, loc_id)
    results = filter(
        lambda x : x in tup_time,
        tup_schedule
    )
    return results

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 3C
print("## Task 3c ##")
test_bd_event3 = VALID_BD_EVENTS[0]
test_loc_id3 = get_location_id_in_line(test_bd_event3, CCL)
test_datetime3 = get_schedule_time(test_bd_event3)
test_rogue_schedules = get_rogue_schedules_in_line(FULL_SCHEDULE[1000:1100], CCL, test_datetime3, test_loc_id3)
print(test_rogue_schedules[2])

# Expected Output #
# (('TRAIN 1-11',), (True, ('CC24', 'Kent Ridge'), ('CC23', 'one-north')), datetime.datetime(2017, 1, 6, 7, 9))

## Task 3c ##
(('TRAIN 1-11',), (True, ('CC24', 'Kent Ridge'), ('CC23', 'one-north')), datetime.datetime(2017, 1, 6, 7, 9))


## Task 4: Finding the Rogue Train (10 marks)

We’ve now finished designing our ADTs, reading in the data, removing invalid entries and writing functions to help filter the train schedule data. What was it all for?

SMRT and GovTech have a hypothesis that the breakdowns are caused by a rogue train.

Let’s find that rogue train!

### Strategy
We will examine each of the breakdown events, one at a time, and see which other trains were nearby at the time when the breakdown event occurred. We will assign a “blame score” of 1 to a train each time it is found to be near a breakdown event. Once we have the total “blame score” of all the candidate trains, we will verify the rogue train hypothesis. If the rogue train hypothesis holds, we would go ahead and find the rogue train.

In order to help record and manage the “blame score” for each train, we have provided a Scorer ADT that you would use throughout Task 4.

### Scorer ADT
You do not need to understand how it works, but you do need to know how to use it.
- `make_scorer` returns a Python dictionary.
- `blame_train` takes in the `Scorer` and a train code. It increments the blame score of the given train (identified by the train code) by 1.
- `get_blame_scores` takes in the `Scorer` and returns a nested tuple of train codes and their corresponding blame scores.

Here is a sample run.

In [10]:
###############
# Scorer ADT  #
###############

def make_scorer():
    return {}

def blame_train(scorer, train_code):
    scorer[train_code] = scorer.get(train_code, 0) + 1
    return scorer

def get_blame_scores(scorer):
    return tuple(scorer.items())

# Use this to keep track of each train's blame score.
SCORER = make_scorer()

In [11]:
SCORER = make_scorer () # this is already done for you in the template
# Blame a bunch of trains for the breakdowns
blame_train ( SCORER , ' Train A ')
blame_train ( SCORER , ' Train B ')
blame_train ( SCORER , ' Train C ')
blame_train ( SCORER , ' Train A ')
blame_train ( SCORER , ' Train A ')
# Retrieve their blame scores .
get_blame_scores ( SCORER )
#=> (( ' Train A ', 3), (' Train B ', 1), (' Train C ', 1 ))

SCORER = make_scorer()

### Task 4a: Calculate Blame Scores (4 marks)
Write a function calculate_blame_in_line that takes in the full train schedule tuple, the tuple of valid breakdown events, the current Line and a given Scorer. The function then goes through all the valid breakdown events, finds which trains were in the vicinity at the time of the breakdown, and assigns 1 point of blame to each nearby train. Also remember not to double count the trains, as one train can only be blamed once for one event.

Step by step:
1. For each valid breakdown event, filter train schedule for trains which were nearby at the time when the breakdown event occurred.
2. For each train in the vicinity of each event, blame the train using the blame_train function of the already-defined scorer ADT.

The function should return the modified Scorer. Note that the Scorer should only be modified using the given Scorer ADT.

Hint: Make use of functions you have written in previous tasks, as well as the ADTs already defined. Also remember that with get_rogue_schedules, there can be multiple ScheduleEvent involving the same train.

Once you are done with this task, uncomment the code in the template file below the calculate_blame_in_line function definition so that the total blame score for all the candidate rogue trains is calculated using calculate_blame_in_line.

In [12]:
def calculate_blame_in_line(full_schedule, valid_bd_events, line, scorer):
    for i in valid_bd_events:
        the_time = get_schedule_time(i) # the time
        the_location = get_location_id_in_line(i, line) # the location
        print(f"{ the_time } at { the_location }")
        tuple_processing = get_rogue_schedules_in_line(
            full_schedule,
            line,
            the_time,
            the_location
        ) # filter the trains for that time and stuff

        tuple_to_clean = map(
            lambda x : get_train_code(get_train(x)),
            tuple_processing
        ) # get the train codes
        print(tuple_to_clean)

        cleaned_tuple = () # remove duplicates
        for i in tuple_to_clean:
            if i not in cleaned_tuple:
                cleaned_tuple += (i,)
            else:
                pass

        print(cleaned_tuple)

        print(f"{len(cleaned_tuple)} entries for { the_time } and { the_location }")

        for j in cleaned_tuple:
            blame_train(scorer, j)
            print(f"blamed { j }")

    return scorer

# UNCOMMENT THE CODE BELOW WHEN YOU ARE DONE WITH TASK 4A. THIS IS NOT OPTIONAL TESTING!
calculate_blame_in_line(FULL_SCHEDULE, VALID_BD_EVENTS, CCL, SCORER)

print(SCORER)

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 4A
print("## Task 4a ##")
print(sorted(get_blame_scores(SCORER))[7])

# Expected Answer
# ('TRAIN 0-5', 2)

2017-01-06 07:09:00 at 21.5
('TRAIN 0-4', 'TRAIN 0-4', 'TRAIN 1-11', 'TRAIN 1-11')
('TRAIN 0-4', 'TRAIN 1-11')
2 entries for 2017-01-06 07:09:00 and 21.5
blamed TRAIN 0-4
blamed TRAIN 1-11
2017-01-06 07:09:00 at 21
('TRAIN 0-4', 'TRAIN 1-11', 'TRAIN 1-11')
('TRAIN 0-4', 'TRAIN 1-11')
2 entries for 2017-01-06 07:09:00 and 21
blamed TRAIN 0-4
blamed TRAIN 1-11
2017-01-06 07:18:00 at 26.5
('TRAIN 0-3', 'TRAIN 0-4')
('TRAIN 0-3', 'TRAIN 0-4')
2 entries for 2017-01-06 07:18:00 and 26.5
blamed TRAIN 0-3
blamed TRAIN 0-4
2017-01-06 07:31:00 at 22.5
('TRAIN 0-4', 'TRAIN 0-8', 'TRAIN 0-8')
('TRAIN 0-4', 'TRAIN 0-8')
2 entries for 2017-01-06 07:31:00 and 22.5
blamed TRAIN 0-4
blamed TRAIN 0-8
2017-01-06 07:31:00 at 23
('TRAIN 0-4', 'TRAIN 0-4', 'TRAIN 0-8', 'TRAIN 0-8')
('TRAIN 0-4', 'TRAIN 0-8')
2 entries for 2017-01-06 07:31:00 and 23
blamed TRAIN 0-4
blamed TRAIN 0-8
2017-01-06 07:48:00 at 16.5
('TRAIN 0-4', 'TRAIN 0-4', 'TRAIN 1-2')
('TRAIN 0-4', 'TRAIN 1-2')
2 entries for 2017-01-06 07:48:0

### Task 4b: Find Max Score
Write a function `find_max_score` that takes in a `Scorer`. Using the `map` function, and Python’s built-in `max` function, **return** the maximum score.

You should not write your own loops for this task. However, you can use indexing to get the blame score from each `(‘Train Code’, blame_score)` tuple.

In [13]:
def find_max_score(scorer):
    # print(scorer)
    result = map(
        lambda x: x[1],
        get_blame_scores(scorer)
    )
    # print(result)
    return max(result)

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 4B
print("## Task 4b ##")
test_max_score = find_max_score(SCORER)
print(test_max_score)

# Expected answer
# 180

## Task 4b ##
180


### Task 4c: Verify the Rogue Train Hypothesis
Uncomment the code provided under Task 4c in the template file to view the blame scores for all candidate rogue trains. Do you think the hypothesis that the breakdown events are caused by a single rogue train holds? Explain.

Note that ZERO marks would be awarded for answers without reasonable explanation.

In [14]:
# UNCOMMENT THE CODE BELOW TO VIEW ALL BLAME SCORES. THIS IS NOT OPTIONAL TESTING!
print("## Task 4c ##")
train_scores = get_blame_scores(SCORER)
print("############### Candidate rogue trains ###############")
for score in train_scores:
    print("%s: %d" % (score[0], score[1]))
print("######################################################")

''' Please type your answer into the Task 4c textbox on Coursemology '''

## Task 4c ##
############### Candidate rogue trains ###############
TRAIN 0-4: 180
TRAIN 1-11: 17
TRAIN 0-3: 3
TRAIN 0-8: 14
TRAIN 1-2: 4
TRAIN 1-9: 16
TRAIN 0-0: 6
TRAIN 0-2: 13
TRAIN 0-7: 3
TRAIN 0-1: 7
TRAIN 1-7: 11
TRAIN 1-10: 1
TRAIN 0-6: 16
TRAIN 0-10: 10
TRAIN 0-12: 17
TRAIN 1-3: 7
TRAIN 1-6: 11
TRAIN 1-0: 10
TRAIN 1-1: 18
TRAIN 1-5: 4
TRAIN 0-5: 2
######################################################


' Please type your answer into the Task 4c textbox on Coursemology '

### Task 4d: Find the Rogue Train
Now that we know the maximum “blame score” and have also verified the rogue train hypothesis, we can finally find the rogue train (programmatically).

Write a function `find_rogue_train` that takes in the Scorer and the maximum score found in Task 4(b). The function should **return** the *train code* of the rogue train whose score matches the maximum score.

In [15]:
def find_rogue_train(scorer, max_score):
    result = filter(
        lambda x : x[1] == max_score,
        get_blame_scores(scorer)
    ) 
    return result[0][0] # using indexing to get the train code, the assumption is that there is only one rogue train

def find_rogue_train(scorer, max_score):
    the_tuple = get_blame_scores(scorer)
    for i in the_tuple:
        if i[1] == max_score:
            return i[0] 

# UNCOMMENT THE CODE BELOW TO TEST YOUR TASK 4D
print("## Task 4d ##")
print("Rogue Train is '%s'" % find_rogue_train(SCORER, test_max_score))

# Expected Answer
# Rogue Train is 'TRAIN 0-4'


## Task 4d ##
Rogue Train is 'TRAIN 0-4'
