In [28]:
# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''

import xlrd
import os
import csv
from zipfile import ZipFile
import numpy as np

# Path of the ERCOT hourly-load workbook that test() hands to parse_file().
datafile = os.path.join('data', 'DataWrangling', "2013_ERCOT_Hourly_Load_Data.xls")
# Name of the pipe-delimited CSV that test() asks save_file() to write and then reads back.
outfile = "2013_Max_Loads.csv"


def open_zip(datafile):
    """Unpack the archive that sits next to ``datafile``.

    The archive name is ``datafile`` with ``.zip`` appended. All members are
    extracted into the current working directory.
    """
    archive_path = '{0}.zip'.format(datafile)
    with ZipFile(archive_path, 'r') as archive:
        archive.extractall()


def parse_file(datafile):
    """Read the first worksheet of ``datafile`` and return it column by column.

    Args:
        datafile: path of the Excel workbook to open with xlrd.

    Returns:
        A list with one entry per sheet column; each entry is the list of
        that column's cell values starting at row 0 (so the first element of
        every column is whatever the sheet holds in its top row).

    Note:
        Date cells come back as Excel serial numbers. Use
        ``xlrd.xldate_as_tuple(value, 0)`` to turn one into a
        ``(year, month, day, hour, minute, second)`` tuple.
    """
    first_sheet = xlrd.open_workbook(datafile).sheet_by_index(0)

    columns = []
    for col_idx in range(first_sheet.ncols):
        columns.append(first_sheet.col_values(col_idx, start_rowx=0, end_rowx=None))

    return columns

def save_file(data, filename):
    """Write each zone's peak load, and when it occurred, to a pipe-delimited CSV.

    Args:
        data: list of sheet columns. ``data[0]`` is the timestamp column and
            every following column is ``[zone_name, load, load, ...]``; all
            columns carry a header cell at index 0. The last column is
            skipped and produces no output row.
        filename: path of the CSV file to create (overwritten if present).

    The file gets a header row ``Station|Year|Month|Day|Hour|Max Load``
    followed by one row per zone, with the load rounded to one decimal.
    A ``zone value time-tuple`` line is also printed for every zone.
    """
    import sys

    output = [['Station', 'Year', 'Month', 'Day', 'Hour', 'Max Load']]
    col_time = data[0] if data else []

    # data[0] is the time axis and data[-1] is deliberately left out.
    for col_data in data[1:-1]:
        zone = col_data[0]
        loads = col_data[1:]
        zonemaxvalue = max(loads)
        # loads starts one row below the header, so shift the index back by
        # one to address the matching cell of the timestamp column.
        zonemaxtime = col_time[loads.index(zonemaxvalue) + 1]
        # Second argument is xlrd's datemode; the result is
        # (year, month, day, hour, minute, second).
        zonemaxtimetuple = xlrd.xldate_as_tuple(zonemaxtime, 0)
        year, month, day, hour = zonemaxtimetuple[:4]
        output.append([zone, year, month, day, hour, round(zonemaxvalue, 1)])
        # Single pre-formatted argument: prints the same text on Python 2 and 3.
        print('%s %s %s' % (zone, zonemaxvalue, zonemaxtimetuple))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; anything else corrupts the row terminator.
    if sys.version_info[0] < 3:
        f = open(filename, 'wb')
    else:
        f = open(filename, 'w', newline='')
    with f:
        csv.writer(f, delimiter='|').writerows(output)
    
    
def test():
    """End-to-end check of parse_file() and save_file().

    Parses ``datafile``, writes ``outfile``, then reads the CSV back and
    asserts that:
      * the FAR_WEST row matches the known answer (date fields compared as
        strings, 'Max Load' compared after rounding to one decimal),
      * there are exactly 8 data rows,
      * the station names are exactly the 8 expected regions.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    #open_zip(datafile)
    data = parse_file(datafile)
    save_file(data, outfile)
    number_of_rows = 0
    stations = []

    ans = {'FAR_WEST': {'Max Load': '2281.2722140000024',
                        'Year': '2013',
                        'Month': '6',
                        'Day': '26',
                        'Hour': '17'}}
    correct_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                        'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load']

    with open(outfile) as of:
        csvfile = csv.DictReader(of, delimiter="|")
        for line in csvfile:
            # Parenthesised single argument: same output on Python 2 and 3.
            print(line)
            station = line['Station']
            if station == 'FAR_WEST':
                for field in fields:
                    # Check if 'Max Load' is within .1 of answer
                    if field == 'Max Load':
                        max_answer = round(float(ans[station][field]), 1)
                        max_line = round(float(line[field]), 1)
                        assert max_answer == max_line

                    # Otherwise check for equality (both sides are strings)
                    else:
                        assert ans[station][field] == line[field]

            number_of_rows += 1
            stations.append(station)

        # Output should be 8 lines not including header
        assert number_of_rows == 8

        # Check Station Names
        assert set(stations) == set(correct_stations)

        
# Run the end-to-end self-check only when this code is executed as the main module.
if __name__ == "__main__":
    test()


COAST 18779.02551 (2013, 8, 13, 17, 0, 0)
EAST 2380.165409 (2013, 8, 5, 17, 0, 0)
FAR_WEST 2281.272214 (2013, 6, 26, 17, 0, 0)
NORTH 1544.770714 (2013, 8, 7, 17, 0, 0)
NORTH_C 24415.570227 (2013, 8, 7, 18, 0, 0)
SOUTHERN 5494.157645 (2013, 8, 8, 16, 0, 0)
SOUTH_C 11433.304916 (2013, 8, 8, 18, 0, 0)
WEST 1862.613765 (2013, 8, 7, 17, 0, 0)
{'Hour': '17', 'Month': '8', 'Station': 'COAST', 'Year': '2013', 'Day': '13', 'Max Load': '18779.0'}
{'Hour': '17', 'Month': '8', 'Station': 'EAST', 'Year': '2013', 'Day': '5', 'Max Load': '2380.2'}
{'Hour': '17', 'Month': '6', 'Station': 'FAR_WEST', 'Year': '2013', 'Day': '26', 'Max Load': '2281.3'}
{'Hour': '17', 'Month': '8', 'Station': 'NORTH', 'Year': '2013', 'Day': '7', 'Max Load': '1544.8'}
{'Hour': '18', 'Month': '8', 'Station': 'NORTH_C', 'Year': '2013', 'Day': '7', 'Max Load': '24415.6'}
{'Hour': '16', 'Month': '8', 'Station': 'SOUTHERN', 'Year': '2013', 'Day': '8', 'Max Load': '5494.2'}
{'Hour': '18', 'Month': '8', 'Station': 'SOUTH_C', 'Yea

Udacity's solution:

In [18]:
def parse_file(datafile):
    """Find the peak load, and the time it occurred, for each station column.

    Reads the first sheet of the workbook at ``datafile``. Sheet columns 1
    through 8 are treated as station columns: row 0 holds the station name
    and the rows below hold its load values. Column 0 supplies the Excel
    timestamp for each row.

    Args:
        datafile: path of the Excel workbook to open with xlrd.

    Returns:
        dict mapping station name to
        ``{"maxval": <largest load>, "maxtime": <tuple from xlrd.xldate_as_tuple>}``.
        The dict is also printed before being returned.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = {}
    # process all columns that contain station data
    for n in range(1, 9):
        station = sheet.cell_value(0, n)
        cv = sheet.col_values(n, start_rowx=1, end_rowx=None)

        maxval = max(cv)
        # cv starts at sheet row 1, so add 1 to turn the list index back
        # into a sheet row number.
        maxpos = cv.index(maxval) + 1
        maxtime = sheet.cell_value(maxpos, 0)
        # Second argument is xlrd's datemode.
        realtime = xlrd.xldate_as_tuple(maxtime, 0)
        data[station] = {"maxval": maxval,
                         "maxtime": realtime}

    # Parenthesised single argument: same output on Python 2 and 3.
    print(data)
    return data

def save_file(data, filename):
    """Write the per-station peak loads to a pipe-delimited CSV file.

    Args:
        data: dict mapping station name to a dict with keys ``"maxtime"``
            (a 6-item sequence whose first four items are written as
            Year, Month, Day, Hour) and ``"maxval"`` (written as Max Load,
            unrounded).
        filename: path of the CSV file to create (overwritten if present).

    Rows are written in the dict's iteration order, after a header row.
    """
    import sys

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; a plain "w" file lets the platform rewrite
    # the writer's "\r\n" terminator (doubling the "\r" on Windows).
    if sys.version_info[0] < 3:
        f = open(filename, "wb")
    else:
        f = open(filename, "w", newline="")
    with f:
        w = csv.writer(f, delimiter='|')
        w.writerow(["Station", "Year", "Month", "Day", "Hour", "Max Load"])
        for s in data:
            # Minute and second are not part of the output.
            year, month, day, hour, _, _ = data[s]["maxtime"]
            w.writerow([s, year, month, day, hour, data[s]["maxval"]])