In [8]:
# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''

import xlrd
import os
import csv
from zipfile import ZipFile

# Workbook that test() hands to parse_file(); test() first calls open_zip(),
# which extracts the archive named "<datafile>.zip".
datafile = "2013_ERCOT_Hourly_Load_Data.xls"
# Pipe-delimited CSV that test() writes via save_file() and then reads back.
outfile = "2013_Max_Loads.csv"


def open_zip(datafile):
    """Unpack the archive ``<datafile>.zip`` into the current working directory.

    Args:
        datafile: Base name of the archive; ".zip" is appended to form the
            path that is opened for reading.
    """
    archive_path = '{0}.zip'.format(datafile)
    archive = ZipFile(archive_path, 'r')
    try:
        # No target given, so members land in the current working directory.
        archive.extractall()
    finally:
        archive.close()


def parse_file(datafile):
    """Find the peak load and the time it occurred for each station column.

    Reads the first worksheet of the workbook. Row 0 is used as the header
    row (station names), column 0 is read as Excel date/time values, and
    every column from 1 up to (but not including) the last one is treated as
    a station's load readings. The last column is skipped -- assumed to be a
    non-station aggregate column; confirm against the workbook.

    Args:
        datafile: Path of the .xls workbook to open with xlrd.

    Returns:
        A list with one dict per processed column. Each dict has the keys
        "Station", "Max Load", "Year", "Month", "Day" and "Hour", inserted in
        that order (save_file derives the CSV column order from it).

    Raises:
        ValueError: If a station column has no data rows (``max`` of an
            empty sequence).
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = []

    # `ncols` is an attribute (not a method) of the xlrd sheet object.
    # Column 0 holds the timestamps; the final column is deliberately left out.
    for col in range(1, sheet.ncols - 1):
        # Values of this column from row 1 downwards, i.e. without the header.
        loads = sheet.col_values(col, 1)

        max_load = max(loads)
        # `loads` starts at sheet row 1, hence the +1 to get back to a sheet
        # row index. `index` returns the first occurrence on ties.
        peak_row = loads.index(max_load) + 1

        # Convert with the workbook's own date mode instead of assuming the
        # 1900-based system (0); a 1904-based workbook would otherwise yield
        # dates that are off by roughly four years.
        peak_time = xlrd.xldate_as_tuple(sheet.cell_value(peak_row, 0),
                                         workbook.datemode)
        year, month, day, hour = peak_time[:4]

        data.append({
            "Station": sheet.cell_value(0, col),
            "Max Load": max_load,
            "Year": year,
            "Month": month,
            "Day": day,
            "Hour": hour,
        })

    return data

def save_file(data, filename):
    """Write the records to a pipe-delimited CSV file with a header row.

    Args:
        data: Non-empty list of dicts sharing the same keys. The keys of the
            first dict, in their iteration order, become the CSV columns.
        filename: Path of the file to create or overwrite.

    Raises:
        IndexError: If ``data`` is empty. The field names are read before the
            file is opened, so an existing file is left untouched.
    """
    import sys  # local import: only needed to pick the right file mode

    # Read the header up front so empty input fails before the file is truncated.
    fieldnames = list(data[0].keys())

    # The csv module wants a text-mode file opened with newline="" on
    # Python 3 (a binary file makes the writer raise TypeError), whereas
    # Python 2 expects binary mode.
    if sys.version_info[0] >= 3:
        out = open(filename, "w", newline="")
    else:
        out = open(filename, "wb")

    with out as f:
        file_writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="|")
        file_writer.writeheader()
        file_writer.writerows(data)
        

    
def test():
    """Run the extract -> parse -> save pipeline and verify the written CSV.

    Checks that the FAR_WEST row matches the known answer (the load compared
    after rounding both values to one decimal place, the date parts compared
    as strings), that exactly 8 data rows were written, and that the station
    names are the expected set.
    """
    open_zip(datafile)
    save_file(parse_file(datafile), outfile)

    expected = {'FAR_WEST': {'Max Load': '2281.2722140000024',
                             'Year': '2013',
                             'Month': '6',
                             'Day': '26',
                             'Hour': '17'}}
    expected_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                         'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    checked_fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load']

    seen_stations = []

    with open(outfile) as result_file:
        for row in csv.DictReader(result_file, delimiter="|"):
            name = row['Station']

            # Only FAR_WEST has a reference answer to compare against.
            if name == 'FAR_WEST':
                for key in checked_fields:
                    if key != 'Max Load':
                        # Date parts are compared as the raw CSV strings.
                        assert expected[name][key] == row[key]
                        continue
                    # Loads are compared after rounding to one decimal place.
                    want = round(float(expected[name][key]), 1)
                    got = round(float(row[key]), 1)
                    assert want == got

            seen_stations.append(name)

        # One data row per station, header excluded.
        assert len(seen_stations) == 8

        # Station names must match regardless of order.
        assert set(seen_stations) == set(expected_stations)

        
# Run the end-to-end check only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    test()


In [9]:
"""
def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = {}
    # process all rows that contain station data
    for n in range (1, 9):
        station = sheet.cell_value(0, n)
        cv = sheet.col_values(n, start_rowx=1, end_rowx=None)

        maxval = max(cv)
        maxpos = cv.index(maxval) + 1
        maxtime = sheet.cell_value(maxpos, 0)
        realtime = xlrd.xldate_as_tuple(maxtime, 0)
        data[station] = {"maxval": maxval,
                         "maxtime": realtime}

    print data
    return data

def save_file(data, filename):
    with open(filename, "w") as f:
        w = csv.writer(f, delimiter='|')
        w.writerow(["Station", "Year", "Month", "Day", "Hour", "Max Load"])
        for s in data:
            year, month, day, hour, _ , _= data[s]["maxtime"]
            w.writerow([s, year, month, day, hour, data[s]["maxval"]])
            
"""

'\ndef parse_file(datafile):\n    workbook = xlrd.open_workbook(datafile)\n    sheet = workbook.sheet_by_index(0)\n    data = {}\n    # process all rows that contain station data\n    for n in range (1, 9):\n        station = sheet.cell_value(0, n)\n        cv = sheet.col_values(n, start_rowx=1, end_rowx=None)\n\n        maxval = max(cv)\n        maxpos = cv.index(maxval) + 1\n        maxtime = sheet.cell_value(maxpos, 0)\n        realtime = xlrd.xldate_as_tuple(maxtime, 0)\n        data[station] = {"maxval": maxval,\n                         "maxtime": realtime}\n\n    print data\n    return data\n\ndef save_file(data, filename):\n    with open(filename, "w") as f:\n        w = csv.writer(f, delimiter=\'|\')\n        w.writerow(["Station", "Year", "Month", "Day", "Hour", "Max Load"])\n        for s in data:\n            year, month, day, hour, _ , _= data[s]["maxtime"]\n            w.writerow([s, year, month, day, hour, data[s]["maxval"]])\n            \n'