Find the time and value of max load for each of the regions COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character as the delimiter.

An example output can be seen in the "example.csv" file

In [4]:
# import libraries
import xlrd
import os
import csv
from zipfile import ZipFile

# set data files
datafile = "2013_ERCOT_Hourly_Load_Data.xls"
outfile = "2013_Max_Loads.csv"

# open given file + extract
def open_zip(datafile):
    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
        myzip.extractall()

#def parse_file(datafile):
    # open the workbook w/ xlrd library's open_workbook method and get the 1st sheet
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)

data = {}
headers = ['Station','Year','Month','Day','Hour','Max Load']

# for each of the stations
for i in range(1,9):
    # grab the station name from the value in current column n in the 1st row
    station = sheet.cell_value(0,i)
    
    # grab all values in current column n from 2nd row until the end
    col_values = sheet.col_values(i, start_rowx = 1, end_rowx = None)
    
    # get max value from current column n's values
    max_col_value = round(max(col_values),0)
    
    # get index of this max value from column n
    max_col_value_row_pos = col_values.index(max(col_values),0) + 1
    
    # get the date time for this max value from current column n
    max_col_value_time = xlrd.xldate_as_tuple(sheet.cell_value(max_col_value_row_pos,0), 0)
    
    # input grabbed data into dictionary
    data[station] = {'maxdate': max_col_value_time,
                    'maxvalue': max_col_value}
    
# open file to write to 
with open(outfile, 'wt', newline='') as csvfile:
    
    data_writer = csv.writer(csvfile, delimiter ='|',quoting=csv.QUOTE_MINIMAL)
    
    # write in headers as own row
    data_writer.writerow(headers)
    
    # for each kv-pair in the data dictionary
    for s in data:
        # unpack the dictionary's tuple value for the max value's date
        year, month, day, hour, min, sec = data[s]['maxdate']
        
        # get the max value
        value = data[s]['maxvalue']
        
        # write station, specified date field, and max value to the file as a row
        data_writer.writerow([s, year, month, day, hour, value])

In [48]:
# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''

import xlrd
import os
import csv
from zipfile import ZipFile

datafile = "2013_ERCOT_Hourly_Load_Data.xls"
outfile = "2013_Max_Loads.csv"


def open_zip(datafile):
    with ZipFile('{0}'.format(datafile), 'r') as myzip:
        myzip.extractall()


def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    data = {}
    headers = ['Station','Year','Month','Day','Hour','Max Load']

    # for each desired column
    for i in range(1,len(headers)):
        # grab the station name from the value in current column n in the 1st row
        station = sheet.cell_value(0,i)
    
        # grab all values in current column n from 2nd row until the end
        col_values = sheet.col_values(i, start_rowx = 1, end_rowx = None)
    
        # get max value from current column n's values
        max_col_value = round(max(col_values),0)
    
        # get index of this max value from column n
        max_col_value_row_pos = col_values.index(max(col_values),0) + 1
    
        # get the date time for this max value from current column n
        max_col_value_time = xlrd.xldate_as_tuple(sheet.cell_value(max_col_value_row_pos,0), 0)
    
        # input grabbed data into dictionary
        data[station] = {'maxdate': max_col_value_time,
                        'maxvalue': max_col_value}
    
        return data
        
def save_file(data, filename):
    # open file to write to 
    with open(outfile, 'wt', newline='') as csvfile:
    
        data_writer = csv.writer(csvfile, delimiter ='|',quoting=csv.QUOTE_MINIMAL)
    
        # write in headers as own row
        data_writer.writerow(headers)
        
        # for each kv-pair in the data dictionary
        for s in data:
            # unpack the dictionary's tuple value for the max value's date
            year, month, day, hour, min, sec = data[s]['maxdate']
        
            # get the max value
            value = data[s]['maxvalue']
        
            # write station, specified date field, and max value to the file as a row
            data_writer.writerow([s, year, month, day, hour, value])
    
def test():
    open_zip(datafile)
    data = parse_file(datafile)
    save_file(data, outfile)

    number_of_rows = 0
    stations = []

    ans = {'FAR_WEST': {'Max Load': '2281.2722140000024',
                        'Year': '2013',
                        'Month': '6',
                        'Day': '26',
                        'Hour': '17'}}
    correct_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                        'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load']

    with open(outfile) as of:
        csvfile = csv.DictReader(of, delimiter="|")
        for line in csvfile:
            station = line['Station']
            if station == 'FAR_WEST':
                for field in fields:
                    # Check if 'Max Load' is within .1 of answer
                    if field == 'Max Load':
                        max_answer = round(float(ans[station][field]), 1)
                        max_line = round(float(line[field]), 1)
                        assert max_answer == max_line

                    # Otherwise check for equality
                    else:
                        assert ans[station][field] == line[field]

            number_of_rows += 1
            stations.append(station)

        # Output should be 8 lines not including header
        assert number_of_rows == 8

        # Check Station Names
        assert set(stations) == set(correct_stations)

        
if __name__ == "__main__":
    test()


XLRDError: ZIP file contents not a known type of workbook