In [37]:
# Calculates chargebacks

# input:  .csv file made in Excel containing a journal of activities
#      date, time, chargeback category, comments
# output: report of date, chargeback category, total time
#
import csv
import os
import sys
from datetime import datetime
from operator import itemgetter

from numpy import genfromtxt


In [38]:
def listfiles(path):
    """Print `path` followed by the names of the regular files it contains.

    Sub-directories are skipped.  Each entry is tested relative to `path`
    (not the current working directory), so the listing is correct no matter
    where the process is currently running.  If `path` is not an existing
    directory a short note is printed instead of raising.
    """
    print("\n"+path)

    if not os.path.isdir(path):
        # Nothing to list; callers may go on to create the folder afterwards.
        print("    (folder does not exist)")
        return

    # sorted() makes the listing order deterministic across platforms.
    for name in sorted(os.listdir(path)):
        if os.path.isfile(os.path.join(path, name)):
            print("    "+name)

In [62]:
def getfoldername():
    """Ask the user which folder holds the data file and switch to it.

    Shows the current working directory and its files, then keeps asking for
    a folder until the user answers 'y' or 'Y'.  If the confirmed folder does
    not exist it is created.  The process working directory is changed to the
    chosen folder.

    Returns:
        The absolute path of the chosen folder.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    path = os.getcwd()
    print('Current path: '+ path)
    listfiles(path)                   # prints a list of the files in the current path

    yesorno = str(ask("      Is the data in this folder? (y/n)"))
    while (yesorno != 'y' and yesorno != 'Y'):
        path = str(ask("What folder is the data in? "))
        if os.path.isdir(path):
            listfiles(path)           # prints a list of the files in the requested path
        else:
            # Listing a folder that does not exist yet would raise before we
            # ever reach the makedirs() call below.
            print("\n"+path)
            print("    (folder does not exist; it will be created if you confirm)")
        yesorno = str(ask("    Is this the right path?  "+path))
        print(yesorno)

    # Resolve to an absolute path BEFORE changing directory: a relative path
    # is no longer valid once the working directory has moved into it.
    path = os.path.abspath(path)
    if not os.path.exists(path):
        os.makedirs(path)
    os.chdir(path)

    return(path)

In [60]:
def getfilename(path):
    """Ask the user for the name of the data file inside `path`.

    Changes the working directory to `path`, then keeps prompting until the
    name of an existing file is entered.  Entering 'x' at a retry prompt
    prints "exit requested" and terminates via sys.exit(0).

    Args:
        path: folder that contains the data file (absolute or relative).

    Returns:
        The absolute path of the selected file.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # Make the folder absolute before chdir so that the path returned below
    # is still valid after the working directory has changed.
    path = os.path.abspath(path)
    os.chdir(path)

    filename = str(ask("Which file contains the data? "))
    while not os.path.isfile(os.path.join(path, filename)):
        filename = str(ask("That file does not exist.  Try again. (x to exit) "))
        if filename == 'x':
            print("exit requested")
            sys.exit(0)

    return os.path.join(path, filename)

In [50]:
def opencsv(path,filename):
    """Read a CSV journal into a list of rows, dropping the header and blanks.

    The first line of the file is treated as a title line and discarded.
    Rows whose second column (index 1) is empty, and rows too short to have
    a second column (e.g. blank lines), are dropped.

    Args:
        path:     folder containing the file.
        filename: file name; if it is already an absolute path,
                  os.path.join returns it unchanged.

    Returns:
        A list of rows, each a list of strings.  Empty if the file has no
        data rows.
    """
    fullname = os.path.join(path,filename)
    print(fullname)

    # 'with' guarantees the file is closed even if parsing fails.
    with open(fullname) as filepointer:
        rows = list(csv.reader(filepointer))

    # rows[1:] skips the title line and is safe on an empty file.
    return [row for row in rows[1:] if len(row) > 1 and row[1] != ""]
    

In [42]:
def qtrround(x):
    """Round x to the nearest multiple of 0.25 (a quarter hour).

    Ties are resolved however the built-in round() resolves them.
    """
    quarters = round(4 * x)           # number of whole quarters in x
    return quarters / 4

In [43]:
def calcduration(indata):
    """Append a duration (in hours) to every row of `indata`, in place.

    A row's duration is the time in the NEXT row (column 1) minus its own
    time, rounded with qtrround.  Rows whose column 2 is "depart", rows with
    an empty time, and the last row (which has no following row to read an
    end time from) get a duration of 0.0.

    If a start time cannot be read, or the end time is not later than the
    start time, the two offending rows are printed and the program exits
    with a non-zero status.

    Args:
        indata: list of rows (lists); modified in place.

    Returns:
        The same list, with one value appended to each row.
    """
    if not indata:                                        # nothing to do for an empty journal
        return(indata)

    for i in range(0,len(indata)-1):                      # the last row is handled after the loop
        row = indata[i]
        if row[2] == "depart" :
            row.append(0.0)                               # depart records have no duration
        elif row[1] != "" :
            starttime = time2num(row[1])
            endtime = time2num(indata[i+1][1])
            # time2num signals "no time" with a negative value; 0.0 is a valid
            # start (midnight), so the test is >= 0 rather than > 0.
            if starttime >= 0 and endtime > starttime :
                row.append(qtrround(endtime - starttime)) # add the resulting duration to the end of its list
            else:
                print(row)
                print("     ",indata[i+1])
                print("duration = ",endtime - starttime)
                sys.exit(1)                               # non-zero status: this is a failure
        else:
            row.append(0.0)                               # no time recorded, so no duration

    indata[-1].append(0.0)                                # give the last line a duration of 0.0
    return(indata)

In [44]:
def time2num(timevalue):
    """Convert a clock-time string to hours since midnight as a float.

    Accepts "h:mm", "h:mm:ss", with or without a trailing AM/PM marker in
    either case (e.g. "9:30 AM", "1:15 pm", "9:30:00 AM", "14:45").
    Surrounding whitespace is ignored.

    Args:
        timevalue: the time as text.

    Returns:
        Hours on a 24-hour clock (e.g. "1:15 PM" -> 13.25), or -1.0 when
        the text is empty, signalling that there is no usable time.
    """
    text = timevalue.strip()
    if text == '':
        return -1.0                                        # negative value signals the data is irrelevant

    # Peel off an AM/PM marker, if there is one, before splitting the digits.
    meridiem = text[-2:].upper()
    if meridiem in ("AM", "PM"):
        text = text[:-2].strip()
    else:
        meridiem = ""

    # Fields are taken by position, so an optional seconds field cannot be
    # mistaken for the minutes.
    fields = text.split(":")
    hour = float(fields[0])
    minute = float(fields[1]) if len(fields) > 1 else 0.0
    second = float(fields[2]) if len(fields) > 2 else 0.0

    hours = hour + minute/60 + second/3600                 # minutes and seconds as fractions of an hour
    if meridiem == "PM" and hour != 12:                    # convert to 24 hour clock
        hours = hours + 12.0
    elif meridiem == "AM" and hour == 12:                  # 12:xx AM is just after midnight
        hours = hours - 12.0
    return hours

In [45]:
def printlist(listdata):
    """Write each row of `listdata` to stdout as comma-separated values.

    Every value is followed by a comma and four spaces; each row is
    terminated by a space and a blank line.
    """
    separator = ",    "
    for row in listdata:
        sys.stdout.write("".join(str(cell)+separator for cell in row))
        print(" \n")
            


In [46]:
def sumcategories(indata):
    """Total the durations of consecutive rows sharing a date and category.

    For each row, column 0 is read as the date, column 3 as the category and
    the last element as the duration.  Rows are grouped only while they are
    adjacent, so `indata` should already be ordered by date and category.
    Every group is reported, including the final one, and every row's
    duration is counted.

    Args:
        indata: list of rows whose last element is a numeric duration.

    Returns:
        A list of [date, category, total] lists, in input order.  Empty if
        `indata` is empty.
    """
    catsums = []
    for row in indata:
        date, category, duration = row[0], row[3], row[-1]
        if catsums and catsums[-1][0] == date and catsums[-1][1] == category:
            catsums[-1][2] += duration                   # same group: keep accumulating
        else:
            catsums.append([date, category, 0.0 + duration])   # new date or category starts a new sum
    return(catsums)
        

In [61]:
# main program
#
# Asks for the data folder and file, computes a duration for every journal
# entry, then prints the total time per date and category.

def _chronological_key(row):
    """Sort key: by date (column 0) in calendar order, then category (column 3).

    Dates are parsed as month/day/year (e.g. "2/24/2016").  Comparing the raw
    text would put "10/1/2016" before "2/24/2016".  Rows whose date does not
    parse are ordered by their text and placed after all parseable dates.
    """
    try:
        return (0, datetime.strptime(row[0].strip(), "%m/%d/%Y"), row[3])
    except ValueError:
        return (1, row[0], row[3])


path = getfoldername()
filename = getfilename(path)                           # full path of the data file, not just its name
indata = opencsv(path,filename)


indata = calcduration(indata)                          # needs journal order, so it runs before the sort
indata = sorted(indata, key=_chronological_key)        # sorts by date, then category


catsums = sumcategories(indata)
printlist(catsums)


Current path: c:\Users\laughreyl\documents\Python_Scripts
----- file list ------

c:\Users\laughreyl\documents\Python_Scripts
    Daily Log.xlsx
    daily_log.csv
    daily_log_short.csv
    ~$Daily Log.xlsx
      Is the data in this folder? (y/n)y
Which file contains the data? daily_log_short.csv
c:\Users\laughreyl\documents\Python_Scripts\daily_log_short.csv
2/24/2016,    ,    0.0,     

2/24/2016,    Classes and training ,    2.0,     

2/24/2016,    GENIE Fly work ,    0.25,     

2/24/2016,    GI meetings ,    1.0,     

2/24/2016,    Heberlein imaging Jasper,    1.5,     

2/24/2016,    Huston imaging ,    1.75,     

2/24/2016,    Other imaging Martin,    0.5,     

2/24/2016,    PTR chores,    0.75,     

2/24/2016,    lunch,    0.5,     

2/24/2016,    who?,    0.25,     

2/25/2016,    ,    0.0,     

2/25/2016,    Classes and training ,    0.25,     

2/25/2016,    Dickson Courtship assay,    1.75,     

2/25/2016,    GI meetings ,    1.0,     

2/25/2016,    Heberlein imagi