In [19]:
import csv

In [85]:
def prepare_datasets(file_path, delimiter=',', encoding='utf-8'):
    """
    Load a delimited plaintext file into a list of row dictionaries.

    Accepts:
        file_path: path to a delimited plaintext file whose first row holds
            the column headers
        delimiter: field separator; defaults to ',' (comma-separated)
        encoding: text encoding used to decode the file; defaults to 'utf-8'
    Returns: a list containing a dictionary for every row in the file,
        with the file column headers as keys (all values are strings)

    Note: plain 'utf-8' does not strip a leading byte-order mark, so a file
    saved with a BOM yields a first header prefixed with U+FEFF. Pass
    encoding='utf-8-sig' to have the BOM removed instead.
    """

    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being rewritten by
    # universal-newlines translation.
    with open(file_path, newline='', encoding=encoding) as infile:
        reader = csv.DictReader(infile, delimiter=delimiter)
        list_of_dicts = [dict(r) for r in reader]

    return list_of_dicts


In [86]:
# Load both source CSVs. prepare_datasets returns a list with one dictionary
# per data row, keyed by the file's column headers.
# NOTE(review): judging by the file names, the first holds graduation/cohort
# counts and the second holds expenditures per district -- confirm against the data.
raw_grad_data = prepare_datasets("CLEAN_GRADUATION_RATE.csv") 
raw_fund_data = prepare_datasets("KY_COUNTY_EXPENDITURES.csv")

In [90]:
def _to_int(value):
    """Parse a CSV count cell as an int; blank or missing cells count as 0."""
    if value is None or (isinstance(value, str) and value.strip() == ''):
        return 0
    return int(value)


def _district_name(row):
    """Return a row's DIST_NAME whether or not the header kept its BOM."""
    # Depending on how the file was decoded, the first header arrives either
    # as '\ufeffDIST_NAME' (BOM kept) or as plain 'DIST_NAME' (BOM stripped).
    if '\ufeffDIST_NAME' in row:
        return row['\ufeffDIST_NAME']
    return row['DIST_NAME']


# Sum graduates and starting cohort per county. Accumulating in a dict keyed
# by county means the result does not depend on the rows being sorted, on the
# first row belonging to a particular county, or on the input being non-empty.
# The raw rows are only read, never modified, so re-running the cell is safe.
county_totals = {}  # county -> [total graduates, total cohort], in first-seen order
for row in raw_grad_data:
    totals = county_totals.setdefault(_district_name(row), [0, 0])
    # the data separates students who took 4 or 5 years to graduate
    totals[0] += _to_int(row['GRADS4YR']) + _to_int(row['GRADS5YR'])
    totals[1] += _to_int(row['COHORT4YR']) + _to_int(row['COHORT5YR'])

stats = {}  # county -> {'grad_rate': percent, 'size': cohort[, 'funding': total]}
for county, (grads, cohort) in county_totals.items():
    if cohort == 0:
        # No cohort means neither a graduation rate nor a per-student figure
        # can be computed, so the county is left out instead of dividing by zero.
        continue
    stats[county] = {'grad_rate': round((grads / cohort) * 100, 1), 'size': cohort}

# Attach total expenditures to every county that also has graduation stats;
# funding rows for counties without stats are ignored.
for row in raw_fund_data:
    county = _district_name(row)
    if county in stats:
        stats[county]['funding'] = int(row['TOTALEXPENDITURES'])
        



In [92]:
# newline='' lets csv.writer control line endings itself; without it, text
# mode on Windows expands the writer's '\r\n' into '\r\r\n' and the file shows
# a blank line after every row.
# NOTE(review): the file name is spelled 'data_ouput.csv' (sic); it is kept
# unchanged in case something downstream reads that exact path.
with open('data_ouput.csv', 'w', encoding='utf-8', newline='') as f:
    writer = csv.writer(f)
    # write a header row
    writer.writerow(('county',
                     'graduation_rate',
                     'school_county_funding'))

    # one output row per county; counties without funding data are left out
    for county, county_stats in stats.items():
        if 'funding' not in county_stats:
            continue
        # third column: total expenditures divided by cohort size, i.e. funding
        # per student, rounded to a whole number
        funding_per_student = round(county_stats['funding'] / county_stats['size'])
        writer.writerow((county, county_stats['grad_rate'], funding_per_student))
     
        