In [13]:
import numpy as np
from numpy import inf
import pandas as pd
import glob
import os
import sys
# Render every float in printed arrays rounded to a whole number (no decimals).
np.set_printoptions(formatter={'float': "{0:0.0f}".format})

def _fail(message):
    """Print the script's error banner followed by *message*, then exit.

    Exits with status 1 so that a rejected input can be told apart from a
    successful run by the calling shell.
    """
    print("ERROR!!!")
    print(message)
    sys.exit(1)


def _read_user_inputs(path):
    """Return the fields of the second line of the CSV at *path* as strings.

    The line is read as a data row rather than as a header row: pandas
    renames repeated header labels (``'4', '4'`` becomes ``'4', '4.1'``),
    which would corrupt the inputs whenever two numeric fields are equal.
    Trailing empty cells are dropped.
    """
    row = pd.read_csv(path, skiprows=[0], header=None, nrows=1,
                      dtype=str, keep_default_na=False)
    values = row.iloc[0].tolist()
    while values and values[-1] == '':
        values.pop()
    return values


def main():
    """Scale annual census-tract values by monthly county ratios and print them.

    Reads ``User Inputs.csv`` from the current working directory. Its first
    line is ignored; the fields of its second line are used by position:

    * 0    -- path of a CSV whose first column lists the county names
    * 1    -- directory containing the annual census CSVs (one per year)
    * 2    -- directory containing the monthly county CSVs, one file named
              ``<county>.csv`` per county
    * 3, 4 -- bounds of the county time series: the first ``[3]`` lines of
              each county file are skipped and ``[4] - [3]`` rows are read
    * 5, 6 -- first and last county data column (1-based, inclusive)
    * 7    -- an extra census column that is loaded but not used further
    * 8    -- census column holding the tract name; a tract belongs to a
              county when the name contains ``"<county> County"``
    * 9... -- census data columns, paired in order with columns 5..6

    For every data point the tract values are summed per county and year,
    each monthly county value is divided by the matching annual sum, and
    every tract's annual value is multiplied by its county's monthly ratio.
    The resulting (tracts x months) array is printed per data point.

    A year is taken to be 12 consecutive rows of county data. Census files
    are used in sorted file-name order, which is assumed to be
    chronological -- NOTE(review): confirm against the file naming scheme.

    Raises:
        SystemExit: with status 1 when the inputs are inconsistent.
    """

    # UPLOADS & INPUTS ===================================================================================

    INPUTS = _read_user_inputs('User Inputs.csv')
    print("INPUTS:")
    print(INPUTS)
    print('\n')

    if len(INPUTS) < 9:
        _fail("User Inputs must contain at least 9 values!")

    # Counties of interest: the first column of the counties CSV
    Counties_df = pd.read_csv(str(INPUTS[0]))
    Counties = list(Counties_df[Counties_df.columns[0]])

    Census_dir = str(INPUTS[1])
    County_dir = str(INPUTS[2])

    # Leading lines to skip and number of time points to read per county file
    CountySkipRows = int(INPUTS[3])
    CountyNumRows = int(INPUTS[4]) - int(INPUTS[3])

    # County data columns: 1-based inclusive bounds -> 0-based positions.
    # One expression covers both first < last and first == last.
    first_col = int(INPUTS[5])
    last_col = int(INPUTS[6])
    if first_col > last_col:
        _fail("County Use Columns Last must be >= County Use Columns First!")
    CountyUseCols = list(range(first_col - 1, last_col))

    # Census columns to load, the tract-name column, and the data columns
    CensusUseCols = INPUTS[7:]
    NameCol = INPUTS[8]
    CensusDataPoints = INPUTS[9:]

    if len(CountyUseCols) != len(CensusDataPoints):
        _fail("The number of county and census data points are not equivalent!")

    # Sorted so that the year order does not depend on directory listing order
    Census_files = sorted(glob.glob(os.path.join(Census_dir, '*.csv')))
    County_files = sorted(glob.glob(os.path.join(County_dir, '*.csv')))

    # County name -> file path, derived from the very files that will be
    # read so that stray non-CSV files cannot shift the pairing
    CountyFileByName = {
        os.path.splitext(os.path.basename(path))[0]: path
        for path in County_files
    }

    if len(County_files) != len(Counties):
        _fail("The number of county data files uploaded does not match the user defined Counties list!")

    missing = [county for county in Counties if county not in CountyFileByName]
    if missing:
        _fail("No county data file found for: " + ", ".join(str(c) for c in missing))

    if CountyNumRows != 12 * len(Census_files):
        _fail("Number of years of census data is not equivalent to number of years of county data!")

    if not Census_files:
        _fail("No census data files were found in the census directory!")

    n_counties = len(Counties)
    n_years = len(Census_files)

    # OPERATIONS =========================================================================================

    # LOADING CENSUS DATA --------------------------------------------------------------------------------

    # One frame per year; the second line of each file is skipped
    Census_frames = [
        pd.read_csv(path, skiprows=[1], usecols=CensusUseCols)
        for path in Census_files
    ]

    # Every year must list the tracts in the same order, because tracts are
    # matched across years by row position
    ref_names = list(Census_frames[0][NameCol])
    for frame in Census_frames[1:]:
        names = list(frame[NameCol])
        if len(names) != len(ref_names) or any(a != b for a, b in zip(names, ref_names)):
            _fail("Census data rows are not in equivalent order! Consistently sort all census data!")
    n_tracts = len(ref_names)

    # Boolean row mask per county. The match is a literal substring test, so
    # a county whose name ends another county's name (e.g. "Lake" and
    # "Salt Lake") would also match the longer one.
    CTnames = Census_frames[0][NameCol]
    CountyMasks = [
        CTnames.str.contains(county + " County", regex=False, na=False).to_numpy(dtype=bool)
        for county in Counties
    ]

    # CALCULATING COUNTY SUMS FROM CENSUS TRACTS ---------------------------------------------------------

    # Per data point: (counties x years) array of tract sums
    CensusData = []
    for point in CensusDataPoints:
        sums = np.zeros((n_counties, n_years))
        for y, frame in enumerate(Census_frames):
            values = frame[point].to_numpy()
            for z, mask in enumerate(CountyMasks):
                sums[z, y] = values[mask].sum()
        CensusData.append(sums)

    # LOADING COUNTY DATA --------------------------------------------------------------------------------

    # Per data point: (months x counties) array, columns in Counties order.
    # Each file is read once; with header=None the frame's column labels are
    # the original 0-based column positions.
    CountyData = [np.zeros((CountyNumRows, n_counties)) for _ in CountyUseCols]
    for x, county in enumerate(Counties):
        County_df = pd.read_csv(CountyFileByName[county], skiprows=CountySkipRows,
                                nrows=CountyNumRows, usecols=CountyUseCols, header=None)
        for d, col in enumerate(CountyUseCols):
            CountyData[d][:, x] = County_df[col].to_numpy()

    # CALCULATING RATIOS - MONTHLY COUNTY DATA / ANNUAL CENSUS DATA BY COUNTY ----------------------------

    # Per data point: (counties x months) array of ratios
    Ratios = []
    for d in range(len(CountyUseCols)):
        # Repeat each annual sum over its 12 months -> (months x counties)
        census_by_month = np.repeat(CensusData[d], 12, axis=1).T
        # Division by a zero census sum is handled on the next line
        with np.errstate(divide='ignore'):
            ratios = CountyData[d] / census_by_month
        ratios[ratios == inf] = 1
        # NOTE: 0/0 yields NaN, which is left in place and reaches the results
        Ratios.append(ratios.T)

    # APPLYING ADJUSTMENTS -------------------------------------------------------------------------------

    Results = []
    for d, point in enumerate(CensusDataPoints):
        # Annual tract values (tracts x years), each repeated for its 12 months
        annual = np.column_stack(
            [frame[point].to_numpy() for frame in Census_frames]
        ).astype(float)
        CensusExpand = np.repeat(annual, 12, axis=1)

        # Every tract receives its county's monthly ratios; tracts that match
        # no county keep a ratio of 0
        RatiosExpand = np.zeros((n_tracts, CountyNumRows))
        for x, mask in enumerate(CountyMasks):
            RatiosExpand[mask, :] = Ratios[d][x, :]

        Results.append(RatiosExpand * CensusExpand)

    # RESULTS --------------------------------------------------------------------------------------------

    for point, result in zip(CensusDataPoints, Results):
        print('\n')
        print("Adjusted " + point)
        print(result)
    
# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

INPUTS:
['Counties.csv', 'ACS', 'BLS', '11', '119', '4', '5', 'GEO_ID', 'NAME', 'DP03_0004E', 'DP03_0005E']








Adjusted DP03_0004E
[[2144 2135 2166 ... 2419 2436 2433]
 [106 105 107 ... 136 137 137]
 [160 159 161 ... 209 210 210]
 ...
 [760 772 774 ... 377 381 375]
 [621 630 632 ... 480 485 477]
 [2022 2053 2060 ... 2018 2038 2007]]


Adjusted DP03_0005E
[[216 211 200 ... 66 68 71]
 [23 23 22 ... 0 0 0]
 [0 0 0 ... 4 4 4]
 ...
 [56 56 53 ... 74 68 78]
 [111 111 106 ... 26 24 28]
 [308 308 292 ... 153 141 162]]
