In [10]:
import os
import re
from os import listdir

import pandas

# Column names applied to the recipient files, which are read without a header row.
recipHeader = ["StateCode", "CountyCode", "CustomerID", "LastName", "FirstName", "MiddleName", "SuffixName", "NameFormat",
               "NameType", "MailingAdd1", "MailingAdd2", "City", "State", "Zip", "Barcode"]

# File-name pattern for the recipient files (NA followed by two digits).
# Every literal dot is escaped (the one in "FINAL.DT" previously matched any
# character) and the pattern is anchored at the end so names with a trailing
# suffix such as ".csv.bak" are rejected when used with regex.match().
recipMatch = re.compile(r"CAS\.WDC11019\.NA[0-9]{2}\.FINAL\.DT11186\.csv$")

In [31]:
def directoryMatch(dired, regex):
    """Return the sorted paths of entries in ``dired`` whose names match ``regex``.

    Parameters
    ----------
    dired : str
        Directory to scan. Only its immediate entries are listed.
    regex : compiled regular expression
        Tested against each bare entry name with ``regex.match`` (so the
        pattern is anchored at the start of the name only).

    Returns
    -------
    list of str
        ``dired`` joined with every matching entry name, in ascending order.
        Empty when nothing matches.
    """
    # os.path.join avoids a doubled separator when ``dired`` already ends
    # with one, which plain "/" concatenation would produce.
    return sorted(
        os.path.join(dired, fName)
        for fName in listdir(dired)
        if regex.match(fName) is not None
    )

In [32]:
# Collect the paths of every recipient file found under the data directory.
recipFiles = directoryMatch(dired="data", regex=recipMatch)

['data/CAS.WDC11019.NA08.FINAL.DT11186.csv',
 'data/CAS.WDC11019.NA09.FINAL.DT11186.csv',
 'data/CAS.WDC11019.NA10.FINAL.DT11186.csv',
 'data/CAS.WDC11019.NA11.FINAL.DT11186.csv']

In [21]:
# Load one recipient file: semicolon-delimited, no header row, so the column
# names come from recipHeader. The path is relative to the project root, like
# the other "data"/"output" paths in this notebook, rather than a
# machine-specific absolute path.
# NOTE: CustomerID becomes the index here. Column-based merges ignore the
# index, so call reset_index() first wherever CustomerID must survive a merge.
recip = pandas.read_csv(
    "data/CAS.WDC11019.NA11.FINAL.DT11186.csv",
    sep=";",
    header=None,
    names=recipHeader,
).set_index("CustomerID")

In [15]:
# State/county office lookup table. header=0 discards the file's own header
# row and names= replaces it with locCodeHeader.
locCodeHeader = ["StateCode", "CountyCode", "OfficeName", "City", "State"]
# Path is relative to the project root, like the other "data"/"output" paths
# in this notebook, rather than a machine-specific absolute path.
locCode = pandas.read_csv("data/foia_state_county_codes.csv",
                          header=0, names=locCodeHeader)
# StateCode/CountyCode intentionally stay as ordinary columns: later cells
# merge on them by column name. (A previous set_index() call here discarded
# its result and therefore had no effect.)
locCode

Unnamed: 0,StateCode,CountyCode,OfficeName,City,State
0,1,0,Alabama State USDA Office,Montgomery,AL
1,1,1,Autauga County USDA Serv Cntr,Autaugaville,AL
2,1,3,Baldwin County USDA Serv Cntr,Bay Minette,AL
3,1,5,Barbour County USDA Serv Cntr,Clayton,AL
4,1,7,Bibb County FSA Office,Marion,AL
5,1,9,Blount County USDA Serv Cntr,Oneonta,AL
6,1,11,Bullock County FSA Office,Troy,AL
7,1,13,Butler County USDA Serv Cntr,Greenville,AL
8,1,15,Calhoun County USDA Serv Cntr,Anniston,AL
9,1,17,Chambers County FSA Office,Opelika,AL


In [16]:
# Loan program lookup table. header=0 discards the file's own header row and
# names= replaces it with progCodeHeader.
progCodeHeader = ["CategoryCode", "CategoryName", "ProgramCode", "CommodityCode", "CommodityName", "ProgramName"]
# Path is relative to the project root, like the other "data"/"output" paths
# in this notebook, rather than a machine-specific absolute path.
progCode = pandas.read_csv("data/loan_program_lookup.csv",
                           header=0, names=progCodeHeader)
progCode

Unnamed: 0,CategoryCode,CategoryName,ProgramCode,CommodityCode,CommodityName,ProgramName
0,A1,EMERGING MARKETS PROGRAM,7847,0,,EMERGING MARKETS PROGRAM
1,A2,AGRICULTURAL MANAGEMENT ASSIST,3520,0,,AGRICULTURAL MANAGEMENT ASSISTANCE PROGRAM
2,A2,AGRICULTURAL MANAGEMENT ASSIST,3530,0,,AGRICULTURAL MANAGEMENT ASSISTANCE PROGRAM ...
3,A3,SOIL/WATER CONSERVATION ASSIST,3535,0,,SOIL & WATER AGRICULTURAL ASSISTANCE PROGRAM
4,A4,QUALITY LOSSES PROGRAM,2661,0,,QUALITY LOSSES PROGRAM ...
5,A4,QUALITY LOSSES PROGRAM,2684,0,,QUALITY LOSSES PROGRAM - AUTHORIZED
6,A5,SUPL OILSEED PAYMENT PROGRAM,2660,2500,FLAX,SUPPLEMENTAL OILSEED PAYMENT PROGRAM ...
7,A5,SUPL OILSEED PAYMENT PROGRAM,2660,2600,SOYBEAN,SUPPLEMENTAL OILSEED PAYMENT PROGRAM
8,A5,SUPL OILSEED PAYMENT PROGRAM,2660,6900,SESAME,SUPPLEMENTAL OILSEED PAYMENT PROGRAM ...
9,A5,SUPL OILSEED PAYMENT PROGRAM,2660,7100,SUNFLOWER,SUPPLEMENTAL OILSEED PAYMENT PROGRAM


In [17]:
def getCountyName(office):
    """Extract the county name from an office name.

    Parameters
    ----------
    office : str
        Office name such as ``"Autauga County USDA Serv Cntr"``.

    Returns
    -------
    str or None
        The text preceding the first ``" County"`` in ``office``, or ``None``
        when ``office`` does not contain ``" County"`` or is not a string
        (for example a NaN produced by a missing value).
    """
    try:
        end = office.find(" County")
    except AttributeError:
        # Non-string input (e.g. float NaN from a missing cell) has no county.
        return None
    return None if end == -1 else office[:end]

# Add a CountyName column derived element-wise from OfficeName; offices whose
# name lacks " County" (such as state offices) get None.
locCode["CountyName"] = locCode["OfficeName"].map(getCountyName)
locCode

Unnamed: 0,StateCode,CountyCode,OfficeName,City,State,CountyName
0,1,0,Alabama State USDA Office,Montgomery,AL,
1,1,1,Autauga County USDA Serv Cntr,Autaugaville,AL,Autauga
2,1,3,Baldwin County USDA Serv Cntr,Bay Minette,AL,Baldwin
3,1,5,Barbour County USDA Serv Cntr,Clayton,AL,Barbour
4,1,7,Bibb County FSA Office,Marion,AL,Bibb
5,1,9,Blount County USDA Serv Cntr,Oneonta,AL,Blount
6,1,11,Bullock County FSA Office,Troy,AL,Bullock
7,1,13,Butler County USDA Serv Cntr,Greenville,AL,Butler
8,1,15,Calhoun County USDA Serv Cntr,Anniston,AL,Calhoun
9,1,17,Chambers County FSA Office,Opelika,AL,Chambers


In [18]:
# Counties of interest. "Westmorland" is spelled the way the office lookup
# data spells it (it matches "Westmorland County USDA Serv Cntr").
nearbyCountyNames = pandas.DataFrame(
    pandas.Series(["Allegheny", "Westmorland", "Washington", "Beaver", "Butler"])
)
# Look the names up in the office table, then keep only the Pennsylvania
# rows, since the same county name can occur in several states.
nearbyCounties = (
    nearbyCountyNames
    .merge(locCode, how="left", left_on=0, right_on="CountyName")
    .loc[lambda matched: matched["State"] == "PA"]
)
nearbyCounties

Unnamed: 0,0,StateCode,CountyCode,OfficeName,City,State,CountyName
0,Allegheny,42,3,Allegheny County FSA Office,Beaver,PA,Allegheny
1,Westmorland,42,129,Westmorland County USDA Serv Cntr,Greensburg,PA,Westmorland
25,Washington,42,125,Washington County USDA Serv Cntr,Washington,PA,Washington
34,Beaver,42,7,Beaver County USDA Serv Cntr,Beaver,PA,Beaver
43,Butler,42,19,Butler County USDA Serv Cntr,Butler,PA,Butler


In [19]:
# Attach each recipient to its nearby-county office record.
# A column-on-column merge ignores both frames' indexes, so when recip is
# indexed by CustomerID the IDs would be silently dropped from the result;
# move the index back into a regular column before merging.
# how="right" keeps every nearby county, including any with no recipients.
localFarmers = (
    (recip.reset_index() if recip.index.name == "CustomerID" else recip)
    .merge(nearbyCounties, on=["CountyCode", "StateCode"], how="right")
)

In [20]:
# Sanity check: an empty result shows that no recipients are attached to the
# PA state office (state code 42, county code 0).
recip.loc[recip["CountyCode"].eq(0) & recip["StateCode"].eq(42)]

Unnamed: 0,StateCode,CountyCode,CustomerID,LastName,FirstName,MiddleName,SuffixName,NameFormat,NameType,MailingAdd1,MailingAdd2,City,State,Zip,Barcode


In [23]:
# Generate the output CSV. Create the output directory first so the export
# also works on a fresh checkout where "output/" does not exist yet.
os.makedirs("output", exist_ok=True)
localFarmers.to_csv("output/localFarmers.csv", sep=',')