SDAT information is maintained from a baseline that is updated through Q2 2020, available as a zip file at the link below.  The file of interest is: Dorc2017.dbf  
https://www.dropbox.com/s/oc1l1frorg66vlr/DORC_MPV17.zip
extract it using: $> tar -xf DORC_MPV17.zip Dorc2017/ATDATA/DATABASE/Dorc2017.dbf

Updating this file requires downloading monthly (or quarterly) changes listed on this page:
https://planning.maryland.gov/Pages/OurProducts/DownloadFiles.aspx

All of the updates through 4/30/2021 have been aggregated in a single file written from this notebook.

### NOTE: This notebook adds the assessment fields onto the SDAT before saving it.

In [1]:
# add any packages that aren't available by default
!pip install simpledbf



In [2]:
import pandas as pd
from simpledbf import Dbf5

In [3]:
# Mount Google Drive (Colab only). The mount point here is absolute
# ('/content/drive'), while later cells use paths relative to the working
# directory ('drive/My Drive/...') -- this assumes the notebook's cwd is /content.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### Open the baseline file from the state

In [4]:
# Read the state's baseline DBF and key every parcel record by its account id.
dbf = Dbf5('drive/My Drive/SDAT/Dorc2017.dbf')
df = dbf.to_dataframe().set_index('acctid')

#### Discover all the update files, and append them in the order they were published

In [5]:
from os import walk

def update(df, data_dir='drive/My Drive/SDAT/', jurscode='DORC'):
  """Apply every SALE*.dbf update file found in ``data_dir`` to ``df``.

  Update files are applied oldest first, so that when an account appears in
  several files the most recently published record wins.

  Args:
    df: baseline records indexed by ``acctid``. It is not modified; a new
      frame is returned.
    data_dir: directory holding the update files. Only its top level is
      scanned; a missing directory simply yields no updates.
    jurscode: value of the ``jurscode`` column to keep from each update file.

  Returns:
    A DataFrame with the same columns as ``df``, one row per ``acctid``.
    Accounts present in an update file are replaced wholesale by the update
    row, so columns that exist only in the baseline are NaN for those rows.
  """
  import os
  import re

  def publication_key(name):
    """Sort key for a SALEmmyy file name: parsed names first, by (year, month)."""
    stamp = re.search(r'SALE(\d{2})(\d{2})', name.upper())
    if stamp is None:
      # Unrecognised names go last, in alphabetical order.
      return (1, 0, 0, name.upper())
    month, year = int(stamp.group(1)), int(stamp.group(2))
    return (0, year, month, name.upper())

  print("rows:",len(df))
  # next() takes only the first walk() entry, i.e. the top level of data_dir.
  dirpath, _, filenames = next(os.walk(data_dir), (data_dir, [], []))
  sale_files = [name for name in filenames
                if 'SALE' in name.upper() and name.upper().endswith('.DBF')]

  # A plain alphabetical sort would order by month before year (and by letter
  # case), so sort on the date encoded in the file name instead.
  for name in sorted(sale_files, key=publication_key):
    path = os.path.join(dirpath, name)
    print(path)
    new_df = Dbf5(path).to_dataframe()
    new_df.columns = [col.lower() for col in new_df.columns]
    new_df = new_df[new_df['jurscode'] == jurscode].set_index('acctid')
    new_df = new_df[~new_df.index.duplicated(keep='last')]
    if new_df.empty:
      continue
    # Keep only the columns the baseline already has, in the baseline's order.
    shared_columns = [col for col in df.columns if col in new_df.columns]
    # Drop the superseded rows, then add the updated ones at the end.
    untouched = df[~df.index.isin(new_df.index)]
    df = pd.concat([untouched, new_df[shared_columns]])

  df = df[~df.index.duplicated(keep='last')]
  print("final:",len(df))
  return df

# Pass a copy so the baseline `df` is guaranteed to stay as loaded; the cells
# below compare it against merged_df.
merged_df = update(df.copy())

23202
drive/My Drive/SDAT/SALE0420.dbf
23208
drive/My Drive/SDAT/SALE0520.dbf
23208
drive/My Drive/SDAT/SALE0620.dbf
23208
drive/My Drive/SDAT/SALE0720.dbf
23208
drive/My Drive/SDAT/SALE0820.dbf
23208
drive/My Drive/SDAT/SALE1020.dbf
23209
drive/My Drive/SDAT/SALE1120.dbf
23209
drive/My Drive/SDAT/SALE1220.dbf
23209
drive/My Drive/SDAT/Sale0121.dbf
23211
drive/My Drive/SDAT/Sale0221.dbf
23211
drive/My Drive/SDAT/Sale0321.dbf
23211
drive/My Drive/SDAT/Sale0421.dbf
23214
drive/My Drive/SDAT/Sale0521.dbf
23214
23214


#### Check to verify the updates from MD are applied OK.  Look up one record that we know was updated.  The merged_df should be like the new record, and the original df should be different now.

In [8]:
# Load the newest update file on its own and show the record we expect to have changed.
add_df = Dbf5('drive/My Drive/SDAT/Sale0521.dbf')
test_df = (
    add_df.to_dataframe()
    .rename(columns=str.lower)
    .query('jurscode == "DORC"')
    .set_index('acctid')
)

test_df.query('acctid == "1001000020"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,address,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,towncode,desctown,subdivsn,dsubcode,descsubd,plat,section,block,...,depth,pfuw,pfus,pflw,pfsp,pfsu,pfic,pfih,recind,yearblt,sqftstrc,strugrad,descgrad,strucnst,desccnst,strustyl,descstyl,strubldg,descbldg,lastinsp,lastassd,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,curlndvl,curimpvl,curttlvl,sallndvl,salimpvl,salttlvl,ptype,sdatwebadr,existing
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000020,DORC,505757.4,99135.2,24019970100,240199701002,80,D,5430 INDIANTOWN ROAD,RHODESDALE,21659,PLEASANTS THOMAS R,,PLEASANTS THOMAS R,5430 INDIANTOWN RD,,RHODESDALE,MD,21659,,5430,,INDIANTOWN,RD,RHODESDALE,21659,,IMPV58.963 ACRES,W/S INDIANTOWN RD.,S OF BROOKVIEW,,1595,347,,,0,10000,,,,,...,0.0,1,2,0,0,0,0,0,0,1976,3028,4,Codes range from lowest to highest quality 1-9,1,CNST Siding,1,STRY 1 Story No Basement,1,DWEL Standard Unit,201709,189912,1014,,PLEASANTS THOMAS L & ARLENE F,PLC,266,744,4,20200914,0,0,71800,266300,0,71800,266300,0,2,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18


In [6]:
# The same account in the untouched baseline, for comparison.
df.query('acctid == "1001000020"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,...,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,nfmlndvl,nfmimpvl,nfmttlvl,crtarcod,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000020,DORC,505757.4,99135.2,24019970100,240199701002,80,D,SF,5430 INDIANTOWN ROAD,5430,,INDIANTOWN,RD,,,P,RHODESDALE,21659,PLEASANTS THOMAS L,PLEASANTS ARLENE F,PLEASANTS THOMAS L,5430 INDIANTOWN RD,,RHODESDALE,MD,21659,,5430,,INDIANTOWN,RD,RHODESDALE,21659,,IMPS58.963 ACRES,W/S INDIANTOWN RD.,S OF BROOKVIEW,PLC,266,744,...,1014,910736,"ALLEN, CHARLES WALTER AND",,,,1,19910314,160000,0,71800,266300,338100,,,0.0,,,0,,,0.0,,,,,0,1,0,0,0,0,0,0,0,1,2,http://sdat.dat.maryland.gov/RealProperty/Page...,MDPV2017_18,2020JUN


In [7]:
# The same account after the update files have been applied.
merged_df.query('acctid == "1001000020"')

Unnamed: 0_level_0,jurscode,digxcord,digycord,ct2010,bg2010,geogcode,ooi,resityp,address,strtnum,strtdir,strtnam,strttyp,strtsfx,strtunt,addrtyp,city,zipcode,ownname1,ownname2,namekey,ownadd1,ownadd2,owncity,ownstate,ownerzip,ownzip2,premsnum,premsdir,premsnam,premstyp,premcity,premzip,premzip2,legal1,legal2,legal3,dr1clerk,dr1liber,dr1folio,...,assessor,transno1,grntnam1,gr1clrk1,gr1libr1,gr1folo1,convey1,tradate,considr1,mortgag1,nfmlndvl,nfmimpvl,nfmttlvl,crtarcod,fcmacode,agfndarea,agfndluom,entzndat,entznassm,plndevdat,nprctstdat,nprcarea,nprcluom,homqlcod,homqldat,bldg_story,bldg_units,resident,resi2010,resi2000,resi1990,resiuths,aprtment,trailer,special,other,ptype,sdatwebadr,existing,mdpvdate
acctid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1001000020,DORC,505757.4,99135.2,24019970100,240199701002,80,D,,5430 INDIANTOWN ROAD,,,,,,,,RHODESDALE,21659,PLEASANTS THOMAS R,,PLEASANTS THOMAS R,5430 INDIANTOWN RD,,RHODESDALE,MD,21659,,5430,,INDIANTOWN,RD,RHODESDALE,21659,,IMPV58.963 ACRES,W/S INDIANTOWN RD.,S OF BROOKVIEW,,1595,347,...,1014,,PLEASANTS THOMAS L & ARLENE F,PLC,266,744,4,20200914,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,https://sdat.dat.maryland.gov/RealProperty/Pag...,MDPV2017_18,


#### Verify the result joins cleanly with some enrichment data we carry from year to year  
These are things like names of rental operation groups, notes, etc. that we might want to reference with the new data.  They will be added later so we don't have to store them multiple times.

In [10]:
# Read the account id as text so the join key matches the string acctid index of merged_df.
enrichment = pd.read_csv("drive/My Drive/SDAT/CAN-ref.csv", dtype={'acctid': str}).set_index('acctid')
# Outer join with an indicator column so matched and unmatched rows can be counted.
e_merge = merged_df.merge(enrichment, left_index=True, right_index=True, how='outer', indicator=True)
matched_count = (e_merge['_merge'] == 'both').sum()
unmatched_count = (e_merge['_merge'] == 'right_only').sum()
print(len(enrichment), "records. Enriched after the join:", matched_count)
print("These have an issue, but that looks ok because they aren't housing:", unmatched_count)
# To inspect the enrichment rows that found no SDAT record:
#e_merge.query('_merge == "right_only"')

(7924, 'records. Enriched after the join:', 7859)
These have an issue, but that looks ok because they aren't housing: 65


### Add the latest assessment data, grabbing it directly from MD Open Data

In [11]:
# MD Open Data column -> short column name used in the combined data set.
# Keeping the pairs in one mapping guarantees that every field gets a name and
# that the names cannot drift out of step with the fields.
# NOTE(review): 'address_unit_id' is paired with the number-suffix field, following
# the order of the original name list -- confirm that is the intended meaning.
assessment_columns = {
    'account_id_mdp_field_acctid': 'acctid',
    'real_property_search_link': 'sdat',
    'search_google_maps_for_this_location': 'google_maps',
    'c_a_m_a_system_data_structure_area_sq_ft_mdp_field_sqftstrc_sdat_field_241': 'struct_sqft',
    'current_assessment_year_total_phase_in_value_sdat_field_171': 'assessed_value',
    'c_a_m_a_system_data_year_built_yyyy_mdp_field_yearblt_sdat_field_235': 'year_built',
    'premise_address_number_mdp_field_premsnum_sdat_field_20': 'address_number',
    'premise_address_number_suffix_sdat_field_21': 'address_unit_id',
    'premise_address_direction_mdp_field_premsdir_sdat_field_22': 'street_direction',
    'premise_address_name_mdp_field_premsnam_sdat_field_23': 'street_name',
    'premise_address_type_mdp_field_premstyp_sdat_field_24': 'street_type',
    'premise_address_city_mdp_field_premcity_sdat_field_25': 'address_city',
    'premise_address_zip_code_mdp_field_premzip_sdat_field_26': 'address_zip',
    'mdp_street_address_mdp_field_address': 'street_address',
}
assessment_fields = list(assessment_columns)
assessment_column_names = list(assessment_columns.values())

# Read the account id as text (it is the join key against the SDAT records) and
# let pandas infer each remaining column from the whole file instead of chunk by
# chunk, which avoids the mixed-dtype warning.
assessments = pd.read_csv(
    'https://opendata.maryland.gov/resource/ed4q-f8tm.csv?jurisdiction_code_mdp_field_jurscode=DORC&$limit=25000',
    dtype={'account_id_mdp_field_acctid': str},
    low_memory=False,
)
assessments = (
    assessments[assessment_fields]
    .rename(columns=assessment_columns)
    .set_index('acctid')
)

  interactivity=interactivity, compiler=compiler, result=result)


In [33]:
# Both frames are indexed by acctid, so join on the indexes explicitly. Without
# explicit keys merge() falls back to the columns the two frames share, and
# `assessments` no longer has acctid as a column.
sdat_plus_assessments = merged_df.merge(
    assessments, how='outer', left_index=True, right_index=True, indicator=True
).rename_axis('acctid')
print("SDAT has ",len(merged_df), "records.  After the join there are:",len(sdat_plus_assessments.query('_merge == "both"')))

SDAT has  23191 records.  After the join there are: 23191


### Write the combined data set out for use later.

In [36]:
# Keep only accounts found in both sources, drop the merge indicator, and save the result.
(
    sdat_plus_assessments
    .loc[lambda frame: frame['_merge'] == 'both']
    .drop(columns='_merge')
    .to_csv('drive/My Drive/pita 2021/SDAT-CAN-ref-202105.csv')
)