In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd

# Read the Anson County parcel dataset from the shared source-data folder into
# a GeoDataFrame, then preview the first rows.
data = gpd.read_file("../00_source_data/anson-parcels")
data.head()

Unnamed: 0,ALTPARNO,CNTYFIPS,CNTYNAME,GISACRES,GNISID,IMPROVVAL,LANDVAL,LEGDECFULL,MADDPREF,MADDRNO,...,STRUCT,STRUCTNO,STRUCTYEAR,SUBDIVISIO,SUBOWNTYPE,SUBSURFOWN,SUNIT,SZIP,TRANSFDATE,geometry
0,UNION COUNTY,7,Anson,3.542672,1008534,0.0,0.0,,,,...,,,0,,,,,,2022-12-26,POINT (1608012.361 429959.033)
1,NewParcel709,7,Anson,0.004462,1008534,0.0,0.0,,,,...,,,0,,,,,,2022-12-26,POINT (1626508.486 508899.478)
2,NewParcel264,7,Anson,0.000318,1008534,0.0,0.0,,,,...,,,0,,,,,,2022-12-26,POINT (1691005.125 449305.302)
3,ASSESSED IN UNION,7,Anson,31.50341,1008534,0.0,0.0,,,,...,,,0,,,,,,2022-12-26,POINT (1614986.389 494392.328)
4,assessed in union county,7,Anson,0.1887,1008534,0.0,0.0,,,,...,,,0,,,,,,2022-12-26,POINT (1613144.845 478086.780)


In [2]:
# Show every column name in the freshly loaded parcel data.
data.columns

Index(['ALTPARNO', 'CNTYFIPS', 'CNTYNAME', 'GISACRES', 'GNISID', 'IMPROVVAL',
       'LANDVAL', 'LEGDECFULL', 'MADDPREF', 'MADDRNO', 'MADDSTNAME', 'MADDSTR',
       'MADDSTSUF', 'MADDSTTYP', 'MAILADD', 'MAPREF', 'MCITY', 'MSTATE',
       'MULTISTRUC', 'MUNIT', 'MZIP', 'NPARNO', 'OwnerName2', 'OWNFRST',
       'OWNLAST', 'OWNNAME', 'OWNNAME2', 'OWNTYPE', 'PARNO', 'PARUSECD2',
       'PARUSECODE', 'PARUSEDESC', 'PARUSEDSC2', 'PARVAL', 'PARVALTYPE',
       'PRESENTVAL', 'RECAREANO', 'RECAREATX', 'REVDATETX', 'REVISEDATE',
       'REVISEYEAR', 'SADDNO', 'SADDPREF', 'SADDSTNAME', 'SADDSTR',
       'SADDSTSUF', 'SADDSTTYP', 'SALEDATE', 'SALEDATETX', 'SCITY', 'SITEADD',
       'SOURCEAGNT', 'SOURCEDATE', 'SOURCEDATX', 'SOURCEREF', 'SSTATE',
       'STCNTYFIPS', 'STFIPS', 'STNAME', 'STRUCT', 'STRUCTNO', 'STRUCTYEAR',
       'SUBDIVISIO', 'SUBOWNTYPE', 'SUBSURFOWN', 'SUNIT', 'SZIP', 'TRANSFDATE',
       'geometry'],
      dtype='object')

In [3]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(18874, 69)

In [4]:
# Percentage of missing values per column, showing the ten columns with the
# highest share. Basically, several columns are missing data completely.
na_counts = data.isna().sum()
na_percent = (na_counts * 100) / data.shape[0]
na_percent.sort_values(ascending=False)[:10]

OWNTYPE       100.0
OwnerName2    100.0
OWNLAST       100.0
OWNNAME2      100.0
SCITY         100.0
PARUSECD2     100.0
PARUSECODE    100.0
PARUSEDESC    100.0
PARUSEDSC2    100.0
PARVAL        100.0
dtype: float64

In [5]:
# Find the columns in which every single value is missing.
all_na = data.isna().all()
remove = all_na[all_na].index.tolist()
print(f"{len(remove)} columns have all NAs")

# Drop those columns from the frame in place, then show the new dimensions.
data.drop(columns=remove, inplace=True)
data.shape

41 columns have all NAs


(18874, 28)

In [6]:
# Columns that remain after dropping the all-NA ones.
data.columns

## Descriptions of selected columns:
## ALTPARNO - An alternate local parcel number for the parcel record.
## GISACRES - The area of the feature in acres - computed from the GIS, this is not the recorded area.
## GNISID - The Geographic Names Information System identifier for the local place for the parcel.  The default value is the county GNIS number but as this data set develops, individual parcels may have a GNIS identifier, such as local parks or attractions.
## IMPROVVAL - The value of the improvements on the parcel in dollars.
## LANDVAL - The value of the land represented by the parcel in dollars.
## NPARNO - The local parcel number with the state and county FIPS added to the beginning of the local parcel number (PARNO).
## OWNNAME - The full name of the primary property owner (surface owner).
## RECAREANO - The record or recorded area as a numeric field (deed acres).

Index(['ALTPARNO', 'CNTYFIPS', 'CNTYNAME', 'GISACRES', 'GNISID', 'IMPROVVAL',
       'LANDVAL', 'MAILADD', 'MCITY', 'MSTATE', 'MULTISTRUC', 'MZIP', 'NPARNO',
       'OWNNAME', 'PARNO', 'PARVALTYPE', 'RECAREANO', 'RECAREATX', 'SITEADD',
       'SOURCEAGNT', 'SOURCEREF', 'SSTATE', 'STCNTYFIPS', 'STFIPS', 'STNAME',
       'STRUCTYEAR', 'TRANSFDATE', 'geometry'],
      dtype='object')

In [7]:
# Preview the first rows again now that the all-NA columns have been dropped.
data.head()

Unnamed: 0,ALTPARNO,CNTYFIPS,CNTYNAME,GISACRES,GNISID,IMPROVVAL,LANDVAL,MAILADD,MCITY,MSTATE,...,SITEADD,SOURCEAGNT,SOURCEREF,SSTATE,STCNTYFIPS,STFIPS,STNAME,STRUCTYEAR,TRANSFDATE,geometry
0,UNION COUNTY,7,Anson,3.542672,1008534,0.0,0.0,""" """" """,,,...,,Anson County Assessor,_,NC,37007,37,NC,0,2022-12-26,POINT (1608012.361 429959.033)
1,NewParcel709,7,Anson,0.004462,1008534,0.0,0.0,""" """" """,,,...,,Anson County Assessor,_,NC,37007,37,NC,0,2022-12-26,POINT (1626508.486 508899.478)
2,NewParcel264,7,Anson,0.000318,1008534,0.0,0.0,""" """" """,,,...,,Anson County Assessor,_,NC,37007,37,NC,0,2022-12-26,POINT (1691005.125 449305.302)
3,ASSESSED IN UNION,7,Anson,31.50341,1008534,0.0,0.0,""" """" """,,,...,,Anson County Assessor,_,NC,37007,37,NC,0,2022-12-26,POINT (1614986.389 494392.328)
4,assessed in union county,7,Anson,0.1887,1008534,0.0,0.0,""" """" """,,,...,,Anson County Assessor,_,NC,37007,37,NC,0,2022-12-26,POINT (1613144.845 478086.780)


In [8]:
# List every distinct owner name (None included) to look for wording that
# identifies non-residential owners.
# NOTE(review): this prints the full list, which is long and contains personal
# names - consider showing only a sample before sharing the notebook.
list(data['OWNNAME'].unique())

[None,
 'TAYLOR JODY MATTHEW II &',
 'STATEOF NORTH CAROLINA',
 'STATE OF NORTH CAROLINA',
 'TC&I TIMBER CO LLC',
 'ON POINT VENTURES LLC',
 'DELIGERO DELL E &',
 'DELIGERO SIMPLICIO E &',
 'ROSS TAMMY A',
 'JARMAN HAILY A',
 'BUTCHER ROBERT  & WIFE',
 'DONALDO ANGELA S  &',
 'ALBURO MARIA PAZ R',
 'COVER JOHN & WIFE',
 'DELIGERO PASCUAL & WIFE',
 'HONSTEAD TERESA & PRESTON A',
 'MCFARLAND WILLIAM &',
 'TOWNSEND LENNOX',
 'ZAREMBY DAVID M & WIFE',
 'VICERA MONTANA L',
 'SHIRLEY ROBERT JR &',
 'PRECIADOS VIRGILIO C',
 'ESTEP JAMES & WIFE JOANNE',
 'XAVIER STEPHEN & WIFE',
 'URAM THOMAS G  & WIFE',
 'BERGES LUIS E & WIFE',
 'ORELLANA JENNIFER ANDREA',
 'CARDILICCHIA TINA  &',
 'PETERS JAMES A',
 'COLON-MELENDEZ YVETTE',
 'BRASWELL SHANNON LEE & WIFE',
 'CROSSINGS PROPERTY OWNERS ASS',
 'DUKE ENERGY PROGESS INC',
 'JACOBS JAMES ANDREW & WIFE',
 'GUASCH SUZANNE',
 'KAPLAN DAVID & WIFE',
 'PETERSEN EARLE D  &  WIFE',
 'BROSKY THOMAS ANTHONY II &WIFE',
 'KHATIB RANDA MNAYMNEH',
 'HARRISON CA

The best way to get information on residential units (RUs) is to use the name of the primary owner. Using the assumption that commercial units will have labels like 'LLC' or 'Ventures', we can filter for RUs. 
Notes: We need to check whether commercial units can be registered under the name of the property owner. There are also 175 nulls in this column.


In [9]:
# Substrings assumed to indicate that the owner is NOT a residential unit.
# (The original list contained "CORP" twice; the duplicate is removed.)
key = [
    "STATE", "STATEOF", "LLC", "VENTURES", "CHURCH", "INC", "ASS", "DUKE",
    "TRUST", "PROPERTY", "PROPERTIES", "COUNTY", "CEMETERY", "CORP",
    "HOLDINGS", "ELECTRIC", "WOODYARD", "FARMS",
]
# NOTE(review): matching is by plain substring, so short keys can also hit
# personal names (e.g. "INC" inside "VINCENT", "ASS" inside "BASS").
# Consider whole-word matching when the filter is refined.


def _is_not_ru(owner_name):
    """Flag one owner name against the ``key`` substrings.

    Returns True if any entry of ``key`` occurs in ``owner_name``, False if
    none does, and None when ``owner_name`` is not a string (missing value).
    """
    # Handle missing names explicitly instead of relying on a bare ``except:``,
    # which would also hide unrelated errors and keyboard interrupts.
    if not isinstance(owner_name, str):
        return None
    return any(marker in owner_name for marker in key)


# One flag per row, in the same order as data['OWNNAME'].
notRU = [_is_not_ru(owner_name) for owner_name in data['OWNNAME']]

In [10]:
# Store the per-row flags as a new column on the frame.
data['notRU']=notRU
# Count the flags. Rows whose flag is None (owner name missing) are left out,
# because value_counts() drops missing values by default.
data['notRU'].value_counts() #So there are 16,176 Residential Units (notRU == False)

False    16176
True      2523
Name: notRU, dtype: int64

As a next step, we can improve the filtering. Further, we can flag parcels as non-RUs if the area they cover seems unusual.

In [11]:
# Summary statistics of the GIS acreage for parcels flagged as residential.
# The comparison with False is explicit because the column can also hold None.
residential = data['notRU'] == False
data.loc[residential, 'GISACRES'].describe()

count    16176.000000
mean        13.265803
std         45.700110
min          0.005498
25%          0.498611
50%          1.494133
75%          8.265828
max       2883.638974
Name: GISACRES, dtype: float64

In [12]:
# Parcels flagged as residential whose GIS acreage is above the threshold of 500.
residential = data['notRU'] == False
oversized = data['GISACRES'] > 500
data[residential & oversized]

Unnamed: 0,ALTPARNO,CNTYFIPS,CNTYNAME,GISACRES,GNISID,IMPROVVAL,LANDVAL,MAILADD,MCITY,MSTATE,...,SOURCEAGNT,SOURCEREF,SSTATE,STCNTYFIPS,STFIPS,STNAME,STRUCTYEAR,TRANSFDATE,geometry,notRU
571,742600234166,7,Anson,693.901678,1008534,0.0,445300.0,"PO BOX 575"" ""NORWOOD"" ""28128-0000",NORWOOD,NC,...,Anson County Assessor,239_0308,NC,37007,37,NC,0,2022-12-26,POINT (1721129.563 462337.057),False
774,742300358033,7,Anson,619.253441,1008534,0.0,618900.0,"PO BOX 418"" ""LILESVILLE"" ""28091-0000",LILESVILLE,NC,...,Anson County Assessor,072_0172,NC,37007,37,NC,0,2022-12-26,POINT (1723776.753 435342.485),False
1539,740700796007,7,Anson,588.950717,1008534,161700.0,289600.0,"78 CROSSBILL LANE UNIT 3"" ""HENDERSONVILLE"" ""28...",HENDERSONVILLE,NC,...,Anson County Assessor,190_0879,NC,37007,37,NC,0,2022-12-26,POINT (1707429.862 479044.006),False
1558,740700288449,7,Anson,577.019585,1008534,0.0,218100.0,"122 SIDES RD"" ""MT GILEAD"" ""27306-0000",MT GILEAD,NC,...,Anson County Assessor,249_0473,NC,37007,37,NC,0,2022-12-26,POINT (1702553.891 478103.354),False
2579,740200426850,7,Anson,780.901378,1008534,125400.0,542700.0,"2323 DIGGS RD"" ""WADESBORO"" ""28170-0000",WADESBORO,NC,...,Anson County Assessor,450_0198,NC,37007,37,NC,0,2022-12-26,POINT (1705949.000 422238.161),False
3025,732900471026,7,Anson,2883.638974,1008534,268600.0,1858200.0,"PO DRAWER 1449"" ""CHERAW"" ""29520-0000",CHERAW,SC,...,Anson County Assessor,561_0222,NC,37007,37,NC,0,2022-12-26,POINT (1718998.563 396489.762),False
3420,656000227758,7,Anson,537.818889,1008534,0.0,256800.0,"PO BOX 36"" ""NORWOOD"" ""28128-0000",NORWOOD,NC,...,Anson County Assessor,137_0310,NC,37007,37,NC,0,2022-12-26,POINT (1663933.806 502111.422),False
7203,648300404136,7,Anson,673.839651,1008534,8200.0,443800.0,"PO BOX 215"" ""WADESBORO"" ""28170-0000",WADESBORO,NC,...,Anson County Assessor,134_0514,NC,37007,37,NC,0,2022-12-26,POINT (1684660.867 429354.731),False
7285,648100447935,7,Anson,705.119904,1008534,0.0,452800.0,"9000 HWY 49N PO BOX 535"" ""MT PLEASANT"" ""28124-...",MT PLEASANT,NC,...,Anson County Assessor,619_0307,NC,37007,37,NC,0,2022-12-26,POINT (1686065.478 412301.724),False
12329,646100943720,7,Anson,532.94641,1008534,35900.0,389200.0,"28838 KENDALLS CHURCH ROAD"" ""RICHFIELD"" ""28137...",RICHFIELD,NC,...,Anson County Assessor,025_0049,NC,37007,37,NC,0,2022-12-26,POINT (1669351.769 413391.285),False


In [13]:
# Owner names of the parcels flagged as residential but larger than 500 acres.
# Several look like businesses, so remove some of these in the future.
residential = data['notRU'] == False
oversized = data['GISACRES'] > 500
data.loc[residential & oversized, 'OWNNAME']

571            CLODFELTER T LYNN & WIFE
774        HEDRICK B V GRAVEL & SAND CO
1539                 BRUTON JEFFERSON H
1558             SIDES PRESTON EUGENE &
2579       OAKS RONALD THOMAS & PATSY S
3025       MCLEOD PARTNERS LIMITED PART
3420                   BEACHUM SHERRY A
7203              CAPEL EVELYN REDFEARN
7285        PIEDMONT HARDWOOD LUMBER CO
12329               BURLESON ANDREW W &
13060            EDWARDS TIMBER COMPANY
13194            EDWARDS TIMBER COMPANY
14247         LITTLE FAMILY PARTNERSHIP
14293           DAVID BALL TUCKER ET AL
14420    HUNTLEY FAMILY LIMITED PRTNSHP
15210       CAUDLE ABIGAIL SUZANNE ETAL
15429          BELL CREEK LAND & TIMBER
16552                 EDWARDS TIMBER CO
Name: OWNNAME, dtype: object