In [11]:
import pandas as pd
import numpy as np

from io import StringIO

In [2]:
df = pd.DataFrame({
    'address': ['4126 N 74TH AVE', '2301 E UNIVERSITY DR UNIT 111'],
    'address2': ['APT 4B', ''],
    'city': ['PHOENIX', 'MESA'],
    'state': ['AZ', 'AZ'],
    'zip': ['85033', '85213']
})

In [3]:
df

Unnamed: 0,address,address2,city,state,zip
0,4126 N 74TH AVE,APT 4B,PHOENIX,AZ,85033
1,2301 E UNIVERSITY DR UNIT 111,,MESA,AZ,85213


In [7]:
list(df.columns)

['address', 'address2', 'city', 'state', 'zip']

In [10]:
def moo():
    return({
        'shape': df.shape
    })

In [11]:
moo()

{'shape': (2, 5)}

### Create an empty Pandas DataFrame with predefined schema

In [10]:
str1 = '''apple,banana,cherry,dingleberry,elephant
arnie,bert,claude,dominic,ernie
archie,barney,charlie,donnie,ed'''

str2 = '''alpha,bravo,charlie,delta,echo
alpha,beta,gamma,delta,epsilon
axe,barrel,cart,dremel,easy'''

In [22]:
df_columns = ['a', 'b', 'c', 'd', 'e']
df = pd.DataFrame(columns=df_columns)
df

Unnamed: 0,a,b,c,d,e


In [23]:
inbuf = StringIO()
inbuf.write(str1)
inbuf.seek(0)
a_temp = pd.read_csv(inbuf, header=None, dtype=str, encoding='utf-8', quotechar='"', names=df_columns)
a_temp.fillna('', inplace=True)
df = pd.concat([df, a_temp], ignore_index=True)
df

Unnamed: 0,a,b,c,d,e
0,apple,banana,cherry,dingleberry,elephant
1,arnie,bert,claude,dominic,ernie
2,archie,barney,charlie,donnie,ed


In [24]:
inbuf = StringIO()
inbuf.write(str2)
inbuf.seek(0)
a_temp = pd.read_csv(inbuf, header=None, dtype=str, encoding='utf-8', quotechar='"', names=df_columns)
a_temp.fillna('', inplace=True)
df = pd.concat([df, a_temp], ignore_index=True)
df

Unnamed: 0,a,b,c,d,e
0,apple,banana,cherry,dingleberry,elephant
1,arnie,bert,claude,dominic,ernie
2,archie,barney,charlie,donnie,ed
3,alpha,bravo,charlie,delta,echo
4,alpha,beta,gamma,delta,epsilon
5,axe,barrel,cart,dremel,easy


In [4]:
df_schema = {
    'a': 'str',
    'b': 'int64',
    'c': 'int32',
    'd': 'float64',
    'e': 'datetime'
}
df = pd.DataFrame(dtype=df_schema)

ValueError: entry not a 2- or 3- tuple

### feature engineering

In [30]:
ser = df['address'].str.cat(df['address2'], ' ').str.strip()
for s in ser:
    print('>>>{}<<<'.format(s))

>>>4126 N 74TH AVE APT 4B<<<
>>>2301 E UNIVERSITY DR UNIT 111<<<


### assignment

In [12]:
df2 = df.copy()

In [13]:
del(df2['address'])

In [14]:
df2

Unnamed: 0,city,state,zip
0,PHOENIX,AZ,85033
1,MESA,AZ,85213


In [15]:
df

Unnamed: 0,address,city,state,zip
0,4126 N 74TH AVE,PHOENIX,AZ,85033
1,2301 E UNIVERSITY DR UNIT 111,MESA,AZ,85213


### stringify

In [4]:
from io import StringIO

In [7]:
df.to_json()

'{"address":{"0":"4126 N 74TH AVE","1":"2301 E UNIVERSITY DR UNIT 111"},"city":{"0":"PHOENIX","1":"MESA"},"state":{"0":"AZ","1":"AZ"},"zip":{"0":"85033","1":"85213"}}'

In [9]:
resp = {
  "statusCode": 200,
  "body": "{\"rowid\":{\"0\":0,\"1\":1},\"address\":{\"0\":\"4126 N 74TH AVE, PHOENIX, AZ, 85033\",\"1\":\"2301 E UNIVERSITY DR UNIT 111, MESA, AZ, 85213\"},\"match1\":{\"0\":\"Match\",\"1\":\"No_Match\"},\"match2\":{\"0\":\"Exact\",\"1\":null},\"maddress\":{\"0\":\"4126 N 74TH AVE, PHOENIX, AZ, 85033\",\"1\":null},\"tlid\":{\"0\":\"128193652\",\"1\":null},\"sside\":{\"0\":\"L\",\"1\":null},\"state\":{\"0\":\"04\",\"1\":null},\"county\":{\"0\":\"013\",\"1\":null},\"tract\":{\"0\":\"109602\",\"1\":null},\"block\":{\"0\":null,\"1\":null},\"lon\":{\"0\":\"-112.218796\",\"1\":\"\"},\"lat\":{\"0\":\"33.496357\",\"1\":\"\"},\"source\":{\"0\":\"ACS2018\",\"1\":\"ACS2018\"}}"
}

In [12]:
pd.read_json(resp['body'])

Unnamed: 0,rowid,address,match1,match2,maddress,tlid,sside,state,county,tract,block,lon,lat,source
0,0,"4126 N 74TH AVE, PHOENIX, AZ, 85033",Match,Exact,"4126 N 74TH AVE, PHOENIX, AZ, 85033",128193652.0,L,4.0,13.0,109602.0,,-112.218796,33.496357,ACS2018
1,1,"2301 E UNIVERSITY DR UNIT 111, MESA, AZ, 85213",No_Match,,,,,,,,,,,ACS2018
