# Cleanup and Exploration of 2015 AQ Data

In [1]:
import pandas as pd
import json as json

In [2]:
df = pd.read_csv('2015 AQ.csv')

In [3]:
df.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,sourcename,sourcetype,mobile
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,Texas,,
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,


The entry is not valid json.  The field will need processing code.

In [4]:
def validate_json_fields(field: str) -> bool:
    """This function validates a JSON entry is valid.  Returns True if so, False if otherwise."""
    try:
        json.loads(field)
        return True
    except ValueError:
        return False

In [5]:
def validate_json_column(series: pd.Series):
    temp_df = pd.DataFrame()
    temp_df['values'] = series
    temp_df['validity'] = series.apply(validate_json_fields)
    return temp_df

## Validation of composite fields as json

In [6]:
output = validate_json_column(df['date'])

In [7]:
output.head()

Unnamed: 0,values,validity
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",False
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",False
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",False
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",False
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",False


In [8]:
output[output['validity'] == True].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [9]:
output = validate_json_column(df['attribution'])
output.head()

Unnamed: 0,values,validity
0,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]",False
1,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]",False
2,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]",False
3,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]",False
4,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]",False


In [10]:
output[output['validity'] == True].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [11]:
output = validate_json_column(df['averagingperiod'])
output.head()

Unnamed: 0,values,validity
0,"{unit=hours, value=1.0}",False
1,"{unit=hours, value=1.0}",False
2,"{unit=hours, value=1.0}",False
3,"{unit=hours, value=1.0}",False
4,"{unit=hours, value=1.0}",False


In [12]:
output[output['validity'] == True].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [13]:
output = validate_json_column(df['coordinates'])
output.head()

Unnamed: 0,values,validity
0,"{latitude=29.767996, longitude=-95.22058}",False
1,"{latitude=29.767996, longitude=-95.22058}",False
2,"{latitude=29.767996, longitude=-95.22058}",False
3,"{latitude=29.901037, longitude=-95.32613}",False
4,"{latitude=29.767996, longitude=-95.22058}",False


In [14]:
output[output['validity'] == True].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


All of the composite fields are in a "JSON-like format"  need to convert to JSON.

## Reprocessing the Multivalue Fields into Valid JSON

In [15]:
def correct_field_to_json(arg: str) -> str:
    string = arg.lstrip("{{")
    string = string.rstrip("]}")
    strings = string.split(",")
    new_strings = []
    for entry in strings:
        index = entry.find('=')
        entry = '"' + entry[0:index] + '"' + ':' + '"'+ entry[index+1:len(entry)]+'"'
        new_strings.append(entry)
    output_string = (',').join(entry for entry in new_strings)
    output_string = '{' + output_string + '}'
    return output_string

In [16]:
def correct_json_column(series: pd.Series, column_name: str) -> pd.DataFrame:
    temp_df = pd.DataFrame()
    temp_df[column_name] = series.apply(correct_field_to_json)
    return temp_df

In [17]:
test = correct_json_column(df['date'], 'test date conversion')
results = validate_json_column(test['test date conversion'])
results[results['validity'] == False].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [18]:
df['cleaneddate'] = test

In [19]:
test = correct_json_column(df['attribution'], 'test attribute conversion')
results = validate_json_column(test['test attribute conversion'])
results[results['validity'] == False].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [20]:
results.head()

Unnamed: 0,values,validity
0,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",True
1,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",True
2,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",True
3,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",True
4,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",True


In [21]:
df['cleanedattribution'] = test

In [22]:
test = correct_json_column(df['averagingperiod'], 'test averaging period conversion')
results = validate_json_column(test['test averaging period conversion'])
results[results['validity'] == False].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [23]:
df['cleanedaveragingperiod'] = test

In [24]:
test = correct_json_column(df['coordinates'], 'test coordinate conversion')
results = validate_json_column(test['test coordinate conversion'])
results[results['validity'] == False].describe()

Unnamed: 0,values,validity
count,0.0,0.0
unique,0.0,0.0
top,,
freq,,


In [25]:
df['cleanedcoordinates'] = test

In [26]:
df.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,sourcename,sourcetype,mobile,cleaneddate,cleanedattribution,cleanedaveragingperiod,cleanedcoordinates
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T02:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}"
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T04:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}"
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}"
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,Texas,,,"{""utc"":""2015-12-30T01:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}"
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}"


In [27]:
df_trans = df

In [28]:
df_trans = pd.concat([df_trans, df_trans.cleanedcoordinates.apply(json.loads).apply(pd.Series)], axis = 1)

In [29]:
df_trans.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,sourcename,sourcetype,mobile,cleaneddate,cleanedattribution,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T02:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T04:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,Texas,,,"{""utc"":""2015-12-30T01:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058


In [30]:
df_trans = pd.concat([df_trans, df_trans.cleaneddate.apply(json.loads).apply(pd.Series)], axis = 1)

In [31]:
df_trans.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,...,sourcetype,mobile,cleaneddate,cleanedattribution,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude,utc,local
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,,,"{""utc"":""2015-12-30T02:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,,,"{""utc"":""2015-12-30T04:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,...,,,"{""utc"":""2015-12-30T01:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00


In [32]:
df_trans = pd.concat([df_trans, df_trans.cleanedattribution.apply(json.loads).apply(pd.Series)], axis = 1)

In [33]:
df_trans.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,...,cleanedattribution,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude,utc,local,[{name,url,{name
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,...,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,


In [34]:
df_trans.drop(' {name', axis=1, inplace=True)

In [35]:
df_trans.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,...,cleaneddate,cleanedattribution,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude,utc,local,[{name,url
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""utc"":""2015-12-30T02:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""utc"":""2015-12-30T04:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,...,"{""utc"":""2015-12-30T01:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...","{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us


In [36]:
df_trans = pd.concat([df_trans, df_trans.cleanedaveragingperiod.apply(json.loads).apply(pd.Series)], axis = 1)

In [37]:
df_trans.head()

Unnamed: 0,date,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,...,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude,utc,local,[{name,url,unit.1,value.1
0,"{utc=2015-12-30T02:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,"{utc=2015-12-30T04:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,"{utc=2015-12-30T01:00:00.000Z, local=2015-12-2...",no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,"{utc=2015-12-30T00:00:00.000Z, local=2015-12-2...",no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [38]:
df_trans.drop('date', axis =1, inplace=True)

In [39]:
df_trans.head()

Unnamed: 0,parameter,location,value,unit,city,attribution,averagingperiod,coordinates,country,sourcename,...,cleanedaveragingperiod,cleanedcoordinates,latitude,longitude,utc,local,[{name,url,unit.1,value.1
0,no2,Houston East C1/G316,0.0118,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.901037, longitude=-95.32613}",US,Texas,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.901037"","" longitude"":""-95.32613""}",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,"[{name=TCEQ, url=http://www.tceq.state.tx.us}]","{unit=hours, value=1.0}","{latitude=29.767996, longitude=-95.22058}",US,Texas,...,"{""unit"":""hours"","" value"":""1.0""}","{""latitude"":""29.767996"","" longitude"":""-95.22058""}",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [40]:
df_trans.drop(['attribution','averagingperiod','coordinates','cleanedaveragingperiod','cleanedcoordinates'], axis = 1, inplace = True)

In [41]:
df_trans.head()

Unnamed: 0,parameter,location,value,unit,city,country,sourcename,sourcetype,mobile,cleaneddate,cleanedattribution,latitude,longitude,utc,local,[{name,url,unit.1,value.1
0,no2,Houston East C1/G316,0.0118,ppm,Houston,US,Texas,,,"{""utc"":""2015-12-30T02:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,US,Texas,,,"{""utc"":""2015-12-30T04:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,US,Texas,,,"{""utc"":""2015-12-30T01:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,US,Texas,,,"{""utc"":""2015-12-30T00:00:00.000Z"","" local"":""20...","{""[{name"":""TCEQ"","" url"":""http://www.tceq.state...",29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [42]:
df_trans.drop(['cleaneddate','cleanedattribution'], axis = 1, inplace = True)

In [43]:
df_trans.head()

Unnamed: 0,parameter,location,value,unit,city,country,sourcename,sourcetype,mobile,latitude,longitude,utc,local,[{name,url,unit.1,value.1
0,no2,Houston East C1/G316,0.0118,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,US,Texas,,,29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [44]:
#df_trans.rename(columns={df_trans.columns[15]: 'averaging time unit'}, inplace=True)
df_trans.columns.values[3]='concentration unit'

In [45]:
df_trans.head()

Unnamed: 0,parameter,location,value,concentration unit,city,country,sourcename,sourcetype,mobile,latitude,longitude,utc,local,[{name,url,unit,value.1
0,no2,Houston East C1/G316,0.0118,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,US,Texas,,,29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [46]:
#df_trans.rename(columns={df_trans.columns[15]: 'averaging time unit'}, inplace=True)
df_trans.columns.values[15] = 'averaging time unit'

In [47]:
#df_trans.rename(columns={df_trans.columns[13]: 'source'}, inplace=True)
df_trans.columns.values[13] = 'source'

In [48]:
df_trans.head()

Unnamed: 0,parameter,location,value,concentration unit,city,country,sourcename,sourcetype,mobile,latitude,longitude,utc,local,source,url,averaging time unit,value.1
0,no2,Houston East C1/G316,0.0118,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,US,Texas,,,29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [49]:
#df_trans.rename(columns={df_trans.columns[16]: 'averaging time'},inplace=True)
df_trans.columns.values[16] = 'averaging time'

In [50]:
df_trans.head()

Unnamed: 0,parameter,location,value,concentration unit,city,country,sourcename,sourcetype,mobile,latitude,longitude,utc,local,source,url,averaging time unit,averaging time
0,no2,Houston East C1/G316,0.0118,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T02:00:00.000Z,2015-12-29T20:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
1,o3,Houston East C1/G316,0.02,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T04:00:00.000Z,2015-12-29T22:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
2,o3,Houston East C1/G316,0.018,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
3,no2,Houston Aldine C8/AF108/X150,0.0007,ppm,Houston,US,Texas,,,29.901037,-95.32613,2015-12-30T01:00:00.000Z,2015-12-29T19:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0
4,no2,Houston East C1/G316,0.0137,ppm,Houston,US,Texas,,,29.767996,-95.22058,2015-12-30T00:00:00.000Z,2015-12-29T18:00:00-06:00,TCEQ,http://www.tceq.state.tx.us,hours,1.0


In [51]:
df_trans.to_csv('2015 AQ Clean.csv', index=False)