In [1]:
import os
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option("display.max_colwidth", None)
import folium

In [2]:

# Original absolute location of the file (kept for reference only; the file was moved):
# my_file_1 = r'D:\CRITICAL - MAIN DATAFILE - MILESTONE II\AoT_Chicago.complete.2021-09-12\nodes.csv'

# Main nodes.csv metadata file that comes with the huge dataset.
# Build the path with os.path.join so it resolves on every OS; the previous
# literal r'data\nodes.csv' only works where "\" is the path separator.
my_nodes_file = os.path.join('data', 'nodes.csv')

# Read the identifier columns as text so pandas never tries to interpret
# hex-like IDs (e.g. '001e06115365') or VSNs (e.g. '004') as numbers.
nodes_df = pd.read_csv(my_nodes_file, dtype={'node_id': str, 'vsn': str})
    

### A) Analyzing the nodes.csv file (metadata): 

In [3]:

# Reminder: this file could probably be read directly from the latest GitHub download,
# but it is probably safer to download the very latest file to your own computer.
# TODO: make a call on that later.


In [4]:

# Show the node_id column of the node metadata (one identifier per row).

nodes_df['node_id']


0      001e0610ba46
1      001e0610ba3b
2      001e0610f02f
3      001e0610ba8f
4      001e0610ba16
5      001e06107e5d
6      001e0610ba8b
7      001e0610ba13
8      001e0610ba18
9      001e0610bc10
10     001e0610bbf9
11     001e0610bbff
12     001e0610b9e7
13     001e0610ba15
14     001e0610bbe5
15     001e0610ee33
16     001e0610b9e5
17     001e0610f8f4
18     001e0610ee41
19     001e0610eef4
20     001e0610ef29
21     001e0610f668
22     001e0610f730
23     001e0610bc07
24     001e0610ef26
25     001e0610ea5a
26     001e0610ee61
27     001e0610ba81
28     001e0610ba57
29     001e0610ee82
30     001e0610ef27
31     001e0610fb4c
32     001e0610ee6f
33     001e0610ef68
34     001e0610e809
35     001e0610ee36
36     001e061135cb
37     001e0610e532
38     001e0610e8cb
39     001e0610ee5d
40     001e0610ef73
41     001e0610e540
42     001e0610f6dd
43     001e06113ad8
44     001e0611441e
45     001e06113d83
46     001e06112e77
47     001e0610f6db
48     001e06115365
49     001e06113cff


In [5]:

# Count distinct node IDs rather than rows: len() of the column would also
# count a node_id again every time it appears on another row.
print('\nThere are this many unique sensor nodes: ', nodes_df.node_id.nunique(), "\n")
# they have pretty odd looking identifiers, all starting with 001e...



There are this many unique sensor nodes:  126 



In [6]:

nodes_df

# Display the whole node metadata table; every row and column is shown
# because the pandas display limits were set to None in the first cell.
#
# NOTE(review): the author believed four of the nodes were eventually
# decommissioned (a filled-in end_timestamp marks when a node's installation
# ended) -- confirm by counting the non-null end_timestamp values.
# Other columns include address / lat / lon and start_timestamp (when the
# node's installation started).
# project_id is AoT_Chicago in the rows shown; presumably it is constant for
# this digest (the author notes that other cities got sensors later) -- verify.

# The description field holds a letter in parentheses, e.g. (S) or (T), and
# optionally letters in square brackets, e.g. [C] or [CA]. The dataset README
# explains only the bracket letters:

#################################################################################
### Additional details about a node are contained in the description field. 
### The letters inside the brackets [ ] indicate:
###   C - Node is equipped with chemical sensors.
###   A - Node is equipped with Alphasense OPN-N2 air quality sensor.
###   P - Node is equipped with Plantower PMS7003 air quality sensor.
#################################################################################


Unnamed: 0,node_id,project_id,vsn,address,lat,lon,description,start_timestamp,end_timestamp
0,001e0610ba46,AoT_Chicago,004,State St & Jackson Blvd Chicago IL,41.878377,-87.627678,AoT Chicago (S) [C],2017/10/09 00:00:00,
1,001e0610ba3b,AoT_Chicago,006,18th St & Lake Shore Dr Chicago IL,41.858136,-87.616055,AoT Chicago (S),2017/08/08 00:00:00,
2,001e0610f02f,AoT_Chicago,00A,Lake Shore Drive & Fullerton Ave Chicago IL,41.926261,-87.630758,AoT Chicago (S) [CA],2018/05/07 00:00:00,
3,001e0610ba8f,AoT_Chicago,00D,Cornell & 47th St Chicago IL,41.810342,-87.590228,AoT Chicago (S),2017/08/08 00:00:00,
4,001e0610ba16,AoT_Chicago,010,Homan Ave & Roosevelt Rd Chicago IL,41.866349,-87.710543,AoT Chicago (S) [C],2018/07/18 00:00:00,
5,001e06107e5d,AoT_Chicago,014,State St & Washington St Chicago IL,41.883205,-87.627769,AoT Chicago (T),2016/09/14 00:00:00,
6,001e0610ba8b,AoT_Chicago,018,Stony Island Ave & 63rd St Chicago IL,41.7806,-87.586456,AoT Chicago (S) [C],2018/02/26 00:00:00,
7,001e0610ba13,AoT_Chicago,01C,7801 S Lawndale Ave Chicago IL,41.751238,-87.71299,AoT Chicago (S) [C] {ComEd},2018/01/01 00:00:00,
8,001e0610ba18,AoT_Chicago,01D,Damen Ave & Cermak Chicago IL,41.852179,-87.675825,AoT Chicago (S),2017/12/15 00:00:00,
9,001e0610bc10,AoT_Chicago,01F,State St & 87th Chicago IL,41.736314,-87.624179,AoT Chicago (S) [C],2018/02/22 00:00:00,



## --- Project Digest Readme ----

The files in this directory contain sensor data and the associated meta-data that
will enable parsing the sensor values.

## Overview

This sensor data digest contains the following files:

* `data.csv.gz` - Sensor data ordered by ascending timestamp.
* `nodes.csv` - Nodes metadata.
* `sensors.csv` - Sensor metadata.
* `provenance.csv` - Provenance metadata.

These files will be described in-depth in the following sections.

### Sensor Data

The sensor data file is an aggregate of all published data from the project's
nodes. By published, we mean:

* Data was read from a whitelisted node belonging to the project.
* Data was read during that node's commissioning period.



The `data.csv.gz` file is a compressed CSV with the following, but not limited to, columns:

* `timestamp` - UTC timestamp of when the measurement was done.
* `node_id` - ID of node which did the measurement.
* `subsystem` - Subsystem of node containing sensor.
* `sensor` - Sensor that was measured.
* `parameter` - Sensor parameter that was measured.
* `value_raw` - Raw measurement value from sensor.
* `value_hrf` - Converted, "human readable" value from sensor.

These fields will always be provided as a header, for example:
```
timestamp,node_id,subsystem,sensor,parameter,value_raw,value_hrf
2017/09/09 22:12:44,001e0610ba8f,lightsense,hih4030,humidity,NA,32.18
2017/09/09 22:12:44,001e0610ba8f,lightsense,hih4030,temperature,NA,48.55
2017/09/09 22:12:44,001e0610ba8f,lightsense,ml8511,intensity,9643,NA
2017/09/09 22:12:44,001e0610ba8f,lightsense,tmp421,temperature,NA,43.81
2017/09/09 22:12:44,001e0610ba8f,metsense,hih4030,humidity,450,NA
2017/09/09 22:12:44,001e0610ba8f,metsense,htu21d,humidity,NA,41.15
2017/09/09 22:12:44,001e0610ba8f,metsense,htu21d,temperature,NA,24.1
2017/09/09 22:12:44,001e0610ba8f,metsense,metsense,id,00001814B7E8,00001814B7E8
2017/09/09 22:12:44,001e0610ba8f,metsense,pr103j2,temperature,839,NA
```

Sensor data is ordered by ascending timestamp.

Additional information such as each node's coordinates or each sensor's units can be found
in the metadata. More information about these will be provided in the next two sections.

A sensor value may be marked `NA`, indicating that either the raw or HRF value is
unavailable.

### Node Metadata

The node metadata provides additional information about each of a project's nodes. This
file is a CSV with the following fields:

* `node_id` - ID of node.
* `project_id` - ID of project which manages node.
* `vsn` - Public name for node. The VSN is visible on the physical enclosure.
* `address` - Street address of node installation.
* `lat` - Latitude of node installation.
* `lon` - Longitude of node installation.
* `description` - More detailed description of node's build and configuration.
* `start_timestamp` - Starting timestamp of node installation.
* `end_timestamp` - Ending timestamp of node installation.

These fields will always be provided as a header, for example:
```
node_id,project_id,vsn,address,lat,lon,description,start_timestamp,end_timestamp
001e0610ba46,AoT_Chicago,004,State St & Jackson Blvd Chicago IL,41.878377,-87.627678,AoT Chicago (S) [C],2017/10/09 00:00:00,
001e0610ba3b,AoT_Chicago,006,18th St & Lake Shore Dr Chicago IL,41.858136,-87.616055,AoT Chicago (S),2017/08/08 00:00:00,
001e0610ba8f,AoT_Chicago,00D,Cornell & 47th St Chicago IL,41.810342,-87.590228,AoT Chicago (S),2017/08/08 00:00:00,
001e0610ba16,AoT_Chicago,010,Ohio St & Grand Ave Chicago IL,41.891964,-87.611603,AoT Chicago (S) [C],2017/12/01 00:00:00,2018/06/04 00:00:00
```

Additional details about a node are contained in the description field. The letters
inside the brackets `[ ]` indicate:

* `C` - Node is equipped with chemical sensors.
* `A` - Node is equipped with Alphasense OPN-N2 air quality sensor.
* `P` - Node is equipped with Plantower PMS7003 air quality sensor.

### Sensor Metadata

The sensor metadata provides additional information about each of the sensors published
by the project. This file is a CSV with the following fields:

* `ontology` - Ontology of measurement.
* `subsystem` - Subsystem containing sensor.
* `sensor` - Sensor name.
* `parameter` - Sensor parameter.
* `hrf_unit` - Physical units of HRF value.
* `hrf_minval` - Minimum HRF value according to datasheet. Used as lower bound in range filter.
* `hrf_maxval` - Maximum HRF value according to datasheet. Used as upper bound in range filter.
* `datasheet` - Reference to sensor's datasheet.

These fields will always be provided as a header, for example:
```
ontology,subsystem,sensor,parameter,hrf_unit,hrf_minval,hrf_maxval,datasheet
/sensing/meteorology/pressure,metsense,bmp180,pressure,hPa,300,1100,"https://github.com/waggle-sensor/sensors/blob/master/sensors/airsense/bmp180.pdf"
/sensing/meteorology/temperature,metsense,bmp180,temperature,C,-40,125,"https://github.com/waggle-sensor/sensors/blob/master/sensors/airsense/bmp180.pdf"
/sensing/meteorology/humidity,metsense,hih4030,humidity,RH,0,100,"https://github.com/waggle-sensor/sensors/blob/master/sensors/airsense/htu4030.pdf"
/sensing/meteorology/humidity,metsense,htu21d,humidity,RH,0,100,"https://github.com/waggle-sensor/sensors/blob/master/sensors/airsense/htu21d.pdf"
/sensing/meteorology/temperature,metsense,htu21d,temperature,C,-40,125,"https://github.com/waggle-sensor/sensors/blob/master/sensors/airsense/htu21d.pdf"
```

More in-depth information about each sensor can be found at: https://github.com/waggle-sensor/sensors

### Provenance Metadata

The provenance metadata provides additional information about the origin of this
project digest. This file is a CSV with the following fields:

* `data_format_version` - Data format version.
* `project_id` - Project ID.
* `data_start_date` - Minimum possible publishing UTC timestamp.
* `data_end_date` - Maximum possible publishing UTC timestamp. If no explicit date exists, the creation date is used.
* `creation_date` - UTC timestamp this digest was created.
* `url` - URL where this digest was provided.

These fields will always be provided as a header, for example:
```
data_format_version,project_id,data_start_date,data_end_date,creation_date,url
1,AoT_Chicago.complete,2017/03/31 00:00:00,2018/04/10 15:34:36,2018/04/10 15:34:36,http://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Chicago.complete.latest.tar.gz
```

### Useful Links

* Sensors: https://github.com/waggle-sensor/sensors/blob/develop/README.md
* Array of Things: https://arrayofthings.github.io/
* Waggle: http://wa8.gl/

## Disclaimer

Although our goal is to provide stable metadata files, please consider these as
in-development. If you do write tools which process them, we *strongly* recommend
taking advantage of the metadata headers and processing the files as CSV when applicable in
order to accommodate future changes.

<br><br><br>

In [7]:

# Transpose the first record so that each field is shown on its own line.

nodes_df.iloc[:1].transpose()


Unnamed: 0,0
node_id,001e0610ba46
project_id,AoT_Chicago
vsn,004
address,State St & Jackson Blvd Chicago IL
lat,41.878377
lon,-87.627678
description,AoT Chicago (S) [C]
start_timestamp,2017/10/09 00:00:00
end_timestamp,


In [8]:

# we will use lat/lon to plot into folium or wherever else 


In [9]:

# Base map for the sensor nodes; the initial view is centred on the
# [lat, lon] pair below.
center = [41.8781, -87.6298]
map_chicago_aot = folium.Map(zoom_start=10, location=center)

# A bare expression on the last line makes the notebook render the map.
map_chicago_aot



```python


### helpful reference code:


# Reference snippet (not part of the AoT analysis): import the necessary packages
import folium
import pandas as pd
 
# Import the dataset as a CSV file and store it as a dataframe in 'df'.
# The code below reads the LAT, LON, NAME and ELEV columns from it.
df=pd.read_csv('Volcanoes.txt')
 
# Calculate the mean of the latitudes and longitudes of the volcano
# locations; the means are used as the map's starting location.
latmean=df['LAT'].mean()
lonmean=df['LON'].mean()
 
# Create a map object using the Map() function.
# The location parameter takes the latitude and longitude of the starting
# location (the map will be centered at those co-ordinates).
# NOTE(review): 'Mapbox bright' may no longer be available as a built-in
# tileset in recent folium releases -- confirm before reusing this snippet.
map5 = folium.Map(location=[latmean,lonmean],
        zoom_start=6,tiles = 'Mapbox bright')
         
# Function to change the marker color
# according to the elevation of volcano
def color(elev):
    """Return the marker colour name for a volcano elevation.

    Bands (lower bound inclusive, upper bound exclusive):

        0    <= elev < 1000  -> 'green'
        1000 <= elev < 2000  -> 'blue'
        2000 <= elev < 3000  -> 'orange'
        anything else        -> 'red'

    Numeric comparisons are used instead of ``elev in range(...)`` because
    the range tests left gaps at exactly 1000, 1999 and 2999 and never
    matched non-integer elevations, so all of those fell through to 'red'.
    Negative and NaN elevations still map to 'red', as before.
    """
    if 0 <= elev < 1000:
        return 'green'
    if 1000 <= elev < 2000:
        return 'blue'
    if 2000 <= elev < 3000:
        return 'orange'
    return 'red'
     
# Iterate over the LAT, LON, NAME and ELEV columns simultaneously;
# zip() yields one (lat, lon, name, elev) tuple per row.
for lat, lon, name, elev in zip(df['LAT'], df['LON'], df['NAME'], df['ELEV']):
    # The icon colour depends on the elevation band returned by color().
    marker_icon = folium.Icon(color=color(elev), icon_color='yellow', icon='cloud')
    # Marker() takes the location coordinates as a [lat, lon] list.
    folium.Marker(location=[lat, lon], popup=name, icon=marker_icon).add_to(map5)

# Save the map created above. save() returns None, so it is called directly
# instead of being wrapped in print(), which only ever printed "None".
map5.save('test7.html')


```


In [10]:

# Add one marker per node to the base map.
# - Popup text is rendered as HTML, so line breaks are written as <br>;
#   a plain "\n" collapses to a single space in the popup.
# - Rows without coordinates are skipped because folium rejects NaN locations.
# - 'tiles' is a Map option, not a Marker option, so it is not passed here.
# NOTE: re-running this cell adds another copy of every marker to
# map_chicago_aot; re-run the cell that creates the map first to start clean.
nodes_with_coords = nodes_df.dropna(subset=['lat', 'lon'])

for node in nodes_with_coords.itertuples(index=False):
    popup_html = (
        f'NODE:<br>{node.node_id}<br><br>'
        f'DESCRIPTION:<br>{node.description}<br><br>'
        f'ADDRESS:<br>{node.address}'
    )
    folium.Marker(location=[node.lat, node.lon], popup=popup_html).add_to(map_chicago_aot)

#display the map
map_chicago_aot

# save map to html file if you want ! ! ! 
# map_chicago_aot.save('index.html')


In [11]:

# embed the node name and other information when you click on it ! 

# anything else worth doing ?  color code the node by age ? 

# a plot of temperature over time as well pop up ?   (maybe maybe not worth doing)


---
---

In [12]:

# Second map: draw every node as a small blue circle instead of a pin marker.
center = [41.826881, -87.777789]

map_chicago_aot_2 = folium.Map(location=center, zoom_start=10)

# Walk the three needed columns in lockstep instead of calling
# nodes_df.iloc[i] three times per row (each call builds a full row Series).
for lat, lon, node_id in zip(nodes_df['lat'], nodes_df['lon'], nodes_df['node_id']):
    folium.Circle(
        location=[lat, lon],
        popup=node_id,
        radius=20,  # folium.Circle radius is in metres on the ground, not pixels
        color='blue',
        fill=True,
        fill_color='blue'
    ).add_to(map_chicago_aot_2)

# Show the map again
map_chicago_aot_2

# port this into index.html, host some random website to look smart and cool ...
# or put onto heroku !!!!  

# map_chicago_aot_2.save('sensor_node_locations.html')


---
---