In [1]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import csv
from math import sin, cos, sqrt, atan2, radians

In [2]:
#read point of interest data
poi_data = pd.read_csv("data/nodub/hotspot_2015.csv", parse_dates=['datetime'], usecols=['latitude','longitude','datetime', 'power'])
poi_data.head()

Unnamed: 0,datetime,latitude,longitude,power
0,2015-06-27 04:00:00,-13.2,134.0,3.2
1,2015-10-19 04:00:00,-37.9,145.3,3.2
2,2015-10-20 02:00:00,-32.1,115.9,3.4
3,2015-10-20 02:00:00,-31.9,115.9,3.4
4,2015-03-24 02:00:00,-31.9,115.9,3.5


In [3]:
poi_data.sort_values(["datetime", "latitude", "longitude"],axis=0,inplace=True)
poi_data.head()

Unnamed: 0,datetime,latitude,longitude,power
18255,2015-01-01 01:00:00,-17.2,145.9,10.0
28109,2015-01-01 01:00:00,-15.9,136.6,11.9
38325,2015-01-01 01:00:00,-15.1,144.4,14.2
1704,2015-01-01 02:00:00,-32.1,115.8,6.4
9116,2015-01-01 02:00:00,-32.1,115.9,8.3


In [4]:
print (poi_data.shape)


(82583, 4)


In [5]:
if len(poi_data.columns) < 3  or'datetime' not in poi_data.columns or 'latitude' not in poi_data.columns or 'longitude' not in poi_data.columns:
    
    print ("Wrong format!")

In [6]:
#read the consolidated sesnosrs data 
data_sources = ['ta', 'wd', 'ws', 'rh', 'fsd']#, 'precip']
sensors_data = []
for i,d in  enumerate(data_sources):
    tmp = pd.read_csv("data/nodub/"+d+".csv", parse_dates=['datetime'])
    if len(tmp.columns) < 4 or 'value' not in tmp.columns or 'datetime' not in tmp.columns or 'latitude' not in tmp.columns or 'longitude' not in tmp.columns:
        print ("Wrong format!")
        continue
    print (tmp.shape)
    tmp.sort_values(["datetime", "latitude", "longitude"],axis=0,inplace=True)
    sensors_data.append(tmp)
if len(sensors_data) != len(data_sources):
    print ("Sizes don't match")
sensors_data[0].head()

(29930, 4)
(29930, 4)
(29930, 4)
(29930, 4)
(29930, 4)


Unnamed: 0,datetime,latitude,longitude,value
0,2015-01-01,-43.1,146.6,12.459337
1,2015-01-01,-42.4,147.4,13.768324
2,2015-01-01,-41.6,147.4,14.542662
3,2015-01-01,-40.9,145.9,13.806694
4,2015-01-01,-40.1,145.1,15.747588


In [None]:
def isInArea(lt1, lng1, lt2, lng2, radius):
    # approximate radius of earth in km
    R = 6373.0

    lat1 = radians(float(lt1))
    lon1 = radians(float(lng1))
    lat2 = radians(float(lt2))
    lon2 = radians(float(lng2))

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c # in kms
    
    return  distance < radius
   

In [None]:
items_completed = 0


interval = 3
radius = 100
csv_file =  open('hotspots_with_average_sensors_data.csv', 'w', 1)
writer = csv.writer(csv_file)
row = ["datetime","latitude","longitude"]


for l in data_sources :
    poi_data[l] = np.zeros([len(poi_data)])
    row.append(l)
    
if not items_completed:
    print (row)
    writer.writerow(row)
else:
    items_completed += 1
    
offset =  pd.DateOffset(interval)

start = 0
for i,d in enumerate(poi_data.datetime):
    
        #i += items_completed
        
        row = [d,round(poi_data.latitude[i],1),round(poi_data.longitude[i],1)]
        for j,l in enumerate(data_sources):
            summary = 0;
            count = 0
            for k,s in enumerate(sensors_data[j].datetime[start:]):
                
                if s > d:
                    break
                if s < d-offset:
                    start = k
                    continue
                    
                if isInArea(poi_data.latitude[i], poi_data.longitude[i], sensors_data[j].latitude[k], sensors_data[j].longitude[k], radius) :
                    #print (str(i+100)+","+str(k)+": "+ str(sensors_data[j].value[k]))
                    summary += float(sensors_data[j].value[k])
                    #print (sensors_data[j].value[k])
                    count += 1
            if count and summary > 0:
                #print ("Sum: " + str(summary) + " Count: " + str(count))
                poi_data.loc[l,i] = (summary/count)
                row.append(summary/count)
            else:
                print (str(i) +". " + str(d) + " (" + str(poi_data.latitude[i])  + "," + str(poi_data.longitude[i]) +  ") - " + l + " is missing." )
                break
        
        if len(row) == (len(data_sources)+3):
            writer.writerow(row)
            
            
            

                    
poi_data.head()
csv_file.close()

['datetime', 'latitude', 'longitude', 'ta', 'wd', 'ws', 'rh', 'fsd']
0. 2015-01-01 01:00:00 (-13.2,134.0) - ta is missing.
14. 2015-01-01 03:00:00 (-31.4,151.1) - ta is missing.
15. 2015-01-01 03:00:00 (-33.9,118.0) - ta is missing.
17. 2015-01-01 03:00:00 (-17.9,135.9) - ta is missing.
18. 2015-01-01 03:00:00 (-33.0,116.9) - ta is missing.
19. 2015-01-01 03:00:00 (-18.8,131.3) - ta is missing.
20. 2015-01-01 03:00:00 (-13.7,134.3) - ta is missing.
23. 2015-01-01 03:00:00 (-14.7,141.6) - ta is missing.
24. 2015-01-01 03:00:00 (-12.5,136.2) - ta is missing.
26. 2015-01-01 03:00:00 (-28.7,151.9) - ta is missing.
34. 2015-01-01 03:00:00 (-15.3,128.2) - ta is missing.
35. 2015-01-01 03:00:00 (-16.1,130.3) - ta is missing.
36. 2015-01-01 05:00:00 (-16.3,125.7) - ta is missing.
37. 2015-01-01 05:00:00 (-23.2,123.2) - ta is missing.
38. 2015-01-01 05:00:00 (-14.5,133.2) - ta is missing.
41. 2015-01-01 14:00:00 (-15.2,131.2) - ta is missing.
48. 2015-01-02 00:00:00 (-16.0,125.7) - ta is missin