In [1]:
import dask.dataframe as dd
import pandas as pd

In [2]:
# Directory the raw measurements CSV is read from (relative to the working directory).
ORIGINAL_DIR = "Original-Data/"
# Directory the cleaned/derived CSV files are written to.
ANALYSIS_DIR = "Analysis-Data/"

In [3]:
# First look at the raw export with every column; 'Device ID' is forced to float64
# and dtype inference uses 10 sample rows.
df = dd.read_csv(
    ORIGINAL_DIR + "measurements-out.csv",
    dtype={'Device ID': 'float64'},
    sample_rows=10,
)
df.head()

Unnamed: 0,Captured Time,Latitude,Longitude,Value,Unit,Location Name,Device ID,MD5Sum,Height,Surface,Radiation,Uploaded Time,Loader ID
0,2021-12-09 01:59:54.307,42.6986,2.8956,22.0,cpm,,238.0,613a747f2203473a089e3a33b5e699d8,,,,2021-12-09 01:59:54.404897,
1,2021-12-09 01:59:50.469,53.864,-3.047,20.5,cpm,,245.0,2783a57d635cb69f372b13b51a66465f,,,,2021-12-09 01:59:51.034365,
2,2021-12-09 01:59:48,37.366713,140.53097,23.4,status,,100079.0,849b30fc08e85036e90d429fcb9a61e0,462.0,,,2021-12-09 01:59:48.308799,
3,2021-12-09 01:58:36,51.0634,11.7586,16.0,cpm,"Sieglitz, DE",209.0,cc9900720a2ca552934dc0cc70b95d17,,,,2021-12-09 01:59:47.643124,
4,2021-12-09 01:59:47,37.366713,140.53097,14.0,cpm,,100072.0,3703395434168ae96f0a2604aad6899a,462.0,,,2021-12-09 01:59:47.589836,


In [4]:
# Reload lazily, keeping only the first five columns of the file (positions 0-4).
df = dd.read_csv(
    ORIGINAL_DIR + "measurements-out.csv",
    usecols=list(range(5)),
)
df.head()

Unnamed: 0,Captured Time,Latitude,Longitude,Value,Unit
0,2021-12-09 01:59:54.307,42.6986,2.8956,22.0,cpm
1,2021-12-09 01:59:50.469,53.864,-3.047,20.5,cpm
2,2021-12-09 01:59:48,37.366713,140.53097,23.4,status
3,2021-12-09 01:58:36,51.0634,11.7586,16.0,cpm
4,2021-12-09 01:59:47,37.366713,140.53097,14.0,cpm


In [5]:
# List every distinct value of the Unit column (triggers a dask computation).
df["Unit"].unique().compute()

0              cpm
1           status
2          celcius
3       PM10 ug/m3
4      PM2.5 ug/m3
5              211
6              usv
7              Cpm
8              CPM
9     PM2.5 ug/m3 
10          uSv/hr
11       Inspector
12           HUMD%
13            PM10
14           PM2.5
15             PM1
16           TEMPC
17          usv/hr
18          NOXppm
19           pm2.5
20     DeviceType2
21     DeviceType1
22           vivek
23           uSv/h
24             uSv
25             cpm
26    microsievert
27               1
28               0
29            RSSI
Name: Unit, dtype: object

In [6]:
# Keep only counts-per-minute readings.
# The raw Unit column spells this unit several ways ('cpm', 'Cpm', 'CPM' and a
# whitespace-padded 'cpm'), so strip and lower-case before comparing; an exact
# match on 'cpm' would silently discard the other spellings.
df = df[df.Unit.str.strip().str.lower() == 'cpm']

In [7]:
# Replace the raw headers with short snake_case names, in column order.
df.columns = [
    'datetime',
    'latitude',
    'longitude',
    'radiation',
    'unit',
]

In [8]:
# Convert cpm to µSv/h by dividing by 350 (conversion factor taken from
# http://safecast.org/tilemap/methodology.html).
df = df.assign(radiation=df.radiation / 350)

In [9]:
# Discard zero and negative readings; only strictly positive values remain.
df = df[df['radiation'].gt(0)]

In [None]:
# Run the lazy dask pipeline built so far; `df` is rebound to the computed result,
# so the following cells operate on in-memory data.
df = df.compute()

In [None]:
# Preview the first rows of the computed frame.
df.head()

# TODO

In [None]:
# Keep only the columns used downstream. Selecting them by name avoids the positional
# `axis` argument of DataFrame.drop (rejected by pandas >= 2.0) and, via .copy(), gives
# a fresh frame instead of mutating a filtered slice in place.
df = df[['datetime', 'latitude', 'longitude', 'radiation']].copy()

In [None]:
# Drop rows with any missing value. `df` is already a computed (pandas) frame here, so
# there is nothing to .compute(); the result must be assigned back to take effect.
df = df.dropna()

In [None]:
# Preview the first rows again.
df.head()

**Validate latitude and longitude**

In [None]:
# Inspect the latitude extremes (printed as max, then min).
lat = df["latitude"]
min_lat, max_lat = lat.min(), lat.max()
print(max_lat, min_lat, sep="\n")

In [None]:
# Inspect the longitude extremes (printed as max, then min).
lon = df["longitude"]
min_lon, max_lon = lon.min(), lon.max()
print(max_lon, min_lon, sep="\n")

In [None]:
# Parse the timestamp strings into datetime64 values.
# - `df` is a computed pandas frame at this point, so pandas' own parser is used
#   rather than the dask wrapper.
# - No `unit` is passed: the values are formatted date strings, not epoch numbers.
# - The column mixes timestamps with and without fractional seconds. pandas >= 2.0
#   infers a single format from the first value unless told the data is ISO 8601
#   with varying precision; older versions parse each value independently and do
#   not understand format='ISO8601'.
# - errors='coerce' turns unparseable / out-of-range values into NaT.
_iso_kwargs = {'format': 'ISO8601'} if int(pd.__version__.split('.')[0]) >= 2 else {}
df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce', **_iso_kwargs)

In [None]:
# Earliest value in the datetime column.
df['datetime'].min()

In [None]:
# Latest value in the datetime column.
df['datetime'].max()

In [None]:
# Drop rows with any missing value (including timestamps that could not be parsed).
# `df` is a pandas frame here, so there is no .compute(); assign the result back.
df = df.dropna()

In [None]:
# Preview before truncating the timestamps.
df.head()

In [None]:
# Keep only the calendar day: `.dt.date` replaces each timestamp with its date part.
df['datetime'] = df['datetime'].dt.date

In [None]:
# Preview after truncating the timestamps.
df.head()

In [None]:
# Keep measurements taken strictly before 2021-03-15.
# Both sides are converted to pandas timestamps so the comparison is well defined
# whether the column currently holds datetime.date objects or datetime64 values;
# `df` is a pandas frame here, so the pandas API is used instead of the dask wrapper.
df = df[pd.to_datetime(df['datetime']) < pd.Timestamp('2021-03-15')]

In [None]:
# Earliest remaining date after the cut-off filter.
df['datetime'].min()

In [None]:
# Box plot of the radiation column before outlier removal.
boxplot = df.boxplot(column=['radiation'])

In [None]:
# Outlier removal with the 1.5 x IQR rule.
# Q1 / Q3 are the quartiles; q1 / q3 are the lower / upper fences derived from them.
Q1, Q3 = (df['radiation'].quantile(p) for p in (.25, .75))
q1, q3 = Q1 - 1.5*(Q3 - Q1), Q3 + 1.5*(Q3 - Q1)

# Keep rows whose radiation lies inside the fences (both ends inclusive).
df = df[(df['radiation'] >= q1) & (df['radiation'] <= q3)]

In [None]:
# Box plot of the radiation column after the IQR filter.
boxplot = df.boxplot(column=['radiation'])

In [None]:
# Row count and preview of the filtered data.
print('Number of measurements: ', df.shape[0])
df.head()

In [None]:
# Smallest remaining radiation value.
df['radiation'].min()

In [None]:
# Largest remaining radiation value.
df['radiation'].max()

In [None]:
# Export the IQR-filtered data without the index column.
# NOTE(review): a later cell reads "measurements-out-2903-q1-995.csv", which is a
# different file name from the one written here — confirm where that file comes from.
df.to_csv(f"{ANALYSIS_DIR}measurements-out-2903-q1q3.csv", index=False)

**Visualization**

In [None]:
import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from datashader.bokeh_ext import create_ramp_legend, create_categorical_legend
import warnings
warnings.filterwarnings('ignore')

from bokeh.io import output_notebook, show

In [None]:
# Canvas size in pixels; the height is the width floor-divided by 1.2.
plot_width = 800
plot_height = int(plot_width // 1.2)

In [None]:
def draw_map(df, plot_width, plot_height, colors, agg_func, interp, background_col):
    """Rasterise the measurement points with datashader and return the shaded image.

    Parameters
    ----------
    df : frame with 'longitude', 'latitude' and 'radiation' columns
    plot_width, plot_height : canvas size passed to ds.Canvas
    colors : colormap handed to tf.shade as ``cmap``
    agg_func : reduction factory, called as ``agg_func('radiation')`` (e.g. ds.count)
    interp : handed to tf.shade as ``how`` (e.g. 'log')
    background_col : background colour applied with tf.set_background

    Returns
    -------
    The shaded image with the background colour applied.

    Note: this helper is defined again, unchanged, in later cells of the notebook.
    """
    canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height)
    reduction = agg_func('radiation')
    aggregated = canvas.points(df, 'longitude', 'latitude', reduction)
    shaded = tf.shade(aggregated, cmap=colors, how=interp)
    return tf.set_background(shaded, color=background_col)

In [None]:
# Point-count map of every measurement: log shading, inferno colormap, black background.
img = draw_map(df, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

In [None]:
# Bounding box for the `_jpn` subset: x = longitude, y = latitude, bounds exclusive.
x_min_jpn, y_min_jpn = 128.03, 30.22
x_max_jpn, y_max_jpn = 148.65, 45.83
df_jpn = df[
    df.longitude.gt(x_min_jpn) & df.longitude.lt(x_max_jpn)
    & df.latitude.gt(y_min_jpn) & df.latitude.lt(y_max_jpn)
]

In [None]:
# Same map restricted to the `df_jpn` bounding-box subset.
img = draw_map(df_jpn, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

In [None]:
# Bounding box for the `_fk` subset: x = longitude, y = latitude, bounds exclusive.
x_min_fk, y_min_fk = 140.0166, 37.0047
x_max_fk, y_max_fk = 141.2251, 38.195
df_fk = df[
    df.longitude.gt(x_min_fk) & df.longitude.lt(x_max_fk)
    & df.latitude.gt(y_min_fk) & df.latitude.lt(y_max_fk)
]

In [None]:
# Same map restricted to the `df_fk` bounding-box subset.
img = draw_map(df_fk, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

# Another option

In [None]:
# 1.5 x IQR fences again, plus the 99.5th percentile (Q4) as an alternative upper cut-off.
# Q1 / Q3 are the quartiles; q1 / q3 are the lower / upper fences derived from them.
# NOTE(review): an earlier cell already trimmed `df` to the IQR fences, so when the
# notebook runs top to bottom these statistics describe the already-filtered data —
# confirm that is intended for this "other option".
Q1, Q3, Q4 = (df['radiation'].quantile(p) for p in (.25, .75, 0.995))
q1, q3 = Q1 - 1.5*(Q3 - Q1), Q3 + 1.5*(Q3 - Q1)

In [None]:
# Keep rows between the lower IQR fence (q1) and the 99.5th percentile (Q4), inclusive.
# NOTE(review): the result is not written to disk in any visible cell, although a later
# cell reads "measurements-out-2903-q1-995.csv" — confirm how that file is produced.
df = df[df['radiation'].between(q1, Q4)]

In [None]:
# Box plot of the radiation column after this filter.
boxplot = df.boxplot(column=['radiation'])

In [None]:
# Row count and preview of the filtered data.
print('Number of measurements: ', df.shape[0])
df.head()

In [None]:
# Smallest remaining radiation value.
df['radiation'].min()

In [None]:
# Largest remaining radiation value.
df['radiation'].max()

**Visualization**

In [None]:
import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from datashader.bokeh_ext import create_ramp_legend, create_categorical_legend
import warnings
warnings.filterwarnings('ignore')

from bokeh.io import output_notebook, show

In [None]:
# Canvas size in pixels; the height is the width floor-divided by 1.2.
plot_width = 800
plot_height = int(plot_width // 1.2)

In [None]:
def draw_map(df, plot_width, plot_height, colors, agg_func, interp, background_col):
    """Rasterise the measurement points with datashader and return the shaded image.

    Parameters
    ----------
    df : frame with 'longitude', 'latitude' and 'radiation' columns
    plot_width, plot_height : canvas size passed to ds.Canvas
    colors : colormap handed to tf.shade as ``cmap``
    agg_func : reduction factory, called as ``agg_func('radiation')`` (e.g. ds.count)
    interp : handed to tf.shade as ``how`` (e.g. 'log')
    background_col : background colour applied with tf.set_background

    Returns
    -------
    The shaded image with the background colour applied.

    Note: the notebook defines this helper in more than one cell with the same body.
    """
    canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height)
    reduction = agg_func('radiation')
    aggregated = canvas.points(df, 'longitude', 'latitude', reduction)
    shaded = tf.shade(aggregated, cmap=colors, how=interp)
    return tf.set_background(shaded, color=background_col)

In [None]:
# Point-count map of every remaining measurement: log shading, inferno colormap, black background.
img = draw_map(df, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

In [None]:
# Bounding box for the `_jpn` subset: x = longitude, y = latitude, bounds exclusive.
x_min_jpn, y_min_jpn = 128.03, 30.22
x_max_jpn, y_max_jpn = 148.65, 45.83
df_jpn = df[
    df.longitude.gt(x_min_jpn) & df.longitude.lt(x_max_jpn)
    & df.latitude.gt(y_min_jpn) & df.latitude.lt(y_max_jpn)
]

In [None]:
# Same map restricted to the `df_jpn` bounding-box subset.
img = draw_map(df_jpn, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

In [None]:
# Bounding box for the `_fk` subset: x = longitude, y = latitude, bounds exclusive.
x_min_fk, y_min_fk = 140.0166, 37.0047
x_max_fk, y_max_fk = 141.2251, 38.195
df_fk = df[
    df.longitude.gt(x_min_fk) & df.longitude.lt(x_max_fk)
    & df.latitude.gt(y_min_fk) & df.latitude.lt(y_max_fk)
]

In [None]:
# Same map restricted to the `df_fk` bounding-box subset.
img = draw_map(df_fk, plot_width, plot_height, inferno, ds.count, 'log', 'black')
img

**Statistics**

In [None]:
import pandas as pd
import numpy as np

import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from datashader.bokeh_ext import create_ramp_legend, create_categorical_legend
import warnings
warnings.filterwarnings('ignore')

from bokeh.io import output_notebook, show

In [None]:
# NOTE(review): these rebind the directory constants set near the top of the notebook
# ("Original-Data/" and "Analysis-Data/") to parent-relative paths, so earlier cells
# re-run after this point resolve different locations — confirm the intended working directory.
ORIGINAL_DIR = "../Original-Data/"
ANALYSIS_DIR = "../Analysis-Data/"

In [None]:
# Column dtypes handed to pd.read_csv: coordinates and radiation as 32-bit floats.
dtypes = {'latitude': np.float32, 'longitude': np.float32, 'radiation': np.float32}

In [None]:
# Load the previously filtered measurements with the dtypes declared above.
# NOTE(review): no visible cell writes "measurements-out-2903-q1-995.csv"; the only export
# shown earlier is "measurements-out-2903-q1q3.csv" — confirm this input file exists.
df = pd.read_csv(f"{ANALYSIS_DIR}measurements-out-2903-q1-995.csv", dtype=dtypes)
df.head()

In [None]:
# Row count before de-duplication.
print('Number of measurements: ', df.shape[0])

In [None]:
# Collapse rows sharing the same date and position, keeping the last occurrence,
# then renumber the index from zero.
df = (
    df
    .drop_duplicates(subset=['datetime', 'latitude', 'longitude'], keep='last')
    .reset_index(drop=True)
)
df.head()

In [None]:
# Row count after de-duplication.
print('Number of measurements: ', df.shape[0])

In [None]:
# Export the de-duplicated data without the index column.
df.to_csv(f"{ANALYSIS_DIR}measurements-unique-q1-995.csv", index=False)

In [None]:
# Rows inside the Japan bounding box (x = longitude, y = latitude, bounds exclusive),
# exported to their own CSV without the index column.
x_min_jpn, y_min_jpn = 128.03, 30.22
x_max_jpn, y_max_jpn = 148.65, 45.83
df_jpn = df[
    df.longitude.gt(x_min_jpn) & df.longitude.lt(x_max_jpn)
    & df.latitude.gt(y_min_jpn) & df.latitude.lt(y_max_jpn)
]
df_jpn.to_csv(f"{ANALYSIS_DIR}measurements-japan-q1-995.csv", index=False)

In [None]:
# Rows inside the Fukushima bounding box (x = longitude, y = latitude, bounds exclusive),
# exported to their own CSV without the index column.
x_min_fk, y_min_fk = 140.0166, 37.0047
x_max_fk, y_max_fk = 141.2251, 38.195
df_fk = df[
    df.longitude.gt(x_min_fk) & df.longitude.lt(x_max_fk)
    & df.latitude.gt(y_min_fk) & df.latitude.lt(y_max_fk)
]
df_fk.to_csv(f"{ANALYSIS_DIR}measurements-fukushima-q1-995.csv", index=False)

In [None]:
def draw_map(df, plot_width, plot_height, colors, agg_func, interp, background_col):
    """Rasterise the measurement points with datashader and return the shaded image.

    Parameters
    ----------
    df : frame with 'longitude', 'latitude' and 'radiation' columns
    plot_width, plot_height : canvas size passed to ds.Canvas
    colors : colormap handed to tf.shade as ``cmap``
    agg_func : reduction factory, called as ``agg_func('radiation')`` (e.g. ds.count)
    interp : handed to tf.shade as ``how`` (e.g. 'log')
    background_col : background colour applied with tf.set_background

    Returns
    -------
    The shaded image with the background colour applied.

    Note: the notebook defines this helper in more than one cell with the same body.
    """
    canvas = ds.Canvas(plot_width=plot_width, plot_height=plot_height)
    reduction = agg_func('radiation')
    aggregated = canvas.points(df, 'longitude', 'latitude', reduction)
    shaded = tf.shade(aggregated, cmap=colors, how=interp)
    return tf.set_background(shaded, color=background_col)

In [None]:
import requests
def get_boundingbox_country(country, output_as='boundingbox'):
    """
    Look up a country on the OpenStreetMap Nominatim search API and return
    its bounding box or centre point in EPSG:4326.

    Parameters
    ----------
    country : str
        Name of the country; sent as the ``country`` query parameter
        (URL-encoded by requests, so names with spaces are safe).
    output_as : str
        Choose from 'boundingbox' or 'center'.
         - 'boundingbox' for [latmin, latmax, lonmin, lonmax]
         - 'center' for [latcenter, loncenter]

    Returns
    -------
    output : list of float
        Coordinates of the first search result, converted to float.

    Raises
    ------
    ValueError
        If ``output_as`` is neither 'boundingbox' nor 'center'.
    IndexError
        If the service returns no result for ``country``.
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    # Validate before going to the network so a typo fails fast and clearly
    # (previously an unsupported value surfaced as an unbound local variable).
    if output_as not in ('boundingbox', 'center'):
        raise ValueError(
            "output_as must be 'boundingbox' or 'center', got {0!r}".format(output_as)
        )

    # Let requests build the query string so the country name is URL-encoded.
    # A timeout keeps a stalled connection from hanging the notebook, and the
    # Nominatim usage policy asks clients to send an identifying User-Agent.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search',
        params={'country': country, 'format': 'json', 'polygon': 0},
        headers={'User-Agent': 'safecast-measurements-notebook'},
        timeout=30,
    )
    response.raise_for_status()

    results = response.json()
    if not results:
        raise IndexError('no Nominatim result for country {0!r}'.format(country))
    best_match = results[0]

    # parse response to list
    if output_as == 'boundingbox':
        raw_values = best_match[output_as]
    else:
        raw_values = [best_match.get(key) for key in ['lat', 'lon']]
    return [float(value) for value in raw_values]

In [None]:
import matplotlib.pyplot as plt
def create_statistics(radiation_map, country):
    """Summarise and map the measurements inside a country's bounding box.

    The bounding box is obtained from ``get_boundingbox_country(country)`` and
    unpacked as (lat_min, lat_max, lon_min, lon_max); rows strictly inside it
    are kept. Prints the row count and ``describe()`` of the radiation column,
    draws a box plot, and returns the ``draw_map`` image (600 px wide, count
    aggregation, log shading, inferno colormap on black).

    Parameters
    ----------
    radiation_map : frame with 'longitude', 'latitude' and 'radiation' columns
    country : country name passed on to get_boundingbox_country
    """
    lat_min, lat_max, lon_min, lon_max = get_boundingbox_country(country)
    inside_box = (
        radiation_map.longitude.gt(lon_min) & radiation_map.longitude.lt(lon_max)
        & radiation_map.latitude.gt(lat_min) & radiation_map.latitude.lt(lat_max)
    )
    subset = radiation_map[inside_box]

    print("[INFO] Number of measurements:", subset.shape[0])
    print("[INFO] Wait a second. We will provide detailed data in a moment.")
    subset.boxplot(column=['radiation'])
    print(subset.radiation.describe())

    width = 600
    height = int(width // 1.2)
    return draw_map(subset, width, height, inferno, ds.count, 'log', 'black')

In [None]:
# Per-country statistics: each cell filters `df` to the country's bounding box,
# prints the summary and displays the returned map as the cell output.
vis = create_statistics(df, 'Japan')
vis

In [None]:
vis = create_statistics(df, 'Ireland')
vis

In [None]:
vis = create_statistics(df, 'Sweden')
vis

In [None]:
vis = create_statistics(df, 'Czech Republic')
vis

In [None]:
# NOTE(review): 'Brasil' is the Portuguese spelling while the other names are English —
# confirm the country lookup resolves it as expected.
vis = create_statistics(df, 'Brasil')
vis

In [None]:
vis = create_statistics(df, 'Poland')
vis

In [None]:
vis = create_statistics(df, 'Ukraine')
vis

In [None]:
vis = create_statistics(df, 'Germany')
vis

In [None]:
vis = create_statistics(df, 'China')
vis

In [None]:
vis = create_statistics(df, 'Iraq')
vis

In [None]:
vis = create_statistics(df, 'Nepal')
vis

In [None]:
import matplotlib.pyplot as plt
def create_statistics_for_coordinates(radiation_map, coordinates):
    """Summarise and map the measurements inside an explicit bounding box.

    Rows strictly inside the box are kept. Prints the row count and
    ``describe()`` of the radiation column, draws a box plot, and returns the
    ``draw_map`` image (600 px wide, count aggregation, log shading, inferno
    colormap on black).

    Parameters
    ----------
    radiation_map : frame with 'longitude', 'latitude' and 'radiation' columns
    coordinates : sequence unpacked as (lon_min, lon_max, lat_min, lat_max)
    """
    lon_min, lon_max, lat_min, lat_max = coordinates
    inside_box = (
        radiation_map.longitude.gt(lon_min) & radiation_map.longitude.lt(lon_max)
        & radiation_map.latitude.gt(lat_min) & radiation_map.latitude.lt(lat_max)
    )
    subset = radiation_map[inside_box]

    print("[INFO] Number of measurements:", subset.shape[0])
    print("[INFO] Wait a second. We will provide detailed data in a moment.")
    subset.boxplot(column=['radiation'])
    print(subset.radiation.describe())

    width = 600
    height = int(width // 1.2)
    return draw_map(subset, width, height, inferno, ds.count, 'log', 'black')

In [None]:
# Hand-written bounding boxes, each ordered [lon_min, lon_max, lat_min, lat_max] to match
# the unpacking in create_statistics_for_coordinates.
coordinates = {"Fukushima": [140.0166, 141.2251, 37.0047, 38.195], 
               "Netherlands": [3.10, 7.32, 50.73, 53.56],}

Fukushima

In [None]:
# Statistics and map for the "Fukushima" bounding box.
vis = create_statistics_for_coordinates(df, coordinates["Fukushima"])
vis

Netherlands

In [None]:
# Statistics and map for the "Netherlands" bounding box.
vis = create_statistics_for_coordinates(df, coordinates["Netherlands"])
vis