In [1]:
# code set-up

# Import statements
import pandas as pd
import os
import datetime as dt

# Directories for reading and writing data
thedir = os.getcwd()
writedir = os.path.abspath(os.path.join(os.path.dirname(thedir), '..', 'data/interim'))
extdir = os.path.abspath(os.path.join(os.path.dirname(thedir), '..', 'data/external'))

In [2]:
# load the data files
# I have commented out the unneeded analysis for this application to the Bern 2018 data

# Data from Biel July 2022
#data_lcd = pd.read_csv(F'{writedir}/alldata.csv')

# data from closesent OpenSenseMap sensor (Brügg) here: https://familie-hoffmann.me/ 
# data_os = pd.read_csv(F'{extdir}/opensense-temp.csv')

# data with metadata from 2018 Bern Stadtklima study
data_be = pd.read_csv(F'{extdir}/2018_Stadtklimamessnetz_Rohdaten.csv')
data_be_meta = pd.read_csv(F'{extdir}/2018_Stadtklimamessnetz_Metadaten.csv')

# data without metadata from select stations
data_test = pd.read_csv(F'{extdir}/ref_2018.csv')

# archival mean data 1983
#archive_ = pd.read_csv(F"{writedir}/archive_1983.csv")
#archive_.drop(['Unnamed: 0'],axis=1,inplace=True)



# key names that describe the choice
# load dictionary

In [3]:
# data corrections to fit into class objects. This is unique to each data set.

# metadata correction for Bern
# this block makes the metadata for the bern sensors easily readable into the class as dicts later on
def make_logger(x):
    data = 'Log_' + str(x)
    return data

def make_str(x):
    return str(x)

def get_city(x):
    if "Zoll" in x:
        return "Zollikofen"
    elif "Uett" in x:
        return "Uettligen"
    else:
        return "Bern"

data_be_meta['logger'] = data_be_meta.LogNr_2018.apply(lambda x: make_logger(x))
data_be_meta['gps_lat'] = data_be_meta.NORD_CHTOPO.apply(lambda x: make_str(x))
data_be_meta['gps_lon'] = data_be_meta.OST_CHTOPO.apply(lambda x: make_str(x))
data_be_meta['coord'] = '(' + data_be_meta.gps_lat + ',' + data_be_meta.gps_lon + ')'
data_be_meta.set_index('logger',inplace=True)
data_be_meta = data_be_meta[['coord','Name','ELEV_CHTOPO']].copy()
data_be_meta['city'] = data_be_meta.Name.apply(lambda x: get_city(x))

# correct date-time column for each set of loaded data

def fix_date_string(x):
    x = x[:10] + " " + x[11:-5]
    return x

#data_os['time_int'] = data_os.createdAt.apply(lambda x: fix_date_string(x))
#data_os['md'] = pd.to_datetime(data_os.time_int,format = '%Y/%m/%d %H:%M:%S')
#data_lcd["md"] = pd.to_datetime(data_lcd.time,format = '%Y/%m/%d %H:%M:%S')
data_be['md'] = pd.to_datetime(data_be['Zeit'], infer_datetime_format=True)
#archive_['md'] = pd.to_datetime(archive_['index'], infer_datetime_format=True)
data_test['md'] = pd.to_datetime(data_test['Date_time_CET'], infer_datetime_format=True)

# split data for data test into low cost device and automatic weather stations
data_aws = data_test[['BOLL_AWS_TEMP', 'BOLL_PRECIP', 'BOLL_RADIATION_GLOBAL', 'BOLL_WIND_SPEED_mean','ZOLL_AWS_TEMP', 'ZOLL_RADIATION_GLOBAL', 'ZOLL_SUNSHINE', 'ZOLL_PRECIP', 
                   'ZOLL_WIND_SPEED_MEAN','AFU_AWS_TEMP', 'AFU_WIND_SPEED_MEAN', 'AFU_RADIATION_GLOBAL','md']]
data_lcd = data_test[['BOLL_LCD_TEMP','ZOLL_2m_LCD_TEMP',
       'ZOLL_3m_LCD_TEMP','md','AFU_LCD_TEMP','ZOLL_STEVENSON_LCD_TEMP']]

## Create sensor classes
This section reads in the sensor classes by chunk of data (biel data, bern data, archive_ data, opensensor data, reference data)

Note that the variables are instatiated as `None` as default

There is a base `Sensor` Class and four subclasses: `LowCostSensors AutoSensors OpenSensors archive_Sensors`

The class `LowCostSensors` is for the LCD sensors that this study is based around. `AutoSensors` are for the automatic weather station data. `OpenSensors` is for readings from the OpenSenseMap network. `archive_Sensors` are for data from the archive_.

In [4]:
# base sensor class inherited by the different sensor type subclasses
class Sensors(object):
    def __init__(self, project=None, year=None, name=None, city=None, sensor_type=None, data = None, qty=None, elev=None, coord=None, quant = None):
        self.project = project
        self.year = year
        self.name = name
        self.city = city
        self.data = data
        self.qty = qty
        self.sensor_type = sensor_type
        self.coord = coord
        self.elev = elev
        self.quant = quant
    def get_month(self):
        x = self.data
        x['month'] = x.md.dt.to_period("m")
        return x
    def get_year(self):
        x = self.data
        x['year'] = x.md.dt.to_period("y")
        return x

In [5]:
# the two sensors class used here correspond to the AWS sensors and the LCD sensors

class AutoSensors(Sensors):
    ## date time functions
    def get_day(self):
        x = self.data
        x['day'] = x.md.dt.to_period("d")
        return x
    
    ## basic stats functions
    def get_mean(self,time):
        x = self.data
        print(x.head())
        data = x.groupby([time]).agg('mean')
        return data
    def get_max(self,time):
        x = self.data
        print(x.head())
        data = x.groupby([time]).agg('max')
        a = data.copy()
        return a
    def get_min(self,time):
        x = self.data
        data = x.groupby([time]).agg('min')
        return data
    def get_var(self,time):
        x = self.data
        data = x.groupby([time]).agg('var')
        return data

class LowCostSensors(Sensors):
    
    ## date time functions
    def get_day(self):
        x = self.data
        x['day'] = x.md.dt.to_period("d")
        return x
    
    ## basic stats functions
    def get_mean(self,time):
        x = self.data
        data = x.groupby([time]).agg('mean')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_max(self,time):
        x = self.data
        data = x.groupby([time]).agg('max')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_min(self,time):
        x = self.data
        data = x.groupby([time]).agg('min')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_var(self,time):
        x = self.data
        data = x.groupby([time]).agg('var')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    
    ## elevation and height corrections and Swiss Meteo corrections
    
    ## Radiation
    
    ## humidity
    
    # homogenization with Swiss meteo

In [6]:
# read in opensensor
#os_1 = OpenSensors('OpenSense',2022,'Gumme Brügg','Brügg','custom',data_os,'Temperature',469,(47.126465,7.285249))

In [7]:
# read in Biel Sensors
elevation= {"Log_202":430, "Log_201":432,"Log_203":433,"Log_204":430,"Log_205":439,"Log_206":437,"Log_207":430}
coord= {"Log_202":(47.130669,7.236258), "Log_201":(47.130792,7.241046),"Log_203": (47.136637, 7.246960),"Log_204":(47.141086,7.253485),"Log_205":(47.144746,7.265149),"Log_206":(47.138338,7.295326),"Log_207":(47.179081,7.415102)}
city= {"Log_202":'Biel', "Log_201":'Biel',"Log_203":'Biel',"Log_204":'Biel',"Log_205":'Biel',"Log_206":'Orpund',"Log_207":'Grenchen'}

log_cols = [ x for x in data_lcd.columns if x[:3] == "Log" ]

mylist = []
for i in log_cols:
    data = data_lcd[['md',i]].copy()
    mylist.append(LowCostSensors('biel-temps',2022,i,city[i],'lcd',data,'Temperature',elevation[i],coord[i]))
biel_sensors = mylist

In [8]:
# read in bern sensors for 2018

## Log 64 and Log 63 do not correspond in metadata and data files for bern 2021

# checking dict keys
#a = data_be.columns
#b = data_be_meta.index
#mylist = []
#for x in a:
#    if x not in b:
#        mylist.append(x)
#    else:
#        mylist.append("OK")

# uncomment to correct bern 2021 metadata document        
data_be['Log_63'] = data_be['Log_64']
data_be.drop(['Log_64'],axis = 1,inplace=True)
data_be['Log_83'] = data_be['Log_83_REF_AFU_3m']
data_be['Log_98'] = data_be['Log_98_REF_ZOLL_2m']
data_be['Log_99'] = data_be['Log_99_ZOLL_3m']
data_be['Log_999'] = data_be['Log_999_REF_ZOLL_HAUS']
data_be.drop(['Log_83_REF_AFU_3m','Log_98_REF_ZOLL_2m','Log_99_ZOLL_3m','Log_999_REF_ZOLL_HAUS'],axis = 1,inplace=True)

log_cols = [ x for x in data_be.columns if x[:3] == "Log" ]
data_be_meta
dict_city = data_be_meta['city'].to_dict()
dict_coord = data_be_meta['city'].to_dict()
dict_elev = data_be_meta['city'].to_dict()
mylist = []
for i in log_cols:
    data = data_be[['md',i]].copy()
    mylist.append(LowCostSensors('bern-temps',2021,i,dict_city[i],'lcd',data,'Temperature',dict_elev[i],dict_coord[i]))
bern_sensors = mylist

In [9]:
# read in archive_ sensors
#elevation = {'mai':720, 'vog':544, 'boz':441}
#coord = {'mai':(47.145999,7.242621), 'vog': (47.12456,7.242723), 'boz': (47.15189,7.272195)}
#city = {'mai':'evilard', 'vog':'biel', 'boz':'biel'}

#lgm = [ x for x in archive_.columns if "mai" in x]
#lgv = [ x for x in archive_.columns if "vog" in x]
#lgb = [ x for x in archive_.columns if "boz" in x]
#lgm.append('md')
#lgv.append('md')
#lgb.append('md')

#cols = [lgm,lgv,lgb]

#mylist = []
#for i in cols:
#    data = archive_[i].copy()
#    if 'mai' in i[0]:
#        key = 'mai'
#        mylist.append(archive_Sensors('archive_',1983,key,city[key],'lcd',data,'Temperature',elevation[key],coord[key]))
#    elif 'boz' in i[0]:
#        key = 'boz'
#        mylist.append(archive_Sensors('archive_',1983,key,city[key],'lcd',data,'Temperature',elevation[key],coord[key]))
#    elif 'vog' in i[0]:
#        key = 'vog'
#        mylist.append(archive_Sensors('archive_',1983,key,city[key],'lcd',data,'Temperature',elevation[key],coord[key]))
#archival = mylist

In [10]:
# read in bern aws data
log_cols_z = [ x for x in data_.columns if x[:3] == "ZOL" ]
log_cols_b = [ x for x in data_.columns if x[:3] == "BOL" ]
log_cols_a = [ x for x in data_.columns if x[:3] == "AFU" ]
a = [log_cols_z,log_cols_b,log_cols_a]
mylist = []
for j in a:
    for i in j:
        data = data_[['md',i]].copy()
        if 'TEMP' in i:
            mylist.append(AutoSensors('bern-refs',2018,i,'Bern','lcd',data,'Temperature'))
        elif 'WIND' in i:
            mylist.append(AutoSensors('bern-refs',2018,i,'Bern','lcd',data,'Wind_Speed'))
        elif 'PRECIP' in i:
            mylist.append(AutoSensors('bern-refs',2018,i,'Bern','lcd',data,'Precipitation'))
        elif 'RADI' in i:
            mylist.append(AutoSensors('bern-refs',2018,i,'Bern','lcd',data,'Radiation'))
        elif 'SUNS' in i:
            mylist.append(AutoSensors('bern-refs',2018,i,'Bern','lcd',data,'Sunshine'))
        
aws_sensors = mylist

NameError: name 'data_' is not defined

In [None]:
# get month and day columns for aggregation
[ x.get_month() for x in archival ]
[ x.get_month() for x in biel_sensors ]
[ x.get_day() for x in biel_sensors ]
[ x.get_month() for x in bern_sensors ]
[ x.get_day() for x in bern_sensors ]
[ x.get_month() for x in aws_sensors ]
[ x.get_day() for x in aws_sensors ]
#os_1.get_month()
#os_1.get_day()
x = 0

In [None]:
mean_day = [x.get_mean('day') for x in bern_sensors]
max_day = [x.get_max('day') for x in bern_sensors]
min_day = [x.get_min('day') for x in bern_sensors]

mean_day_a = [x.get_mean('day') for x in aws_sensors]
max_day_a = [x.get_max('day') for x in aws_sensors]
min_day_a = [x.get_min('day') for x in aws_sensors]

In [None]:
aws_sensors[2].get_max('month')

In [None]:
aws_sensors[0].get_max('month')

In [None]:
def make_plottable(x):
    data = pd.concat(x,axis = 1)
    return data

In [None]:
a = make_plottable(max_day_a)

In [None]:
min_day[6]