In [1]:
import pandas as pd
import os
import datetime as dt

thedir = os.getcwd()
writedir = os.path.abspath(os.path.join(os.path.dirname(thedir), '..', 'data/interim'))
extdir = os.path.abspath(os.path.join(os.path.dirname(thedir), '..', 'data/external'))

In [2]:
# create archival data

maison_blanche_mean = [8.5,10.7,14.3,15.1,17.3,"nan","nan"]
vogelsang_mean = [10.3,12.2,16.1,"nan","nan",14.7,9.4]
boezingenmoos_mean = [10.2,12.1,16.1,16.5,17.9,14.5,9.2]
maison_min = [-1.2,0.6,6.5,6.2,6.2,3.1,'nan']
vogel_min = [0.7,2.5,3.7,8.6,'nan',4.4,0.9]
boz_min = [-1.9,0.6,6.5,6.2,6.2,3.1,-0.9]
maison_max = [18.9,22.3,25.5,24.4,26.4,20.3,18.4]
vogel_max = [21.8,24.3,27.9,27.7,29.6,22.4,20.5]
boz_max = [22.1,25.3,29.6,28.6,31.2,24.1,21.8,]

maison=pd.DataFrame([maison_min,maison_max,maison_blanche_mean],columns = ['apr','may','jun','jul','aug','sep','oct'],index = ['min','max','mean'])
maison.columns = [x + '-' + '1983' for x in maison.columns]
maison = maison.transpose()
maison.columns = [x + '-' + 'maison' for x in maison.columns]

vogel=pd.DataFrame([vogel_min,vogel_max,vogelsang_mean],columns = ['apr','may','jun','jul','aug','sep','oct'],index = ['min','max','mean'])
vogel.columns = [x + '-' + '1983' for x in vogel.columns]
vogel = vogel.transpose()
vogel.columns = [x + '-' + 'vogel' for x in vogel.columns]

boz=pd.DataFrame([boz_min,boz_max,boezingenmoos_mean],columns = ['apr','may','jun','jul','aug','sep','oct'],index = ['min','max','mean'])
boz.columns = [x + '-' + '1983' for x in boz.columns]
boz = boz.transpose()
boz.columns  = [x + '-' + 'boz' for x in boz.columns]

archive = pd.concat([boz,maison,vogel],axis = 1)

archive.reset_index(inplace=True, drop = False)

In [5]:
# mydata
data_lcd = pd.read_csv(F'{writedir}/alldata.csv')

# data from here: https://familie-hoffmann.me/ 
data_os = pd.read_csv(F'{extdir}/opensense-temp.csv')

# data from here: https://boris.unibe.ch/161882/
data_be = pd.read_csv(F'{extdir}/2018_Stadtklimamessnetz_Rohdaten.csv')
data_be_meta = pd.read_csv(F'{extdir}/2018_Stadtklimamessnetz_Metadaten.csv')

# test data 2018
data_test = pd.read_csv(F'{extdir}/ref_2018.csv')

# correct date-time column
def fix_date_string(x):
    x = x[:10] + " " + x[11:-5]
    return x

data_os['time_int'] = data_os.createdAt.apply(lambda x: fix_date_string(x))
data_os['md'] = pd.to_datetime(data_os.time_int,format = '%Y/%m/%d %H:%M:%S')
data_lcd["md"] = pd.to_datetime(data_lcd.time,format = '%Y/%m/%d %H:%M:%S')
data_be['md'] = pd.to_datetime(data_be['Zeit'], infer_datetime_format=True)
archive['md'] = pd.to_datetime(archive['index'], infer_datetime_format=True)

In [8]:
data_be_meta

Unnamed: 0,LogNr_2018,Name,NORD_CHTOPO,OST_CHTOPO,ELEV_CHTOPO
0,1,Bubenbergplatz,46.94729,7.43849,542.5
1,2,"Ostermundigen, Lötschenstr 13",46.95289,7.48707,551.5
2,3,Thormannstrasse 62,46.93888,7.45030,526.9
3,4,Thunplatz,46.94065,7.45822,551.6
4,5,PH Von Roll Dach (Solar),46.95382,7.42310,553.9
...,...,...,...,...,...
80,98,Zollikofen Referenz 2m,46.99080,7.46403,552.8
81,99,Zollikofen Referenz 3m,46.99080,7.46403,552.8
82,100,Stadtlabor (2.45m),46.96312,7.46255,554.5
83,101,"Steinhölzliwald, (Zugang: Goumoenstr. 58)",46.93514,7.42805,573.7


In [11]:
def make_logger(x):
    data = 'Log_' + str(x)
    return data

def make_str(x):
    return str(x)

def get_city(x):
    if "Zoll" in x:
        return "Zollikofen"
    elif "Uett" in x:
        return "Uettligen"
    else:
        return "Bern"

data_be_meta['logger'] = data_be_meta.LogNr_2018.apply(lambda x: make_logger(x))
data_be_meta['gps_lat'] = data_be_meta.NORD_CHTOPO.apply(lambda x: make_str(x))
data_be_meta['gps_lon'] = data_be_meta.OST_CHTOPO.apply(lambda x: make_str(x))
data_be_meta['coord'] = '(' + data_be_meta.gps_lat + ',' + data_be_meta.gps_lon + ')'
data_be_meta.set_index('logger',inplace=True)
data_be_meta = data_be_meta[['coord','Name','ELEV_CHTOPO']].copy()
data_be_meta['city'] = data_be_meta.Name.apply(lambda x: get_city(x))

In [12]:
class Sensors(object):
    def __init__(self, project=None, year=None, name=None, city=None, sensor_type=None, data = None, elev=None, coord=None, quant = None):
        self.project = project
        self.year = year
        self.name = name
        self.city = city
        self.data = data
        self.sensor_type = sensor_type
        self.coord = coord
        self.elev = elev
        self.quant = quant
    def get_month(self):
        x = self.data
        x['month'] = x.md.dt.to_period("m")
        return x
    def get_year(self):
        x = self.data
        x['year'] = x.md.dt.to_period("y")
        return x

class OpenSensors(Sensors):    
    def get_day(self):
        x = self.data
        x['day'] = x.md.dt.to_period("d")
        return x
    def get_mean(self,time):
        x = self.data
        data = x.groupby([time]).agg('mean')
        a = [x for x in data.columns if x[:3] == "val"]
        data[self.name] = data[a]
        return data[[self.name]]
    def get_max(self,time):
        x = self.data
        data = x.groupby([time]).agg('max')
        a = [x for x in data.columns if x[:3] == "val"]
        data[self.name] = data[a]
        return data[[self.name]]
    def get_min(self,time):
        x = self.data
        data = x.groupby([time]).agg('min')
        a = [x for x in data.columns if x[:3] == "val"]
        data[self.name] = data[a]
        return data[[self.name]]
    def get_var(self,time):
        x = self.data
        data = x.groupby([time]).agg('var')
        a = [x for x in data.columns if x[:3] == "val"]
        data[self.name] = data[a]
        return data[[self.name]]
class ArchiveSensors(Sensors):
    
    ## basic stats functions
    def get_mean(self):
        data = self.data
        a = [x for x in data.columns if x[:3] == "mea"]
        data.set_index('month',inplace=True,drop=False)
        return data[a]
    def get_max(self):
        data = self.data
        a = [x for x in data.columns if x[:3] == "max"]
        data.set_index('month',inplace=True,drop=False)
        return data[a]
    def get_min(self):
        data = self.data
        a = [x for x in data.columns if x[:3] == "min"]
        data.set_index('month',inplace=True,drop=False)
        return data[a]
    

class LowCostSensors(Sensors):
    
    ## date time functions
    def get_day(self):
        x = self.data
        x['day'] = x.md.dt.to_period("d")
        return x
    
    ## basic stats functions
    def get_mean(self,time):
        x = self.data
        data = x.groupby([time]).agg('mean')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_max(self,time):
        x = self.data
        data = x.groupby([time]).agg('max')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_min(self,time):
        x = self.data
        data = x.groupby([time]).agg('min')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    def get_var(self,time):
        x = self.data
        data = x.groupby([time]).agg('var')
        a = [x for x in data.columns if x[:3] == "Log"]
        return data[a]
    
    ## elevation and height corrections and Swiss Meteo corrections
    
    ## Radiation
    
    ## humidity
    
    # homogenization with Swiss meteo

In [13]:
# read in opensensor
os_1 = OpenSensors('OpenSense',2022,'Gumme Brügg','Brügg','custom',data_os,469,(47.126465,7.285249))

# read in Biel Sensors
elevation= {"Log_202":430, "Log_201":432,"Log_203":433,"Log_204":430,"Log_205":439,"Log_206":437,"Log_207":430}
coord= {"Log_202":(47.130669,7.236258), "Log_201":(47.130792,7.241046),"Log_203": (47.136637, 7.246960),"Log_204":(47.141086,7.253485),"Log_205":(47.144746,7.265149),"Log_206":(47.138338,7.295326),"Log_207":(47.179081,7.415102)}
city= {"Log_202":'Biel', "Log_201":'Biel',"Log_203":'Biel',"Log_204":'Biel',"Log_205":'Biel',"Log_206":'Orpund',"Log_207":'Grenchen'}

log_cols = [ x for x in data_lcd.columns if x[:3] == "Log" ]

mylist = []
for i in log_cols:
    data = data_lcd[['md',i]].copy()
    mylist.append(LowCostSensors('biel-temps',2022,i,city[i],'lcd',data,elevation[i],coord[i]))
biel_sensors = mylist

# read in bern sensors

## Log 64 and Log 63 do not correspond in metadata and data files.

# checking dict keys
#a = data_be.columns
#b = data_be_meta.index
#mylist = []
#for x in a:
#    if x not in b:
#        mylist.append(x)
#    else:
#        mylist.append("OK")

        
data_be['Log_63'] = data_be['Log_64']
data_be.drop(['Log_64'],axis = 1,inplace=True)


log_cols = [ x for x in data_be.columns if x[:3] == "Log" ]
data_be_meta
dict_city = data_be_meta['city'].to_dict()
dict_coord = data_be_meta['city'].to_dict()
dict_elev = data_be_meta['city'].to_dict()
mylist = []
for i in log_cols:
    data = data_be[['md',i]].copy()
    mylist.append(LowCostSensors('bern-temps',2021,i,dict_city[i],'lcd',data,dict_elev[i],dict_coord[i]))
bern_sensors = mylist


# read in archive sensors
elevation = {'mai':720, 'vog':544, 'boz':441}
coord = {'mai':(47.145999,7.242621), 'vog': (47.12456,7.242723), 'boz': (47.15189,7.272195)}
city = {'mai':'evilard', 'vog':'biel', 'boz':'biel'}

lgm = [ x for x in archive.columns if "mai" in x]
lgv = [ x for x in archive.columns if "vog" in x]
lgb = [ x for x in archive.columns if "boz" in x]
lgm.append('md')
lgv.append('md')
lgb.append('md')

cols = [lgm,lgv,lgb]

mylist = []
for i in cols:
    data = archive[i].copy()
    if 'mai' in i[0]:
        key = 'mai'
        mylist.append(ArchiveSensors('archive',1983,key,city[key],'lcd',data,elevation[key],coord[key]))
    elif 'boz' in i[0]:
        key = 'boz'
        mylist.append(ArchiveSensors('archive',1983,key,city[key],'lcd',data,elevation[key],coord[key]))
    elif 'vog' in i[0]:
        key = 'vog'
        mylist.append(ArchiveSensors('archive',1983,key,city[key],'lcd',data,elevation[key],coord[key]))
archival = mylist

KeyError: 'Log_83_REF_AFU_3m'

In [16]:
# get month and day columns for aggregation
[ x.get_month() for x in archival ]
[ x.get_month() for x in biel_sensors ]
[ x.get_day() for x in biel_sensors ]
[ x.get_month() for x in bern_sensors ]
[ x.get_day() for x in bern_sensors ]
os_1.get_month()
os_1.get_day()
x = 0

In [10]:
mean_day = [x.get_mean('day') for x in bern_sensors]
max_day = [x.get_max('day') for x in bern_sensors]
min_day = [x.get_min('day') for x in bern_sensors]

mean_day = [x.get_mean('day') for x in bern_sensors]
max_day = [x.get_max('day') for x in bern_sensors]
min_day = [x.get_min('day') for x in bern_sensors]


In [11]:
os_1.get_max('month')

In [12]:
max_day[1].columns

In [13]:
max_day[3]

In [14]:
min_day[6]