In [6]:
#import packages 
import pandas as pd
from io import StringIO
import matplotlib as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from datetime import datetime
import seaborn as sns
from bokeh.io import show, output_notebook, reset_output
from bokeh.plotting import figure
from bokeh.plotting import ColumnDataSource
from bokeh.io import export_png



output_notebook()

# Anlysis of the Bee Hive Data from Schwartau

In [7]:
temp_sch = "../capstone_working/bee_hive_data/temperature_schwartau.csv"
flow_sch = "../capstone_working/bee_hive_data/flow_schwartau.csv"
weight_sch = "../capstone_working/bee_hive_data/weight_schwartau.csv"
humidity_sch = "../capstone_working/bee_hive_data/humidity_schwartau.csv"

In [8]:
def mean_per_hour(ifile):
    '''group data per day: sum of flow (=netflow) and mean temperature, weight and humidity'''
    name = str(ifile)
    bee_hive = pd.read_csv(ifile, sep=',', decimal=".")
    bee_hive['timestamp'] = pd.to_datetime(bee_hive['timestamp'])
    bee_hive.sort_values(by="timestamp") 
    bee_hive.set_index('timestamp', inplace=True) 
    bee_hive = bee_hive.groupby(pd.Grouper(freq='H')).mean()
    bee_hive.fillna(method='ffill', inplace=True) 
    return bee_hive 

#calling mean_per_hour function on the input files
netflow_sch = mean_per_hour(flow_sch)
temperature_sch = mean_per_hour(temp_sch)
weight_sch = mean_per_hour(weight_sch)
humidity_sch = mean_per_hour(humidity_sch)

## Netflow <br>
Inflow (green) and Outflow (red) per hour

In [9]:
source1 = ColumnDataSource(netflow_sch[netflow_sch>0])
source2 = ColumnDataSource(netflow_sch[netflow_sch<0])
plot1 = figure(title="Flow per hour",x_axis_type='datetime', x_axis_label ="Date", 
               y_axis_label ="Flow")
plot1.line(x = 'timestamp', y = 'flow', color='red', source= source1)
plot1.line(x = 'timestamp', y='flow', color='green', source=source2)
export_png(plot1, filename="/plots/plot1.png")
show(plot1)

RuntimeError: Neither firefox and geckodriver nor a variant of chromium browser and chromedriver are available on system PATH. You can install the former with 'conda install -c conda-forge firefox geckodriver'.

## Humidity <br>

Humidity over Hour. With Optimum between 50% and 60%.

In [10]:
source3 = ColumnDataSource(humidity_sch[humidity_sch >= 60])
source4 = ColumnDataSource(humidity_sch[(humidity_sch > 50) & (humidity_sch < 60)])
source5 = ColumnDataSource(humidity_sch[humidity_sch <= 50])
plot2 = figure(title = "Humidity per hour", x_axis_type = 'datetime', x_axis_label = "Date", 
               y_axis_label = "Humidity")
plot2.scatter(x = 'timestamp', y = 'humidity', color = 'red', source = source3)
plot2.scatter(x = 'timestamp', y ='humidity', color = 'green', source = source4)
plot2.scatter(x = 'timestamp', y ='humidity', color = 'red', source = source5)
show(plot2)

## Temperature <br>
Temperature of the Hive over time. With optimal Temperature between 30 and 35 °C.

In [32]:
source6 = ColumnDataSource(temperature_sch[temperature_sch >= 35])
source7 = ColumnDataSource(temperature_sch[(temperature_sch > 30) & (temperature_sch < 35)])
source8 = ColumnDataSource(temperature_sch[temperature_sch <= 30])
plot3 = figure(title = "Temperature per hour", x_axis_type = 'datetime', x_axis_label = "Date", 
               y_axis_label = "Humidity")
plot3.line(x = 'timestamp', y = 'temperature', color = 'red', alpha=0.6,source = source6, line_width=2)
plot3.line(x = 'timestamp', y ='temperature', color = 'green',alpha=0.6, source = source7, line_width=2)
plot3.line(x = 'timestamp', y ='temperature', color = 'navy', alpha=0.6,source = source8, line_width=2)
show(plot3)

## Weight <br>
Weight of the Hive per Hour

In [23]:
source9 = ColumnDataSource(weight_sch)

plot4 = figure(title = "Weight per hour", x_axis_type = 'datetime', x_axis_label = "Date", 
               y_axis_label = "Weight")
plot4.line(x = 'timestamp', y = 'weight', color = 'navy',alpha=0.3, source = source9, line_width=3)
show(plot4)

# Analysis of the Bee Hive Data from Würzburg <br>
Data is missing for the period of months between May 2018 and October 2018 from the Würzburg hive since the station was not functioning.

In [35]:
temp_wu = "../capstone_working/bee_hive_data/temperature_wurzburg.csv"
flow_wu = "../capstone_working/bee_hive_data/flow_wurzburg.csv"
weight_wu = "../capstone_working/bee_hive_data/weight_wurzburg.csv"
humidity_wu = "../capstone_working/bee_hive_data/humidity_wurzburg.csv"

In [36]:
#calling mean_per_hour function on the input files
netflow_wu = mean_per_hour(flow_wu)
temperature_wu = mean_per_hour(temp_wu)
weight_wu = mean_per_hour(weight_wu)
humidity_wu = mean_per_hour(humidity_wu)

In [37]:
source10 = ColumnDataSource(netflow_wu[netflow_wu>0])
source11 = ColumnDataSource(netflow_wu[netflow_wu<0])
plot5 = figure(title="Flow per hour",x_axis_type='datetime', x_axis_label ="Date", 
               y_axis_label ="Flow")
plot5.line(x = 'timestamp', y = 'flow', color='red', source= source10)
plot5.line(x = 'timestamp', y='flow', color='green', source=source11)
show(plot5)

In [39]:
source12 = ColumnDataSource(humidity_wu[humidity_wu >= 60])
source13 = ColumnDataSource(humidity_wu[(humidity_wu > 50) & (humidity_wu < 60)])
source14 = ColumnDataSource(humidity_wu[humidity_wu <= 50])
plot6 = figure(title = "Humidity per hour", x_axis_type = 'datetime', x_axis_label = "Date", 
               y_axis_label = "Humidity")
plot6.line(x = 'timestamp', y = 'humidity', color = 'red', source = source12)
plot6.line(x = 'timestamp', y ='humidity', color = 'green', source = source13)
plot6.line(x = 'timestamp', y ='humidity', color = 'red', source = source14)
show(plot6)

## Weight <br>
Weight of the Hive per Hour

In [41]:
source15 = ColumnDataSource(weight_wu)

plot7 = figure(title = "Weight per hour", x_axis_type = 'datetime', x_axis_label = "Date", 
               y_axis_label = "Weight")
plot7.line(x = 'timestamp', y = 'weight', color = 'navy',alpha=0.3, source = source15, line_width=3)
show(plot7)