## **Temperature Monitoring Data Analysis,Predictive Modelling and Dashboard Generation | Chemonics - Bill and Melinda Gates Foundation**

During my fellowship training with Engineering for Change in 2021 as Health Fellow assigned to the project by Chemonics International and Bill and Melinda Gates Foundation, my role was to analyze and carry out predictive modeling on data collected over 3 years from Mozambique, Burkina Faso, Zimbabwe and Guinea Bisau using telematic sensors placed in ambient pharmaceutical communities as they move along the supply chain to the last-mile consumer.After the analysis, I worked on a whitepaper as guided by the project expert team from Bill and Melinda Gates Foundation.

In [None]:
#@title Default title text
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
import os
os.environ["PYSPARK_PYTHON"] = "python3"

In [None]:
df = pd.read_excel("/content/drive/MyDrive/E4C /trucks_dataset.xlsx")

In [None]:
df.shape

(750281, 10)

In [None]:
df.head()

Unnamed: 0,Name,Date,WarehouseName,Day,MonthName,Month,Year,Temperature,TemperatureRange,Elapsed
0,NTS Truck 1776 ER Back,2018-10-06 05:03:46,MZ_Maputo_NTS_White_Truck,6,October,10,2018,30.4,30-35,0.083333
1,NTS Truck 1776 ER Back,2018-10-06 05:08:46,MZ_Maputo_NTS_White_Truck,6,October,10,2018,31.1,30-35,0.083333
2,NTS Truck 1776 ER Back,2018-10-06 05:13:46,MZ_Maputo_NTS_White_Truck,6,October,10,2018,32.0,30-35,0.083333
3,NTS Truck 1776 ER Back,2018-10-06 05:18:46,MZ_Maputo_NTS_White_Truck,6,October,10,2018,32.4,30-35,0.083333
4,NTS Truck 1776 ER Back,2018-10-06 05:23:46,MZ_Maputo_NTS_White_Truck,6,October,10,2018,32.9,30-35,0.083333


In [None]:
pip install pmdarima

In [None]:
import pmdarima as pm
from pmdarima.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

In [None]:
train, test = train_test_split(df, train_size=52500)

In [None]:
df1 = df[['Date', 'Temperature']]

In [None]:
df1.head()

Unnamed: 0,Date,Temperature
0,2018-10-06 05:03:46,30.4
1,2018-10-06 05:08:46,31.1
2,2018-10-06 05:13:46,32.0
3,2018-10-06 05:18:46,32.4
4,2018-10-06 05:23:46,32.9


In [None]:
df1.isnull().sum()

Date           0
Temperature    0
dtype: int64

In [None]:
train, test = train_test_split(df2, train_size=52500)

ValueError: ignored

In [None]:
model = pm.auto_arima(train, seasonal=True, m=12)

Below work is previous

In [None]:
df.tail(20)

In [None]:
df.isnull().sum()

In [None]:
df.Year.value_counts()

In [None]:
df_new = df[df['Temperature'] < 80]

In [None]:
fig = plt.figure(figsize = (20,24))
plt.title('Temperature Distribution')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.scatter(df_new.Date, df_new.Temperature, s=0.02,c='r')
plt.show()

In [None]:
print('this is max:', df.Temperature.max())

In [None]:
print('this is max:', df.Temperature.min())

In [None]:
print('The first date:', df.Date.min())

In [None]:
print('The first date:', df.Date.max())

In [None]:
for year in range(2018, 2021):
    df_Jan = df_new[(df_new['Date'] < str(year) + '-02') & (df_new['Date'] > str(year-1) + '-12-31')]
    fig = plt.figure(figsize = (12,8))
    plt.title('Temperature distribution' + str(year))
    plt.scatter(df_Jan.Date, df_Jan.Temperature, s = 0.02, c = 'r')
    sns.kdeplot(df_Jan.Date, df_Jan.Temperature)
    plt.xlabel('Year')
    plt.ylabel('Temperature')
    plt.show()

In [None]:

print("There should be a total of %s days"%((pd.to_datetime('2021-01-15') - pd.to_datetime('2018-04-10')).days + 1))

In [None]:
time_span = pd.date_range('2018-04-10', '2020-01-15')

# Convert the data frame to time series data
def df_to_TimeSeries(df):
    date = pd.to_datetime(df.Date.value_counts().index)
    index = sorted(date)
    data = df.Date.value_counts().values[np.argsort(date)]
    ts = pd.DataFrame(data = data, index = index, columns = ['count'])
    # if a date within the time span does not exist, fill it in with count = 0
    ts = ts.reindex(time_span, fill_value=0)
    return ts

In [None]:
all_counts = df_to_TimeSeries(df)

In [None]:
# check is there is any date with count = 0
all_counts[all_counts['count'] == 0]

In [None]:

# plot the time series
fig = plt.figure(figsize = (40, 10))
all_counts['count'].plot()
plt.xlabel('Time')
plt.ylabel('Number of Incidents')
plt.show()

In [None]:

df_theft = df.loc[(df['Temperature'] == 'LARCENY/THEFT'), :]
theft_counts = df_to_TimeSeries(df_theft)
theft_counts[theft_counts['count'] == 0]

In [None]:
fig = plt.figure(figsize=(20,6))
theft_counts['count'].plot()
plt.xlabel('Time')
plt.ylabel('Number of LARCENY/THEFT Incidents')
plt.show()

In [None]:
df_assault = df.loc[(df['Temperature'] == 'Date'), :]
assault_counts = df_to_TimeSeries(df_assault)
assault_counts[assault_counts['count'] == 0]

In [None]:
fig = plt.figure(figsize=(20,6))
assault_counts['count'].plot()
plt.xlabel('Time')
plt.ylabel('MV')
plt.show()

In [None]:
y = theft_counts.resample('MS').sum()
y.tail()

In [None]:
y = y[:-1]

In [None]:
fig = plt.figure(figsize=(20,6))
y['count'].plot()
plt.xlabel('Time')
plt.ylabel('MV')
plt.show()

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
y_past, y_future = y[:-12], y[-12:]

In [None]:
import itertools
p = d = q = range(0, 2)
pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in pdq]

In [None]:
warnings.filterwarnings("ignore")
aic_min = float("inf")
param = (0,0,0,0,0,0)
best_model = None

for x1 in pdq:
    for x2 in seasonal_pdq:
        try:
            mod = SARIMAX(y_past,
                          order = x1,
                          seasonal_order = x2,
                          enforce_stationarity = False,
                          enforce_invertibility = False)
            results = mod.fit()
            print("(p,d,q,P,D,Q,S) = {}: AIC = {}".format(x1 + x2, results.aic))
            if results.aic < aic_min:
                aic_min = results.aic
                param = x1 + x2
                best_model = mod
        except:
            continue

In [None]:
print("Best (p,d,q,P,D,Q,S) =", param)

In [None]:
results = best_model.fit()
results.summary().tables[1]

In [None]:
from statsmodels.tsa.stattools import adfuller

result = adfuller(y['count'].values[1:])
print("-- Before taking the difference --")
print ("ADF Statistic:", result[0])
print ("p-value:", result[1])

diff = y.diff()
result = adfuller(diff['count'].values[1:])
print ("-- After taking the difference --")
print ("ADF Statistic:", result[0])
print ("p-value:", result[1])

In [None]:
# neglect the beginning of the time series, where the model hasn't converged
pred = results.get_prediction(start = y_past['2018-06':].index[0],  dynamic=False)
pred_ci = pred.conf_int() # 95% confidence interval 

forecast = results.get_forecast(steps=28) # forecast for the next 12 months
forecast_ci = forecast.conf_int() # 95% confidence interval 

fig, ax = plt.subplots(figsize=(20, 16))

ax.plot(y['2018-06':].index, y['2018-06':], label='Observed', color='b')

pred.predicted_mean.plot(ax=ax, label='In-sample Prediction', color='k')
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)

forecast.predicted_mean.plot(ax=ax, label='Out-of-sample Forecast', color='r')
ax.fill_between(forecast_ci.index,
                forecast_ci.iloc[:, 0],
                forecast_ci.iloc[:, 1], color='r', alpha=.2)

ax.set_xlabel('Time')
ax.set_ylabel('Std')
plt.legend()
plt.show()

In [None]:
results.plot_diagnostics(figsize=(16, 12))
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = 11, 9
decomposition = seasonal_decompose(y, model='additive')
decomposition.plot()
plt.show()

In [None]:
Adf =df.sort_values("Year")

In [None]:
Adf = df[['Year', 'Temperature']] 

In [None]:
Adf

In [None]:
Adf1 = Adf.rename(columns={'Year':'ds', 'Temperature':'y'})

In [None]:
Adf1

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

In [None]:
Adf1.plot(figsize=(16, 12))
plt.show()

In [None]:
fig = Adf1.plot(figsize=(16, 12))
plt.xlabel('Year')
plt.ylabel('Temperature')
plt.show()

In [None]:
import pandas as pd

# Check for Facebook Prophet
# if not exists then install; takes ~3 minutes
try:
  from fbprophet import Prophet
except ImportError:
  !pip install pystan
  !pip install fbprophet
  from fbprophet import Prophet
  from IPython import display
  display.clear_output()

In [None]:
 !pip install pystan
 !pip install fbprophet

In [None]:
# defining the number of observations we want to predict
nobs = 36
train = Adf1[:-nobs]
test = Adf1[-nobs:]

In [None]:
print(f"Length of dataframe: {len(Adf1)}\n"
      f"Length of train set: {len(train)}\n"
      f"Length of test set: {len(test)}")

In [None]:
# Creating an instance of the Prophet model
prophet = Prophet()
# fitting Prophet model to the train set
prophet.fit(train)

In [None]:
future = prophet.make_future_dataframe(periods=nobs, freq='MS')

In [None]:
forecast = prophet.predict(future)

In [None]:
forecast.head()

In [None]:
fig1 = prophet.plot(forecast)

In [None]:
prophet.plot(forecast)
ax=forecast.plot(x='ds',y='yhat',legend=True,label='predictions',figsize=(12,8))
test.plot(x='ds',y='y',legend=True,label='True Test Data',ax=ax,xlim=('2018-09-01','2020-12-01'))

In [None]:
from statsmodels.tools.eval_measures import rmse

In [None]:
# Remember nobs = 12
y_pred = forecast.iloc[-nobs:]['yhat']
y_true = test['y']
rmse(y_pred, y_true)

In [None]:
# changing trend points
from fbprophet.plot import add_changepoints_to_plot
fig=prophet.plot(forecast)
a=add_changepoints_to_plot(fig.gca(), prophet, forecast)

In [None]:
Adf1[‘ds’] = pd.to_datetime(Adf1[‘ds’])

In [None]:
from cuml.tsa.arima import ARIMA
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [None]:
model_temp = ARIMA(Adf1, order=(1,2,1), fit_intercept=True)
model_temp.fit()

In [None]:
y = theft_counts.resample('Date').sum()
y.tail()

In [None]:
# datatime conversion
# Convert "Date" into a from of "yyyy-mm-dd" and create another column "Month" for later use
df.Date = df.Date.map(lambda x: x.split('/')[2] + '-' + x.split('/')[0] + '-' + x.split('/')[1])
df['Month'] = df.Date.map(lambda x: int(x.split('-')[1]))

# Count the number of incidents for each month
monthly_count = df.Month.value_counts()
monthly_count

In [None]:
fig = plt.figure()
plt.bar(sorted(monthly_count.index), monthly_count.values[np.argsort(monthly_count.index)])
plt.xlabel("MonthName")
plt.ylabel("Temperature")
plt.xlim((0.5,12.5))
plt.ylim((150000,220000))
plt.xticks(np.arange(1, 13))
plt.show()

In [None]:
pip install rapids

In [None]:
pip install conda

In [None]:
!pip install 'cudatoolkit=10.1'

In [None]:
# This get the RAPIDS-Colab install files and test check your GPU.  Run this and the next cell only.
# Please read the output of this cell.  If your Colab Instance is not RAPIDS compatible, it will warn you and give you remediation steps.
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/env-check.py

In [None]:
# This will update the Colab environment and restart the kernel.  Don't run the next cell until you see the session crash.
!bash rapidsai-csp-utils/colab/update_gcc.sh
import os
os._exit(00)

In [None]:
# This will install CondaColab.  This will restart your kernel one last time.  Run this cell by itself and only run the next cell once you see the session crash.
import condacolab
condacolab.install()

In [None]:
# you can now run the rest of the cells as normal
import condacolab
condacolab.check()

In [None]:
# Installing RAPIDS is now 'python rapidsai-csp-utils/colab/install_rapids.py <release> <packages>'
# The <release> options are 'stable' and 'nightly'.  Leaving it blank or adding any other words will default to stable.
# The <packages> option are default blank or 'core'.  By default, we install RAPIDSAI and BlazingSQL.  The 'core' option will install only RAPIDSAI and not include BlazingSQL, 
!python rapidsai-csp-utils/colab/install_rapids.py stable
import os
os.environ['NUMBAPRO_NVVM'] = '/usr/local/cuda/nvvm/lib64/libnvvm.so'
os.environ['NUMBAPRO_LIBDEVICE'] = '/usr/local/cuda/nvvm/libdevice/'
os.environ['CONDA_PREFIX'] = '/usr/local'

In [None]:
import cudf
import io, requests

In [None]:
import cuml

In [None]:
from cuml.tsa.arima import ARIMA
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import datetime
import pandas as p

A simple MA(2) example
We start with a simple Moving Average model. Let's first load and visualize the migrations in Auckland by age dataset:

In [None]:
df_mig = pd.read_excel("/content/drive/MyDrive/E4C /Test set.xlsx")

In [None]:
df_mig.head()

We want to fit the model with q=2 and with an intercept. The ARIMA class accepts cuDF dataframes or array-like types as input (host or device), e.g numpy arrays. Here we already have a dataframe so we can simply pass it to the ARIMA constructor with the model parameters:

In [None]:
Adf =df_mig.sort_values("Year")

In [None]:
Adf = df_mig[['Year', 'Temperature']] 

In [None]:
Adf

In [None]:
Adf1 = Adf.rename(columns={'Year':'ds', 'Temperature':'y'})

In [None]:
Adf1

In [None]:
model_temp = ARIMA(Adf1, order=(1,2,1), fit_intercept=True)
model_temp.fit()

In [None]:
# Predict in-sample and forecast out-of-sample
pred_temp = model_temp.predict(80, 160)
visualize(Adf1, pred_temp, 80)

Confidence intervals
To get confidence intervals when forecasting, we can specify the confidence level (here 95%):

In [None]:
fc_temp, lower_temp, upper_temp = model_temp.forecast(23, level=0.95)
visualize(Adf1, fc_temp, lower=lower_temp, upper=upper_temp)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

In [None]:
from fbprophet import Prophet
import pandas as pd

In [None]:
df1 = pd.read_excel("/content/drive/MyDrive/E4C /trucks_dataset.xlsx")

In [None]:
df1

In [None]:
data = pd.read_excel("/content/drive/MyDrive/E4C /trucks_datasetCC.xlsx")

In [None]:
data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
data.index = pd.to_datetime(data['Temperature'])

In [None]:
rolling_mean = data.rolling(window = 12).mean()
data['rolling_mean_diff'] = rolling_mean - rolling_mean.shift()
ax1 = plt.subplot()
data['rolling_mean_diff'].plot(title='after rolling mean & differencing');
ax2 = plt.subplot()
data.plot(title='original');

In [None]:
seasonality=data.seasonal
seasonality.plot(color='green')

In [None]:
# Filter data
df = df[df['Temperature']==country]
df.rename(columns={"date": "ds", filter_case: "y"},inplace=True) 
df['ds'] = pd.to_datetime(df['ds'],infer_datetime_format=True)
df = df[df['ds']>"2020-02-01"]
df['y'] = df['y'].astype(float)
df = df[['y','ds']]

# Run Prophet to get predictions
pred = Prophet()
pred.fit(df)
future = pred.make_future_dataframe(periods=365)
forecast = pred.predict(future)

#Plot the prediction Graph
graph = pred.plot(forecast)

In [None]:
from sktime.forecasting.arima import AutoARIMA

In [None]:
from sktime.forecasting.arima import AutoARIMA

forecaster = AutoARIMA(start_p=8, max_p=9, suppress_warnings=True)
sun_train.index = sun_train.index.astype(int)
forecaster.fit(sun_train)
forecaster.summary()

In [None]:
# Run the ADF test on the time series
result = adfuller(df['Temperature'])

# Plot the time series
fig, ax = plt.subplots();
city.plot(ax=ax);

# Print the test statistic and the p-value
print('ADF Statistic:', result[0])
print('p-value:', result[1])

In [None]:
import pandas as pd

# Check for Facebook Prophet
# if not exists then install; takes ~3 minutes
try:
  from fbprophet import Prophet
except ImportError:
  !pip install pystan
  !pip install fbprophet
  from fbprophet import Prophet
  from IPython import display
  display.clear_output()

In [None]:
!pip install pystan

In [None]:
 !pip install pystan
 !pip install fbprophet

In [None]:
df = df.sort_values("Date")

In [None]:
Temp_prophet_df = df[['Date', 'Temperature']] 

In [None]:
Temp_prophet_df

In [None]:
Temp_prophet_df = Temp_prophet_df.rename(columns={'Date':'ds', 'Temperature':'y'})

In [None]:
Temp_prophet_df

In [None]:
Temp_prophet_df[‘ds’] = pd.to_datetime(Temp_prophet_df[‘ds’])

In [None]:
# defining the number of observations we want to predict
nobs = 12
train = Temp_prophet_df[:-nobs]
test = Temp_prophet_df[-nobs:]

In [None]:
print(f"Length of dataframe: {len(Temp_prophet_df)}\n"
      f"Length of train set: {len(train)}\n"
      f"Length of test set: {len(test)}")

In [None]:
# Creating an instance of the Prophet model
prophet = Prophet()
# fitting Prophet model to the train set
prophet.fit(train)

In [None]:
future = prophet.make_future_dataframe(periods=nobs, freq='MS')

In [None]:
forecast = prophet.predict(future)

In [None]:
forecast.head()

From the Table above,
Trend:  
**yhat:** the forecasted value of our metric (in Statistics, yhat is a notation 
traditionally used to represent the predicted values of a value y)
**yhat_lower:** the lower bound of our forecasts
**yhat_upper:** the upper bound of our forecasts

In [None]:
pred[['ds', 'trend', 'yhat', 'yhat_upper', 'yhat_lower']].head()

In [None]:
fig1 = prophet.plot(forecast)

From the plot above, Prophet plots the observed values of time series(black dots),the forecasted values(blue lines) and the uncertainty intervals of our forecasts(blue shaded region). 

In [None]:
df = Temp_prophet_df[Temp_prophet_df['Temperature']]
df.rename(columns={"date": "ds", filter_case: "y"},inplace=True) 
df['ds'] = pd.to_datetime(df['ds'],infer_datetime_format=True)
df = df[df['ds']>"2020-02-01"]
df['y'] = df['y'].astype(float)
df = df[['y','ds']]

# Run Prophet to get predictions
pred = Prophet()
pred.fit(df)
future = pred.make_future_dataframe(periods=365)
forecast = pred.predict(future)

#Plot the prediction Graph
graph = pred.plot(forecast)

In [None]:
prophet.plot(forecast)
ax=forecast.plot(x='ds',y='yhat',legend=True,label='predictions',figsize=(12,8))
test.plot(x='ds',y='y',legend=True,label='True Test Data',ax=ax,xlim=('2018-09-01','2020-12-01'))

As you can see from the above plot predictions and test values are almost going together.

** Returning components** of our forecasts.This can help reveal how daily, weekly and yearly patterns of the time series contribute to the overall forecasted values.

In [None]:
fig=prophet.plot_components(forecast)

Since we are working with daily data, you would see a weekly seasonality plot 

Great - But how do we know if our model actually performed well? We can import a root-mean-square error function from the statsmodels library to compare the RMSE of our predictions to the true values:

In [None]:
from statsmodels.tools.eval_measures import rmse

To make sure that we feed in the correct variable to the prediction parameter, we will create a new variable called ‘y_pred’. Remember that the forecast variable contains more than the final 12 rows that are our predictions, so we need to separate our prediction values and specify the ‘yhat’ column to make sure our ‘y_pred’ variable is a reference to an array containing the 12-month predictions. We can also define ‘y_true’ to make it look nicer when using the RMSE function.

In [None]:
# Remember nobs = 12
y_pred = forecast.iloc[-nobs:]['yhat']
y_true = test['y']
rmse(y_pred, y_true)

Chainging points

In [None]:
# changing trend points
from fbprophet.plot import add_changepoints_to_plot
fig=prophet.plot(forecast)
a=add_changepoints_to_plot(fig.gca(), prophet, forecast)

These red dotted line show the major points where trendline happens to change.

**Valadation **

In [None]:
# Initial training period.
initial= 2*50
initial= str(initial)+' days'
#Period length that we perform the cross validation for.
period= 2*365
period=str(period)+' days'
#Horizon of prediction essentially for each fold.
horizon = 365
horizon=str(horizon)+' days'
fb_cv= cross_validation(prophet, initial=initial, period=period, horizon=horizon)
# Performance Metrics of fb_cv
performance_metrics(fb_cv)

In [None]:
!pip install pystan
!pip install fbprophet

In [None]:
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [None]:
from fbprophet.plot import add_changepoint_to_plot
fig1 = prophet.plot(forecast)
# viewing the points in time where the trajectory of the price index changed
a = add_changepoints_to_plot(fig1.gca(), prophet, forecast)

In [None]:
m = Prophet()
m.fit(Temp_prophet_df)

In [None]:
# Forcasting into the future
future = m.make_future_dataframe(periods=1825)
forecast = m.predict(future)

In [None]:
forecast

Y hat (written ŷ ) is the predicted value of y (the dependent variable) in a regression equation. It can also be considered to be the average value of the response variable. ... The equation is calculated during regression analysis.

In [None]:
figure = m.plot(forecast, xlabel='Date', ylabel='Temperature')

In [None]:
plt.figure(figsize=(10,10))
plt.plot(df['Date'], df['Temperature'])

In [None]:
figure3 = m.plot_components(forecast)

In [None]:
import numpy as np
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
import pandas as pd
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 70)
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode()

In [None]:
py.iplot([
    go.Scatter(x=Temp_prophet_df['ds'], y=Temp_prophet_df['y'], name='y'),
    go.Scatter(x=forecast['ds'], y=forecast['yhat'], name='yhat'),
    go.Scatter(x=forecast['ds'], y=forecast['yhat_upper'], fill='tonexty', mode='none', name='upper'),
    go.Scatter(x=forecast['ds'], y=forecast['yhat_lower'], fill='tonexty', mode='none', name='lower'),
    go.Scatter(x=forecast['ds'], y=forecast['trend'], name='Trend')
])

In [None]:
print('RMSE: %f' % np.sqrt(np.mean((forecast.loc[:1825, 'yhat']-Temp_prophet_df['y'])**2)) )

In [None]:
m = Prophet(changepoint_prior_scale=2.5)
m.fit(Temp_prophet_df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

In [None]:
figure4 = m.plot(forecast, xlabel='Date', ylabel='Temperature')

In [None]:
print('RMSE: %f' % np.sqrt(np.mean((forecast.loc[:1825, 'yhat']-Temp_prophet_df['y'])**2)) )

In [None]:
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from fbprophet import Prophet

# TODO: linear regression for future predictions
from sklearn import linear_model
from sklearn.utils import shuffle
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline

from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from matplotlib import pyplot

In [None]:
# dataframes creation for both training and testing datasets part2
Temp_prophet_df = pd.read_csv('/content/drive/MyDrive/E4C /Samplecsvdatafile1.csv')

In [None]:
Temp_prophet_df

In [None]:
# Bar Chart to indicate the number of regions 
plt.figure(figsize=[25,12])
sns.countplot(x = 'Date', data =Temp_prophet_df)
plt.xticks(rotation = 45)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import display, HTML
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 10, 6

In [None]:
df['Date'] = pd.to_datetime(df['Date'], infer_datetime_format=True)
indexDaset = df.set_index(['Date'])

In [None]:
from datetime import datetime
indexDaset.head(5)

In [None]:
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.plot(indexedDataset)

In [None]:
cols_plot = ['Date', 'Year', 'Temperature']

axes = time_series_df[cols_plot].plot(marker='.', alpha=0.5, linestyle='None', subplots=True)
for ax in axes:
    ax.set_ylabel('Temperature')

In [None]:
df = pd.read_csv("/content/drive/MyDrive/E4C /Mo.txt")
display(df.head(5))

In [None]:
print("Number of rows: ", df.shape[0])
counts = df.describe().iloc[0]
display(
pd.DataFrame(
counts.tolist(), 
columns=["Count of values"], 
index=counts.index.values
).transpose()
)

In [None]:
df_train, df_test = train_test_split(df, test_size=0.25)

In [None]:
numerical_feature = [feature for feature in df.columns if df[feature].dtypes!="O"]
numerical_feature

This is a Bioinformatics Class

In [None]:
import pandas as pd 

In [None]:
clf = RandomForestClassifier(n_estimators=30)
clf.fit(df_train, df_train["Temperature"])

to remove all the columns that are unusable from the algorithm.

In [None]:
numerical_feature = [feature for feature in df.columns if df[feature].dtypes!="O"]
numerical_feature

In [None]:
for feature in numerical_feature:
   sns.scatterplot(x = df[feature], y = df['Temperature'])
   plt.show()

In [None]:
df = df.drop(['Name', 'Date', 'MonthName'], axis=1)
df['Temperature'] = np.log(df['Temperature'])
scaler=MinMaxScaler()
scaler.fit(df)
dataset=pd.DataFrame(scaler.transform(df),columns=df.columns)
dataset.head()

In [None]:
X=df.drop(['Temperature'], axis=1)
y=df['Temperature']
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=42)

In [None]:
lr=LinearRegression()
lr.fit(X_train,y_train)

In [None]:
df

In [None]:
ls

In [None]:
%cd

In [None]:
%cd /content/drive/MyDrive/E4C /Mo.txt

In [None]:
#Adding necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
df = pd.read_csv('/content/drive/MyDrive/E4C /Mo.txt')

In [None]:
df

In [None]:
df1 = df.drop(["Name"])

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.hist(df["Temperature"], bins=100)

In [None]:
!pip install sklearn # ! says to run in terminal
from sklearn.linear_model import LinearRegression # import the linear regression model

In [None]:
X_test = pd.read_csv("/content/drive/MyDrive/E4C /Mo.txt") # load in the separate testing data

y_test = X_test["Temperature"] # set our output equal to the median house value column
X_test = X_test.drop(["Temperature"], axis = 1) # remove that column from the input. axis = 1 means to remove the column

y_train = df["Temperature"] # same thing for training data
X_train = df.drop(["Temperature"], axis = 1)

In [None]:
lm = LinearRegression().fit(X_train, y_train) # .fit() fits the data to the model
y_pred = lm.predict(X_test) # test how accurate the model is using testing data

print("R-Squared value:",lm.score(X_test,y_test)) 

In [None]:
df.tail(700)

In [None]:
In [1]: import pandas as pd
   ...: import numpy as np
   ...: 
   ...: from bcpandas import SqlCreds, to_sql

In [None]:
pip install bcpandas

In [None]:
In [1]: import pandas as pd
   ...: import numpy as np
   ...: 
   ...: from bcpandas import SqlCreds, to_sql

In [None]:
df = pd.DataFrame(
   ...:         data=np.ndarray(shape=(30, 4), dtype=int), 
   ...:         columns=[f"col_{x}" for x in range(4)]
   ...:     )

In [None]:
df

In [None]:
#Adding necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
display(df.dtypes) #To find the type of attribute of each column
df.shape #To find the number of rows and columns

In [None]:
to_sql(df, 'my_test_table', creds, index=False, if_exists='replace')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip /content/drive/MyDrive/ETALO/TemperatureMonitoringDB202108210213.zip

In [None]:
%ls

In [None]:
!unzip /content/TemperatureMonitoringDB202108210213.bacpac

In [None]:
%cd /content/TemperatureMonitoringDB202108210213.bacpac

In [None]:
import pandas as pd
import xml.etree.ElementTree as et
import os
from pathlib import Path
import glob

In [None]:
with os.scandir('/content/Data/dbo.Warehouse_table') as entries:
    for entry in entries:
        print(entry.name)

In [None]:
%cd /content/Data/dbo.Warehouse_table

In [None]:
%ls

In [None]:
pip install bcpandas

In [None]:
#!/bin/bash

In [None]:
df = pd.read_bcp('TableData-013-00001.BCP')

In [None]:
!df

In [None]:
df=pd.read_sql_query

In [None]:
df = pd.read_sql_table('/content/Data/dbo.Warehouse_table/TableData-002-00001.BCP')

In [None]:
df = pd.read_table('Data/dbo.Warehouse_table/TableData-034-00002.BCP ')

In [None]:
with os.scandir('/content/Data/dbo.Sensor_Warehouse') as entries:
    for entry in entries:
        print(entry.name)

In [None]:
with os.scandir('/content/Data/dbo.Master_key') as entries:
    for entry in entries:
        print(entry.name)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
df = pd.read_csv('/content/Data/dbo.Warehouse_table/TableData-002-00001.BCP')

In [None]:
path = "/content/Data/dbo.Warehouse_table"

In [None]:
df_bonus = pd.read_csv(path)

In [None]:
!pip install -U -q PyDrive

In [None]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [None]:
import glob
from google.colab import drive
drive.mount('/gdrive', force_remount=True)
#!ls "/gdrive/My Drive/folder"
#!ls "/gdrive/My Drive/folder"

files = glob.glob(f"/content/drive/MyDrive/chem*.txt")
for file in files:  
  do_something(file)*.txt")
for file in files:  
  do_something(file)*.txt")
for file in files:  
  do_something(file)

### **I. Data Preprocessing**

In [None]:
#Adding necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [None]:
copied_path = ‘/content/drive/MyDrive/ETALO/TemperatureMonitoringDB202108210213.zip’ #remove ‘content/’ from path then use 
data = pd.read_csv(drive/MyDrive/chem/TableData-000-00001.BCP)

In [None]:
#Reading the file and display first rows
df = pd.read_csv(/content/drive/MyDrive/chem/TableData-000-00001.BCP)


In [None]:
#Display the bottom rows
df.tail(5)                        

In [None]:
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import plotly.express as px

In [None]:
t_test(df, "Temperature")

Viewing the data from the tail end, it shows that the column for the Total Hourly Rain and Maximum Hourly rainfall rate have entries (0) yet it is null from the view of the head side.

 Checking the types of data

In [None]:
display(df.dtypes) #To find the type of attribute of each column
df.shape #To find the number of rows and columns

In [None]:
#Checking if there are any duplicated rows
duplicate_rows_df = df[df.duplicated()]
print("number of duplicate rows: ", duplicate_rows_df.shape)

There are no duplicated rows 

In [None]:
df.count() #To find values different from null on each column

Find number of nulls on each columns

In [None]:
#Find number of nulls on each columns. Two attributes have 228 null values
print(df.isnull().sum())

In [None]:
df["Temperature"].min()

In [None]:
df["Temperature"].max()

In [None]:
rng = np.random.RandomState(42)
ser = pd.Series(rng.rand(5))
ser

In [None]:
df.mean()

In [None]:
#Presenting the distribution of each attribute from the dataset using histograms
df.hist(column = 'Temp', figsize=(5, 5))
plt.show()

In [None]:
df.hist(column = 'Rh', figsize=(5, 5))
plt.show()

Detecting missing or null values in the attributes 'Total Hourly Rain' and 'Time of Rainfall'. The distribution of those values is no normal, with high frequency in just one value. Included in the dropped columns along with the categorical values.

In [None]:

#Dropping categorical columns to obtain the final dataset
num_df = df.drop(['Serial_id'], axis=1)
num_df['Temp'] = num_df['Temp']*(10**-2)
num_df['Rh'] = num_df['Rh']*(10**-1)
num_df.head(5)

In [None]:
#We describe the dataset to find the mean, the standard deviation, and the maximum and minimum values for each attribute
num_df.describe(datetime_is_numeric=False)

Detecting Outliers

An outlier is a point or set of points that are different from other points. Sometimes they can be very high or very low. It's often a good idea to detect and remove the outliers. Because outliers are one of the primary reasons for resulting in a less accurate model.

In [None]:
#Importing seaborn library
import seaborn as sns

In [None]:
#Boxplot to detect outliers graphically
ax = sns.boxplot(data=num_df, orient="v", palette="Set2", width = 0.6, whis = 1.5)

In [None]:
#Display boxplot for the variables with more outliers
sns.boxplot(x=df['Temp'])

In [None]:
sns.boxplot(x=df['Rh'])

Determine the ranges and Inter-Quartile Range

In [None]:
Q1 = num_df.quantile(0.25)
Q3 = num_df.quantile(0.75)
IQR = Q3 - Q1
print(IQR)

From above, it shows that there were 72 outliers since the original shape was [642, 16]. Outliers can be detected and dropped using the scikitlearn library previous to the model application. Now we are aware of the distribution of the data.

### **II. Report Dataset Creation**

In order to obtain the summary report from temperature and humidity measurements, we have to create the dataset that contains the cumulative frequencies versus months or days. Pandas library will provide the tools to generate the final dataset before displaying it with Dash.

In [None]:
import pandas as pd
import numpy as np

In [None]:
num_df

In [None]:
num_df.dtypes

In [None]:
num_df['Date'] = pd.to_datetime(num_df['Date'])
print(num_df.info())

In [None]:
num_df.plot(x='Date', y = 'Temp', figsize=(18, 10))
plt.show()

In [None]:
num_df.plot(x='Date', y = 'Rh', figsize=(18, 10))
plt.show()

In [None]:
#Separating the datetime into months, days, hours, minutes and seconds

num_df['seconds'] = num_df['Date'].dt.second
num_df['minute'] = num_df['Date'].dt.minute
num_df['hour'] = num_df['Date'].dt.hour
num_df['day_of_week'] = num_df['Date'].dt.weekday
num_df['day'] = num_df['Date'].dt.day
num_df['month'] = num_df['Date'].dt.month



In [None]:
num_df

In [None]:
grouped_max = num_df.groupby('month').max()
grouped_max

In [None]:
grouped_max['Temp'].plot(kind = 'bar')

In [None]:
march_measures = num_df[(num_df['month']==3)]
march_measures

In [None]:
april_measures = num_df[(num_df['month']==4)]
april_measures

###1. Finding Max Values per Month

In [None]:
#Maximum values of Temperature and Humidity during March and April, 2019

#Max values from March
temp_march = march_measures['Temp'] 
max_value_march = round(temp_march.max(), 2)
print('The maximum temperature value from March 2019 was: ', max_value_march, '°C')

#Max values from March
temp_april = april_measures['Temp'] 
max_value_april = round(temp_april.max(), 2)
print('The maximum temperature value from April 2019 was: ', max_value_april, '°C')

###2. Finding Number of measurements expected

In [None]:
#If we consider 5 minute increments, then there should be 12 temperature measurements captured by hour
#24 hours each day, and then multiply it for the number of days per month

exp_march = 12*24* march_measures['day'].max()
print ("Number of temperature captures expected in March: ", exp_march)

exp_april = 12*24* april_measures['day'].max()
print ("Number of temperature captures expected in April: ", exp_april)

###3. Total measurements captured

In [None]:
#Total increments in March 2019

total_increments_march = len(march_measures.index)
print('Total 5 min increments is March were: ', total_increments_march)

#Total increments in April 2019

total_increments_april = len(april_measures.index)
print('Total 5 min increments is April were: ', total_increments_april)

### 4. Number of measurements over 30°C

In [None]:
#For March 2019
over_30_march =march_measures[(march_measures['Temp']>30)]
over_30_march = len(over_30_march.index)
print('The total number of increments over 30°C in March 2019 is: ', over_30_march)

#For April 2019
over_30_april = april_measures[(april_measures['Temp']>30)]
over_30_april = len(over_30_april.index)
print('The total number of increments over 30°C in March 2019 is: ', over_30_april)

### 5. Cumulative hours over 30°C

In [None]:
#Initial dataframe for cumulative hours calculations
num_df

In [None]:
#Moving the indexes to put the following value in the same row
s = num_df.loc[72:2847,'Date']
s.index = s.index - 1
s

In [None]:
num_df.insert(1, "Following_Date", s)

In [None]:
num_df.loc[71:2487, :]

In [None]:
#Creating a new dataset to calculate the time elapsed
data = {
        "a": (num_df['Date']),
        "b": (num_df['Following_Date'])
        
        }

time_df = pd.DataFrame(data, dtype="datetime64[ns]")
time_df["elapsed"] = (time_df.b - time_df.a) / pd.Timedelta('1 hour')

In [None]:
time_df.tail(10)

In [None]:
e = time_df.loc[:,'elapsed']
e

In [None]:
num_df['Time_elapsed_in_hours'] = e
num_df.loc[71:2487, :]

In [None]:
#Finding the number of measurements in the temperature ranges and making imputations of missing values

num_df['Temp'] = num_df['Temp'].fillna(0)
num_df.Temp.isna().sum()

num_df.loc[(num_df['Temp'] < 30) , 'Temp_Ranges'] = 'Under 30'
num_df.loc[((num_df['Temp']>=30) & (num_df['Temp']<35)), 'Temp_Ranges'] = '30-35'
num_df.loc[((num_df['Temp']>=35) & (num_df['Temp']<40)), 'Temp_Ranges'] = '35-40'
num_df.loc[((num_df['Temp']>=40) & (num_df['Temp']<45)), 'Temp_Ranges'] = '40-45'
num_df.loc[((num_df['Temp']>=45) & (num_df['Temp']<50)), 'Temp_Ranges'] = '45-50'
num_df.loc[((num_df['Temp']>=50) & (num_df['Temp']<55)), 'Temp_Ranges'] = '50-55'
num_df.loc[((num_df['Temp']>=55) & (num_df['Temp']<60)), 'Temp_Ranges'] = '55-60'
num_df.loc[num_df['Temp']>=60, 'Temp_Ranges'] = 'Above 60'

print(num_df.Temp_Ranges.value_counts())



In [None]:
num_df

In [None]:
num_df.Temp_Ranges

In [None]:
df2 = num_df.loc[72:2487]
df2

In [None]:
df2.set_index('month')

In [None]:
df2.shape

In [None]:
len(df2)

In [None]:
#Finding cumulative frequencies regarding temperature ranges
x = 0
for i in range(1,12,1):
  total_under_30 = []
  total_hours_30_35 = []
  total_hours_35_40 = []
  total_hours_40_45 = []
  total_hours_45_50 = []
  total_hours_50_55 = []
  total_hours_55_60 = []
  total_hours_above_60 = []
  for x in range(72, len(df2), 1):
    if df2.at[x, 'Temp_Ranges'] == '30-35':
      m = df2.at[x, 'Time_elapsed_in_hours']
      total_hours_30_35.append(m)
    elif df2.at[x, 'Temp_Ranges'] == 'Under 30':
      n = df2.at[x, 'Time_elapsed_in_hours']
      total_under_30.append(n)

print(total_hours_30_35)
print(total_under_30)


In [None]:
len(total_hours_30_35)


In [None]:
len(total_under_30)

In [None]:
 # Iterate each element in list
# and add them in variable total


# Finding sum of elements in list
total = 0

 
# Iterate each element in list
# and add them in variable total
for ele in range(0, len(total_hours_30_35)):
    total = total + total_hours_30_35[ele]
 
# printing total value

print("Cumulative hours at 30°-35°C: ", round(total, 2), " hours.")

In [None]:
for ele in range(0, len(total_under_30)):
    total = total + total_under_30[ele]

print("Cumulative hours under 30°: ", round(total, 2), " hours.")

###3. Final report dataset generation

In [None]:
#Dataset structure is defined previous to the Dash displaying
#For that, we specify the content of every column to the new transformed dataset "final_report"

final_report = pd.DataFrame({'Months':['January','February','March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
                             'Number of Temperature Captures Registered':['NaN','NaN',total_increments_march,total_increments_april,'NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'], 
                             'Number of Temperature Captures Expected': ['NaN', 'NaN', exp_march, exp_april, 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN', 'NaN'],
                             'Total 5 min Increments Over 30°C':['NaN','NaN',over_30_march,over_30_april,'NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             'Cumulative hours above 30° C':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '30-35':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '35-40':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '40-45':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '45-50':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '50-55':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             '55-60':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             'Above 60':['NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN'],
                             'Maximum Temperature Observed (°C)':['NaN','NaN',max_value_march,max_value_april,'NaN','NaN','NaN','NaN','NaN','NaN','NaN','NaN']
})


In [None]:
final_report

### III. Dashboard Generation

In [None]:
!pip install jupyter-dash -q

In [None]:
!pip install dash-cytoscape -q

In [None]:
#Installing Dash library
!pip install dash 

In [None]:
 !pip install Jupyterlab

In [None]:
#Importing libraries necessary for the task
import dash
from dash import no_update
from jupyter_dash import JupyterDash  # pip install dash
import dash_cytoscape as cyto  # pip install dash-cytoscape==0.2.0 or higher
import dash_core_components as dcc
import dash_html_components as html
from jupyter_dash import JupyterDash
import pandas as pd
import numpy as np
import plotly.express as px
import random
import string
import math
from dash.dependencies import Input, Output
import dash_table 

In [None]:
# -------------------------------------------------------------------------------------
# Following the final report dataset
df = final_report
# Creating an ID column name gives us more interactive capabilities
df["id"] = df.index +1
print(df.columns)


In [None]:
print (df.head())

In [None]:
# App layout
app = JupyterDash(__name__, prevent_initial_callbacks=True) # this was introduced in Dash version 1.12.0

# Sorting operators (https://dash.plotly.com/datatable/filtering)
app.layout = html.Div([
    dash_table.DataTable(
        id='datatable-interactivity',
        columns=[
            {"name": i, "id": i, "deletable": True, "selectable": True, "hideable": True}
            if i == "id" or i == "months"
            else {"name": i, "id": i, "deletable": True, "selectable": True}
            for i in df.columns
        ],
        data=df.to_dict('records'),  # the contents of the table
        editable=True,              # allow editing of data inside all cells
        filter_action="native",     # allow filtering of data by user ('native') or not ('none')
        sort_action="native",       # enables data to be sorted per-column by user or not ('none')
        sort_mode="single",         # sort across 'multi' or 'single' columns
        column_selectable="multi",  # allow users to select 'multi' or 'single' columns
        row_selectable="multi",     # allow users to select 'multi' or 'single' rows
        row_deletable=True,         # choose if user can delete a row (True) or not (False)
        selected_columns=[],        # ids of columns that user selects
        selected_rows=[],           # indices of rows that user selects
        page_action="native",       # all data is passed to the table up-front or not ('none')
        page_current=0,             # page number that user is on
        page_size=12,                # number of rows visible per page
        style_cell={                # ensure adequate header width when text is shorter than cell's text
            'whiteSpace': 'normal',
            'height': 'auto',
            'lineHeight': '15px',

            #'minWidth': 95, 'maxWidth': 95, 'width': 95
        },
        style_cell_conditional=[    # align text columns to left. By default they are aligned to right
            {
                'if': {'column_id': c},
                'textAlign': 'center'
            } for c in ['country', 'iso_alpha3']
        ],
        style_data={                # overflow cells' content into multiple lines
            'whiteSpace': 'normal',
            'height': 'auto',
            'lineHeight': '15px'
        }
    ),

    html.Br(),
    html.Br(),
    html.Div(id='bar-container'),
    html.Div(id='choromap-container')

])


In [None]:
# Create bar chart
@app.callback(
    Output(component_id='bar-container', component_property='children'),
    [Input(component_id='datatable-interactivity', component_property="derived_virtual_data"),
     Input(component_id='datatable-interactivity', component_property='derived_virtual_selected_rows'),
     Input(component_id='datatable-interactivity', component_property='derived_virtual_selected_row_ids'),
     Input(component_id='datatable-interactivity', component_property='selected_rows'),
     Input(component_id='datatable-interactivity', component_property='derived_virtual_indices'),
     Input(component_id='datatable-interactivity', component_property='derived_virtual_row_ids'),
     Input(component_id='datatable-interactivity', component_property='active_cell'),
     Input(component_id='datatable-interactivity', component_property='selected_cells')]
)
def update_bar(all_rows_data, slctd_row_indices, slct_rows_names, slctd_rows,
               order_of_rows_indices, order_of_rows_names, actv_cell, slctd_cell):
    print('***************************************************************************')
    print('Data across all pages pre or post filtering: {}'.format(all_rows_data))
    print('---------------------------------------------')
    print("Indices of selected rows if part of table after filtering:{}".format(slctd_row_indices))
    print("Names of selected rows if part of table after filtering: {}".format(slct_rows_names))
    print("Indices of selected rows regardless of filtering results: {}".format(slctd_rows))
    print('---------------------------------------------')
    print("Indices of all rows pre or post filtering: {}".format(order_of_rows_indices))
    print("Names of all rows pre or post filtering: {}".format(order_of_rows_names))
    print("---------------------------------------------")
    print("Complete data of active cell: {}".format(actv_cell))
    print("Complete data of all selected cells: {}".format(slctd_cell))

    dff = df if all_rows_data is None else pd.DataFrame(all_rows_data)

    # used to highlight selected countries on bar chart
    colors = ['#7FDBFF' if i in slctd_row_indices else '#0074D9'
              for i in range(len(dff))]

    if "Number of Temperature Captures Registered" in dff and "Months" in dff:
        return [
            dcc.Graph(id='bar-chart',
                      figure=px.bar(
                          data_frame=dff,
                          x="Months",
                          y='Number of Temperature Captures Registered',
                          labels={"Number of Temperature Captures Registered": "Number of Temperature Captures Registered"}
                      ).update_layout(showlegend=False, xaxis={})
                      .update_traces(marker_color=colors, hovertemplate="<b>%{y}</b><extra></extra>")
                      )
        ]

In [None]:
# Create choropleth map
@app.callback(
    Output(component_id='choromap-container', component_property='children'),
    [Input(component_id='datatable-interactivity', component_property="derived_virtual_data"),
     Input(component_id='datatable-interactivity', component_property='derived_virtual_selected_rows')]
)
def update_map(all_rows_data, slctd_row_indices):
    dff = df if all_rows_data is None else pd.DataFrame(all_rows_data)

    # highlight selected countries on map
    borders = [5 if i in slctd_row_indices else 1
               for i in range(len(dff))]

    if "iso_alpha3" in dff and "internet daily" in dff and "country" in dff:
        return [
            dcc.Graph(id='choropleth',
                      style={'height': 700},
                      figure=px.choropleth(
                          data_frame=dff,
                          locations="iso_alpha3",
                          scope="africa",
                          color="internet daily",
                          title="% of hours of exposure from total hours",
                          template='plotly_dark',
                          hover_data=['country', 'internet daily'],
                      ).update_layout(showlegend=False, title=dict(font=dict(size=28), x=0.5, xanchor='center'))
                      .update_traces(marker_line_width=borders, hovertemplate="<b>%{customdata[0]}</b><br><br>" +
                                                                              "%{customdata[1]}" + "%")
                      )
        ]


In [None]:
# Highlight selected column
@app.callback(
    Output('datatable-interactivity', 'style_data_conditional'),
    [Input('datatable-interactivity', 'selected_columns')]
)
def update_styles(selected_columns):
    return [{
        'if': {'column_id': i},
        'background_color': '#D2F3FF'
    } for i in selected_columns]

In [None]:
app.run_server(mode='external', port=8051)

#import os

#if __name__ == '__main__':
    #app.css.config.serve_locally = True
    #app.scripts.config.serve_locally = True
    #app.run_server(debug=True, use_reloader=False, host=os.getenv('HOST', '127.0.0.1'),port=os.getenv('PORT', '8051'), proxy= None)
