## EMBEDDED SYSTEMS COURSE PROJECT

### Weather Prediction Using the Arduino Nano 33 BLE Sense

In [32]:
# Import the libraries used throughout the notebook
import pickle
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB,CategoricalNB
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import ShuffleSplit
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [33]:
import warnings
warnings.filterwarnings('ignore')

In [34]:
# Read the weather records from the CSV file next to the notebook and
# display the resulting frame as the cell output.
weather_csv_path = "Weather Data.csv"
df = pd.read_csv(weather_csv_path)
df

Unnamed: 0,DateTime,Condition,Rain,MaxTemp,MinTemp,SunRise,SunSet,MoonRise,MoonSet,AvgWind,AvgHumidity,AvgPressure
0,02.09.2019,Partly cloudy,0.00,27,22,06:32:00,19:37:00,9:52:00,21:45:00,23,66,1012
1,01.09.2019,Partly cloudy,0.00,27,22,06:31:00,19:38:00,8:37:00,21:13:00,21,66,1011
2,31.08.2019,Patchy rain possible,0.50,26,22,06:30:00,19:40:00,7:21:00,20:40:00,22,63,1015
3,30.08.2019,Partly cloudy,0.00,27,22,06:29:00,19:42:00,6:4:00,20:5:00,20,64,1016
4,29.08.2019,Partly cloudy,0.00,27,23,06:27:00,19:43:00,4:47:00,19:26:00,24,61,1015
...,...,...,...,...,...,...,...,...,...,...,...,...
3891,05.01.2009,Overcast,4.32,5,3,08:29:00,17:50:00,0:41:00,1:57:00,15,97,1015
3892,04.01.2009,Mist,2.91,5,3,08:29:00,17:49:00,0:15:00,12:48:00,9,94,1014
3893,03.01.2009,Overcast,0.08,5,3,08:29:00,17:48:00,11:52:00,,16,94,1021
3894,02.01.2009,Overcast,4.48,4,1,08:29:00,17:48:00,11:30:00,23:43:00,12,89,1021


In [35]:
# Keep the three input columns plus the `Condition` label; the label is
# deliberately listed last.
feature_and_label_columns = ["MaxTemp", "AvgPressure", "AvgHumidity", "Condition"]
df = df.loc[:, feature_and_label_columns]
df

Unnamed: 0,MaxTemp,AvgPressure,AvgHumidity,Condition
0,27,1012,66,Partly cloudy
1,27,1011,66,Partly cloudy
2,26,1015,63,Patchy rain possible
3,27,1016,64,Partly cloudy
4,27,1015,61,Partly cloudy
...,...,...,...,...
3891,5,1015,97,Overcast
3892,5,1014,94,Mist
3893,5,1021,94,Overcast
3894,4,1021,89,Overcast


In [36]:
# Integer class id for every weather condition the classifiers are trained on.
# Any condition that is not a key of this mapping is out of scope and its rows
# are discarded.
condition_codes = {
    'Sunny': 0,
    'Partly cloudy': 1,
    'Cloudy': 2,
    'Overcast': 3,
    'Patchy rain possible': 4,
}

df = df.dropna()  # remove rows that have a missing value in any column

# Keep only the rows whose label is one of the modelled classes. The explicit
# .copy() gives an independent frame, so the column assignment below never
# writes through a view of the previous frame.
df = df[df["Condition"].isin(list(condition_codes))].copy()

# Replace the label text by its class id in one vectorised step. The previous
# per-row chained assignment (df["Condition"][ind] = ...) is not guaranteed by
# pandas to modify `df`, and dropping rows one at a time rebuilt the frame on
# every iteration.
df["Condition"] = df["Condition"].map(condition_codes).astype(int)

In [37]:
# Every column except the last one is an input feature; the last column is
# the target label.
feature_columns = df.columns[:-1]
target_column = df.columns[-1]
X = df[feature_columns]
y = df[target_column]

# Shuffle and hold out 30 % of the rows for testing; the fixed seed makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)


#### Training XGBoost Classifier

In [38]:
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

# XGBoost classifier with its default hyper-parameters, scored on the
# held-out test split: overall accuracy first, then the per-class report.
xgb_model = XGBClassifier().fit(X_train, y_train)
pred = xgb_model.predict(X_test)
for metric in (accuracy_score, classification_report):
    print(metric(y_test, pred))

0.6049601417183348
              precision    recall  f1-score   support

           0       0.71      0.85      0.77       668
           1       0.35      0.24      0.29       238
           2       0.04      0.01      0.02        91
           3       0.44      0.50      0.47       114
           4       0.25      0.06      0.09        18

    accuracy                           0.60      1129
   macro avg       0.36      0.33      0.33      1129
weighted avg       0.54      0.60      0.57      1129



#### Training LGBM Classifier

In [39]:
# LightGBM classifier with its default hyper-parameters, scored on the
# held-out test split: overall accuracy first, then the per-class report.
lgb_model = LGBMClassifier().fit(X_train, y_train)
pred = lgb_model.predict(X_test)
for metric in (accuracy_score, classification_report):
    print(metric(y_test, pred))

0.6102745792736936
              precision    recall  f1-score   support

           0       0.71      0.85      0.78       668
           1       0.37      0.24      0.29       238
           2       0.10      0.03      0.05        91
           3       0.43      0.51      0.47       114
           4       0.11      0.06      0.07        18

    accuracy                           0.61      1129
   macro avg       0.34      0.34      0.33      1129
weighted avg       0.55      0.61      0.57      1129



#### Training Random Forest Classifier

In [40]:
from sklearn.ensemble import RandomForestClassifier

# A random forest draws its bootstrap samples and candidate features at
# random, so an unseeded model reports different scores on every run. The seed
# matches the one used for the train/test split to keep the cell reproducible.
rgc_model = RandomForestClassifier(random_state=42)
rgc_model.fit(X_train, y_train)
pred = rgc_model.predict(X_test)

# Overall accuracy first, then per-class precision / recall / F1.
print(accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

0.5907883082373782
              precision    recall  f1-score   support

           0       0.72      0.82      0.77       668
           1       0.29      0.25      0.27       238
           2       0.10      0.04      0.06        91
           3       0.44      0.46      0.45       114
           4       0.20      0.06      0.09        18

    accuracy                           0.59      1129
   macro avg       0.35      0.33      0.33      1129
weighted avg       0.54      0.59      0.56      1129



#### Training Gaussian Naive Bayes

In [41]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings, scored on the held-out test
# split: overall accuracy first, then the per-class report.
gnb = GaussianNB().fit(X_train, y_train)
pred = gnb.predict(X_test)
for metric in (accuracy_score, classification_report):
    print(metric(y_test, pred))

0.6288751107174491
              precision    recall  f1-score   support

           0       0.72      0.91      0.80       668
           1       0.40      0.07      0.12       238
           2       0.17      0.02      0.04        91
           3       0.36      0.72      0.48       114
           4       0.00      0.00      0.00        18

    accuracy                           0.63      1129
   macro avg       0.33      0.34      0.29      1129
weighted avg       0.56      0.63      0.55      1129



#### Training Logistic Regression

In [42]:
from sklearn.linear_model import  LogisticRegression

# Logistic regression with default settings, scored on the held-out test
# split: overall accuracy first, then the per-class report.
lr = LogisticRegression().fit(X_train, y_train)
pred = lr.predict(X_test)
for metric in (accuracy_score, classification_report):
    print(metric(y_test, pred))

0.6377325066430469
              precision    recall  f1-score   support

           0       0.68      0.97      0.80       668
           1       0.00      0.00      0.00       238
           2       0.00      0.00      0.00        91
           3       0.42      0.64      0.51       114
           4       0.00      0.00      0.00        18

    accuracy                           0.64      1129
   macro avg       0.22      0.32      0.26      1129
weighted avg       0.44      0.64      0.52      1129



In [43]:
import tensorflow as tf

In [44]:
from keras.utils import to_categorical

# One-hot encode the integer labels for the softmax network.
# The width is taken from the complete label vector `y` rather than inferred
# separately per split: without an explicit num_classes, to_categorical sizes
# its output from the largest label it is given, so a split that happens to
# miss the highest class would produce a narrower matrix than the other one.
n_label_classes = int(y.max()) + 1
y_train_oh = to_categorical(y_train, num_classes=n_label_classes)
y_test_oh = to_categorical(y_test, num_classes=n_label_classes)

In [45]:
#Parameters :
NB_classes = 5        # number of output units (one per class)
NB_neurones = 30      # units in every hidden layer
NB_features = 3       # number of input values per sample
NB_hidden_layers = 25 # hidden Dense layers, counting the input-facing one
activation_func = tf.keras.activations.relu  # activation of the hidden layers

# Densely connected network.
# The first hidden layer also declares the input shape; the remaining
# NB_hidden_layers - 1 layers are identical copies without it.
hidden_stack = [
    tf.keras.layers.Dense(NB_neurones, activation=activation_func, input_shape=(NB_features,))
]
hidden_stack.extend(
    tf.keras.layers.Dense(NB_neurones, activation=activation_func)
    for _ in range(NB_hidden_layers - 1)
)

classifier_head = [
    # Dropout with rate 0.4 in front of the output layer to limit overfitting.
    tf.keras.layers.Dropout(0.4),
    # Softmax yields one probability per class; the largest one is the
    # predicted class.
    tf.keras.layers.Dense(NB_classes, activation=tf.keras.activations.softmax),
]

model = tf.keras.Sequential(hidden_stack + classifier_head)

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

model.summary()  # print the layers and their parameter counts


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_78 (Dense)            (None, 30)                120       
                                                                 
 dense_79 (Dense)            (None, 30)                930       
                                                                 
 dense_80 (Dense)            (None, 30)                930       
                                                                 
 dense_81 (Dense)            (None, 30)                930       
                                                                 
 dense_82 (Dense)            (None, 30)                930       
                                                                 
 dense_83 (Dense)            (None, 30)                930       
                                                                 
 dense_84 (Dense)            (None, 30)               

In [47]:
# Train for 50 epochs, reshuffling the training rows each epoch and reporting
# loss/accuracy on the test split after every epoch.
fit_options = dict(
    epochs=50,
    validation_data=(X_test, y_test_oh),
    verbose=1,
    shuffle=True,
)
model.fit(X_train, y_train_oh, **fit_options)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1423567ec70>

In [48]:
# evaluate() returns the loss followed by the compiled metrics; entry 1 is
# read as the accuracy and converted to a percentage.
eval_scores = model.evaluate(X_test, y_test_oh, batch_size=32, verbose=1, steps=None)
performance = eval_scores[1] * 100
print('Final accuracy : ', round(performance), '%')

Final accuracy :  64 %


In [55]:
# Export the trained network as a C header for the microcontroller firmware.
#
# The bytes embedded in the header must be in a format the on-device runtime
# can interpret. A pickle stream is a Python-only serialisation and cannot be
# loaded there, so the model is converted to a TensorFlow Lite flatbuffer
# instead.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
model_bytes = converter.convert()

# Render the flatbuffer as rows of two-digit hex literals so the generated
# header stays readable and diff-friendly.
BYTES_PER_LINE = 12
hex_rows = [
    ', '.join(f'0x{byte:02x}' for byte in model_bytes[start:start + BYTES_PER_LINE])
    for start in range(0, len(model_bytes), BYTES_PER_LINE)
]

with open('model.h', 'w') as f:
    f.write('#ifndef MODEL_H\n')
    f.write('#define MODEL_H\n')
    f.write('#include <stdint.h>\n')
    f.write('const uint8_t model_data[] = {\n')
    f.write(',\n'.join('    ' + row for row in hex_rows))
    f.write('\n};\n')
    # The firmware needs the array length; expose it next to the data.
    f.write(f'const unsigned int model_data_len = {len(model_bytes)};\n')
    f.write('#endif\n')

INFO:tensorflow:Assets written to: ram://71c342ce-6a86-410e-92e5-be240cc43e8f/assets
