#tinyMeteo - weather forecasting on microcontroller

## Collect Data 

Collect weather data for years 2009-2022 for the desired location from World Weather Online API in JSON format.

In [None]:
!rm -r parsed_weather_data
!rm weather_data.csv
!rm weather_data_parsed.csv
!rm final_data.csv

In [None]:
from google.colab import files 
files.upload() #upload a .json file with Kaggle API key and location coordinates or location name

In [4]:
import json

# Read the uploaded files for the API key and location detials:
with open('api.json') as f:
    api_details = json.load(f)

# Access the API key and location values
API_KEY = api_details['key']
LOCATION = api_details['location']

In [6]:
!rm api.json # deletd .json file

### API calls
API only allows 30days of data per call. Loop through API calls for all necessary data and all .json files for each each month in a 'weather_data' directory.

In [5]:
import os
import requests
import json
from calendar import monthrange

start_year = 2009
end_year = 2022

# path of raw data
raw_data = 'weather_data'

# Create the 'weather_data' directory if it does not already exist
if not os.path.exists(raw_data):
    os.makedirs(raw_data)

#     # Loop through all years and months and create a file for each month
for year in range(start_year, end_year+1):
    for month in range(1, 13):
        # Get the start and end dates for the month
        _, num_days = monthrange(year, month) 
        start_date = f'{year}-{month:02d}-01'
        end_date = f'{year}-{month:02d}-{num_days:02d}'
        
        # Construct the URL and make the request
        url = f'https://api.worldweatheronline.com/premium/v1/past-weather.ashx?key={API_KEY}&q={LOCATION}&format=json&date={start_date}&enddate={end_date}'
        response = requests.get(url)
        data = response.json()
        
        # Get the filename and filepath for this month's data
        filename = f'weather_{year}-{month:02d}.json'
        filepath = os.path.join(raw_data, filename)
        
        # Check if file exists and create it if it does not
        if not os.path.exists(filepath):
            with open(filepath, 'w') as f:
                json.dump(data, f)
        else:
            print(f'File {filename} already exists.')


### Filter JSON

Filter all created .json files for the date, time, temperature, humidity, pressure, precipitation, and weather description keys and create new parsed .json files in a new directory called 'parsed_weather_data'.

In [7]:
parsed_data_path = 'parsed_weather_data'

# create the directory for the parsed weather data if it doesn't exist
if not os.path.exists(parsed_data_path):
    os.makedirs(parsed_data_path)

# loop through all the JSON files in the weather_data directory
for filename in os.listdir(raw_data):
    if filename.endswith('.json'):
        # extract the location and month from the filename
        location_name = filename.split('_')[0]
        month = filename.split('_')[1].split('.')[0]

        # create a new dictionary to store the parsed data for this month
        parsed_month_data = {}

        # Open the file and load the JSON data
        with open(f'{raw_data}/{filename}', 'r') as f:
            file_contents = f.read()
            parsed_data = json.loads(file_contents)

        # loop through the hourly data for all the dates in the JSON file
        for weather_data in parsed_data['data']['weather']:
            # extract the date for this set of hourly data
            date = weather_data['date']

            # create a new list to store the parsed data for this day
            parsed_day_data = []

            # loop through the hourly data for this date
            for hourly_data in weather_data['hourly']:
                # extract the values we're interested in
                tempC = hourly_data['tempC']
                humidity = hourly_data['humidity']
                pressure = hourly_data['pressure']
                weatherCond = hourly_data['weatherDesc'][0]['value']
                time = hourly_data['time']

                # add the values to the parsed_day_data list
                parsed_day_data.append({
                    'time': time,
                    'tempC': tempC,
                    'humidity': humidity,
                    'pressure': pressure,
                    'weatherCond': weatherCond
                })

            # add the parsed day data to the parsed_month_data dictionary
            parsed_month_data[date] = parsed_day_data

        # write the parsed_month_data to a new JSON file
        with open(f'{parsed_data_path}/{location_name}_{month}_parsed.json', 'w') as f:
            json.dump(parsed_month_data, f)


Combine all parsed .json files into a single .json file containing all data of interest.

In [8]:
# create the combined data dictionary
combined_data = {}

# combined parsed json data path
json_path = f'{parsed_data_path}/_combined_parsed_data.json'

# loop over each file in the directory
for filename in os.listdir(parsed_data_path):
    if filename.endswith('.json'):
        # load the contents of the file into a dictionary
        with open(os.path.join(parsed_data_path, filename)) as f:
            parsed_data = json.load(f)
        
        # loop over each date in the parsed data dictionary
        for date in parsed_data.keys():
            # if the date doesn't exist in the combined data dictionary, add it with an empty list as its value
            if date not in combined_data:
                combined_data[date] = []
            
            # add the weather data to the combined_data dictionary
            combined_data[date].extend(parsed_data[date])

# save the combined data to a new file
with open(json_path, 'w') as f:
    json.dump(combined_data, f)


### Convert to CSV

In [9]:
import csv
from datetime import datetime

# the csv that contains all values
csv_data_file = '_combined_parsed_data.csv'
# the parsed csv in the desired final format
final_file = "final_data.csv"

if not os.path.isfile(f'{parsed_data_path}/{csv_data_file}'):
    with open(f'{parsed_data_path}/{csv_data_file}', "x") as f:
        pass

# Write initial CSV file with headers and weather data
with open(f'{parsed_data_path}/{csv_data_file}', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['date', 'time', 'tempC', 'humidity', 'pressure', 'weatherCond'])

    with open(json_path, 'r') as f:
        weather_data = json.load(f)

    for date in weather_data:
        for data in weather_data[date]:
            writer.writerow([date, data['time'], data['tempC'], data['humidity'], data['pressure'], data['weatherCond']])

# Read the initial CSV file, transform the date format, and write to the final file
with open(f'{parsed_data_path}/{csv_data_file}', 'r') as infile, open(final_file, 'w', newline='') as outfile:
    reader = csv.DictReader(infile)
    fieldnames = ['date', 'year', 'month', 'day', 'time', 'tempC', 'humidity', 'pressure', 'weatherCond']
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()

    for row in reader:
        date = datetime.strptime(row['date'], '%Y-%m-%d').strftime('%Y%m%d')
        year, month, day = date[:4], date[4:6], date[6:]
        writer.writerow({
            'date': date,
            'year': year,
            'month': month,
            'day': day,
            'time': row['time'],
            'tempC': row['tempC'],
            'humidity': row['humidity'],
            'pressure': row['pressure'],
            'weatherCond': row['weatherCond']
        })


In [None]:
# install pandas module
!pip install pandas

## Preprocessing of data

In [90]:
import pandas as pd

# read csv file
df = pd.read_csv('final_data.csv', sep=',') 
#show the first five rows
df.head()

Unnamed: 0,date,year,month,day,time,tempC,humidity,pressure,weatherCond
0,20210601,2021,6,1,0,19,74,1011,Patchy rain possible
1,20210601,2021,6,1,100,18,76,1011,Cloudy
2,20210601,2021,6,1,200,18,77,1011,Cloudy
3,20210601,2021,6,1,300,17,78,1011,Cloudy
4,20210601,2021,6,1,400,18,77,1010,Cloudy


In [91]:
# select only the values we need\
df = df[['weatherCond', 'tempC', 'humidity','pressure']]

In [82]:
df.shape

(122712, 4)

In [84]:
df.describe()

Unnamed: 0,tempC,humidity,pressure
count,122712.0,122712.0,122712.0
mean,18.251336,62.129254,1014.871969
std,7.520944,14.903668,6.115303
min,-2.0,18.0,984.0
25%,12.0,51.0,1011.0
50%,18.0,62.0,1014.0
75%,24.0,74.0,1019.0
max,43.0,99.0,1039.0


In [85]:
df.values

array([['Patchy rain possible', 19, 74, 1011],
       ['Cloudy', 18, 76, 1011],
       ['Cloudy', 18, 77, 1011],
       ...,
       ['Clear', 9, 46, 1021],
       ['Clear', 10, 49, 1021],
       ['Partly cloudy', 9, 53, 1022]], dtype=object)

In [86]:
df.info

<bound method DataFrame.info of                  weatherCond  tempC  humidity  pressure
0       Patchy rain possible     19        74      1011
1                     Cloudy     18        76      1011
2                     Cloudy     18        77      1011
3                     Cloudy     17        78      1011
4                     Cloudy     18        77      1010
...                      ...    ...       ...       ...
122707                 Clear     11        40      1020
122708                 Clear     10        43      1021
122709                 Clear      9        46      1021
122710                 Clear     10        49      1021
122711         Partly cloudy      9        53      1022

[122712 rows x 4 columns]>

### Assign Labels
Assign numbers to weather description values

In [92]:
# Assign an integer value to each weather description in the "weather description" column
df = df.dropna()  # remove empty rows

# Convert each category into an integer
for i in df.index:
    if df.loc[i, "weatherCond"] == 'Clear':
        df.loc[i, "weatherCond"] = 0
    elif df.loc[i, "weatherCond"] == 'Sunny':
        df.loc[i, "weatherCond"] = 0
    elif df.loc[i, "weatherCond"] == 'Partly cloudy':
        df.loc[i, "weatherCond"] = 1
    elif df.loc[i, "weatherCond"] == 'Cloudy':
        df.loc[i, "weatherCond"] = 1
    elif df.loc[i, "weatherCond"] == 'Overcast':
        df.loc[i, "weatherCond"] = 1
    elif df.loc[i, "weatherCond"] == 'Moderate rain at times':
        df.loc[i, "weatherCond"] = 2
    elif df.loc[i, "weatherCond"] == 'Patchy rain possible':
        df.loc[i, "weatherCond"] = 2
    elif df.loc[i, "weatherCond"] == 'Moderate or heavy rain shower':
        df.loc[i, "weatherCond"] = 3
    elif df.loc[i, "weatherCond"] == 'Heavy rain at times':
        df.loc[i, "weatherCond"] = 3
    elif df.loc[i, "weatherCond"] == 'Light freezing rain':
        df.loc[i, "weatherCond"] = 4
    elif df.loc[i, "weatherCond"] == 'Patchy moderate snow':
        df.loc[i, "weatherCond"] = 4
    else:
        df = df.drop([i])  # we don't consider other classes, so we drop them

# Cast weatherCond column from string to int
df["weatherCond"] = df["weatherCond"].astype(int)


In [88]:
unique_classes = np.unique(df['weatherCond'])
print(unique_classes)


[0 1 2 3 4]


### Case1: Decision Tree Classifier

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

X = df.drop(columns=['weatherCond'])
y = df['weatherCond']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = DecisionTreeClassifier()
model.fit(X_train,y_train)
predictions = model.predict(X_test)

score = accuracy_score(y_test, predictions)
score

0.6907875972782463

### Case 2: Dense Neural Network

### Split Data

In [93]:
from keras.utils import to_categorical

labels = to_categorical(df.pop('weatherCond')) #Create classes from the labels

import numpy as np #import numpy library, used for arithmetic

features = np.array(df) #convert our dataframe into ndarray, only array type that neural network takes as input

In [94]:
features

array([[  19,   74, 1011],
       [  18,   76, 1011],
       [  18,   77, 1011],
       ...,
       [   9,   46, 1021],
       [  10,   49, 1021],
       [   9,   53, 1022]])

In [78]:
labels

array([2, 1, 1, ..., 0, 0, 1])

In [95]:
from sklearn.model_selection import train_test_split

#Split the dataset into training set 80% and test set 20%
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.20, shuffle=True)



## The Model

The model used is a densely connected neural network (DNN). The activation function used is ReLU (rectified linear unit), and the output layer uses the softmax activation function. The dropout regularization technique is applied with a rate of 0.4, which randomly drops connections between neurons to prevent overfitting.

The model is compiled using the Adam optimizer, categorical cross-entropy loss function, and accuracy metric. The summary method is called to display the model's architecture and parameters.

In [96]:
import tensorflow as tf
from tensorflow.keras import regularizers
from keras.utils import to_categorical

# Parameters
NB_classes = 5 # Number of outputs
NB_neurones = 30 # Main number of neurons
NB_features = 3 # Number of inputs
activation_func = tf.keras.activations.relu # Activation function used

#Densly connected neural network
model = tf.keras.Sequential([
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func,input_shape=(NB_features,)),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dropout(0.4), #drop randomly some connection to avoid overfiting
                             #softmax will output an array containing probabilities of each classes
                             #the highest one is the predicted class
                             tf.keras.layers.Dense(NB_classes,activation=tf.keras.activations.softmax)
])

model.compile(optimizer="adam",loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) #compile the model

model.summary() #to see the paramter of our model

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_143 (Dense)           (None, 30)                120       
                                                                 
 dense_144 (Dense)           (None, 30)                930       
                                                                 
 dense_145 (Dense)           (None, 30)                930       
                                                                 
 dense_146 (Dense)           (None, 30)                930       
                                                                 
 dense_147 (Dense)           (None, 30)                930       
                                                                 
 dense_148 (Dense)           (None, 30)                930       
                                                                 
 dense_149 (Dense)           (None, 30)               

## Train the Model

In [97]:
model.fit(x=train_features,
          y=train_labels,
          epochs=20,
          validation_data=(test_features,test_labels),
          verbose=1,
          shuffle=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f2a94468fa0>

## Evaluate

In [98]:
performance=model.evaluate(test_features,test_labels, batch_size=32, verbose=1, steps=None, )[1] * 100
print('Final accuracy : ', round(performance), '%')

Final accuracy :  70 %


## Converting model for microcontroller

Tensorflow lite

In [None]:
converter = tf.lite.TFLiteConverter.from_keras_model(model) #create a converter
tflite_model = converter.convert() #convert the model without quantization


open("/content/model.tflite","wb").write(tflite_model) #Create a file containing our tflite model

Header (c++) file

In [100]:
!apt-get install -qq xxd #installing the tool

In [101]:
!echo "const unsigned char model[] = {" > /content/model.h
!cat /content/model.tflite | xxd -i >> /content/model.h #create an hexadecimal array containing all our parameters
!echo "};" >> /content/model.h

files.download("/content/model.h") #automaticly download your file

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>