#tinyMeteo - weather forecasting on microcontroller

## Collect Data 

Collect weather data for years 2009-2022 for the desired location from World Weather Online API in JSON format.

In [31]:
!rm -r weather_data

In [28]:
from google.colab import files 
files.upload() #upload a .json file with Kaggle API key and location coordinates or location name

Saving api.json to api.json


{'api.json': b'{"username":"manoliskelaidis","key":"504d752d56674a589aa62550230805", "location":"37.995453,23.792475"}'}

In [32]:
import json

# Read the uploaded files for the API key and location detials:
with open('api.json') as f:
    api_details = json.load(f)

# Access the API key and location values
API_KEY = api_details['key']
LOCATION = api_details['location']

In [None]:
!rm api.json # deletd .json file

### API calls
API only allows 30days of data per call. Loop through API calls for all necessary data and all .json files for each each month in a 'weather_data' directory.

In [33]:
import os
import requests
import json
from calendar import monthrange

start_year = 2009
end_year = 2022

# Create the 'weather_data' directory if it does not already exist
if not os.path.exists('weather_data'):
    os.makedirs('weather_data')

raw_data = 'weather_data'

#     # Loop through all years and months and create a file for each month
for year in range(start_year, end_year+1):
    for month in range(1, 13):
        # Get the start and end dates for the month
        _, num_days = monthrange(year, month) 
        start_date = f'{year}-{month:02d}-01'
        end_date = f'{year}-{month:02d}-{num_days:02d}'
        
        # Construct the URL and make the request
        url = f'https://api.worldweatheronline.com/premium/v1/past-weather.ashx?key={API_KEY}&q={LOCATION}&format=json&date={start_date}&enddate={end_date}'
        response = requests.get(url)
        data = response.json()
        
        # Get the filename and filepath for this month's data
        filename = f'weather_{year}-{month:02d}.json'
        filepath = os.path.join('weather_data', filename)
        
        # Check if file exists and create it if it does not
        if not os.path.exists(filepath):
            with open(filepath, 'w') as f:
                json.dump(data, f)
        else:
            print(f'File {filename} already exists.')


### Filter JSON

Filter all created .json files for the date, time, temperature, humidity, pressure, and weather description keys and create new parsed .json files in a new directory called 'parsed_weather_data'.

In [34]:

# create the directory for the parsed weather data if it doesn't exist
if not os.path.exists('parsed_weather_data'):
    os.makedirs('parsed_weather_data')

# loop through all the JSON files in the weather_data directory
for filename in os.listdir('weather_data'):
    if filename.endswith('.json'):
        # extract the location and month from the filename
        location_name = filename.split('_')[0]
        month = filename.split('_')[1].split('.')[0]

        # create a new dictionary to store the parsed data for this month
        parsed_month_data = {}

        # Open the file and load the JSON data
        with open(f'weather_data/{filename}', 'r') as f:
            file_contents = f.read()
            parsed_data = json.loads(file_contents)

        # loop through the hourly data for all the dates in the JSON file
        for weather_data in parsed_data['data']['weather']:
            # extract the date for this set of hourly data
            date = weather_data['date']

            # create a new list to store the parsed data for this day
            parsed_day_data = []

            # loop through the hourly data for this date
            for hourly_data in weather_data['hourly']:
                # extract the values we're interested in
                tempC = hourly_data['tempC']
                humidity = hourly_data['humidity']
                pressure = hourly_data['pressure']
                weatherCond = hourly_data['weatherDesc'][0]['value']
                time = hourly_data['time']

                # add the values to the parsed_day_data list
                parsed_day_data.append({
                    'time': time,
                    'tempC': tempC,
                    'humidity': humidity,
                    'pressure': pressure,
                    'weatherCond': weatherCond
                })

            # add the parsed day data to the parsed_month_data dictionary
            parsed_month_data[date] = parsed_day_data

        # write the parsed_month_data to a new JSON file
        with open(f'parsed_weather_data/{location_name}_{month}_parsed.json', 'w') as f:
            json.dump(parsed_month_data, f)


Combine all parsed .json files into a single .json file containing all data of interest.

In [35]:
# create the combined data dictionary
combined_data = {}

# loop over each file in the directory
for filename in os.listdir('parsed_weather_data'):
    if filename.endswith('.json'):
        # load the contents of the file into a dictionary
        with open(os.path.join('parsed_weather_data', filename)) as f:
            parsed_data = json.load(f)
        
        # loop over each date in the parsed data dictionary
        for date in parsed_data.keys():
            # if the date doesn't exist in the combined data dictionary, add it with an empty list as its value
            if date not in combined_data:
                combined_data[date] = []
            
            # add the weather data to the combined_data dictionary
            combined_data[date].extend(parsed_data[date])

# save the combined data to a new file
with open('combined_parsed_data.json', 'w') as f:
    json.dump(combined_data, f)


### Convert to CSV

In [37]:
import csv

csv_data_file = 'weather_data.csv'

# read the weather data from the JSON file
with open('combined_parsed_data.json', 'r') as f:
    weather_data = json.load(f)

# create a new .csv filew if it doesn't exist.
if not os.path.isfile(csv_data_file):
    with open(csv_data_file, "x") as f:
        pass

# write the weather data to a CSV file
with open('weather_data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    
    # write the header row
    writer.writerow(['date', 'time', 'tempC', 'humidity', 'pressure', 'weatherCond'])
    
    # write the data rows
    for date in weather_data:
        for data in weather_data[date]:
            writer.writerow([date, data['time'], data['tempC'], data['humidity'], data['pressure'], data['weatherCond']])


In [38]:
# install pandas module
!pip install pandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Preprocessing of data

In [103]:
import pandas as pd

# read csv file
df = pd.read_csv('weather_data.csv', sep=',') 
#show the first five rows
df.head()

Unnamed: 0,date,time,tempC,humidity,pressure,weatherCond
0,2021-01-01,0,11,67,1015,Clear
1,2021-01-01,100,11,67,1015,Clear
2,2021-01-01,200,12,68,1015,Clear
3,2021-01-01,300,12,69,1015,Clear
4,2021-01-01,400,12,69,1015,Clear


In [104]:
# select only the values we need\
df = df[['tempC','humidity','pressure','weatherCond']]

In [105]:
df.shape

(122476, 4)

In [99]:
df.describe()

Unnamed: 0,time,tempC,humidity,pressure
count,122476.0,122476.0,122476.0,122476.0
mean,1150.057971,18.313661,61.900944,1014.876049
std,692.210942,7.560368,15.07819,6.108621
min,0.0,-2.0,17.0,984.0
25%,600.0,12.0,51.0,1011.0
50%,1200.0,18.0,62.0,1014.0
75%,1725.0,24.0,73.0,1019.0
max,2300.0,44.0,99.0,1039.0


In [54]:
df.values

array([[11, 67, 1015, 'Clear'],
       [11, 67, 1015, 'Clear'],
       [12, 68, 1015, 'Clear'],
       ...,
       [23, 48, 1005, 'Clear'],
       [23, 48, 1005, 'Clear'],
       [22, 49, 1005, 'Clear']], dtype=object)

In [100]:
df.info

<bound method DataFrame.info of               date  time  tempC  humidity  pressure weatherCond
0       2021-01-01     0     11        67      1015       Clear
1       2021-01-01   100     11        67      1015       Clear
2       2021-01-01   200     12        68      1015       Clear
3       2021-01-01   300     12        69      1015       Clear
4       2021-01-01   400     12        69      1015       Clear
...            ...   ...    ...       ...       ...         ...
122471  2010-05-31  1900     25        44      1005       Clear
122472  2010-05-31  2000     24        46      1005       Clear
122473  2010-05-31  2100     23        48      1005       Clear
122474  2010-05-31  2200     23        48      1005       Clear
122475  2010-05-31  2300     22        49      1005       Clear

[122476 rows x 6 columns]>

### Assign Labels
Assign numbers to weather description values

In [106]:
# Assign an integer value to each weather description in the "weather description" column
df = df.dropna() #remove empty rows

#Convert each category into an interger
for i in df.index:
  if df["weatherCond"][i]=='Clear':
    df["weatherCond"][i] = 0
  elif df["weatherCond"][i]=='Sunny':
    df["weatherCond"][i] = 1
  elif df["weatherCond"][i]=='Partly cloudy':
    df["weatherCond"][i] = 2
  elif df["weatherCond"][i]=='Cloudy':
    df["weatherCond"][i] = 3
  elif df["weatherCond"][i]=='Overcast':
    df["weatherCond"][i] = 4
  elif df["weatherCond"][i] == 'Moderate rain at times':
      df["weatherCond"][i] = 5
  elif df["weatherCond"][i]=='Patchy rain possible':
    df["weatherCond"][i] = 6
  elif df["weatherCond"][i]=='Moderate or heavy rain shower':
    df["weatherCond"][i] = 7
  elif df["weatherCond"][i]=='Heavy rain at times':
    df["weatherCond"][i] = 8
  elif df["weatherCond"][i]=='Light freezing rain':
    df["weatherCond"][i] = 9
  elif df["weatherCond"][i]=='Patchy moderate snow':
    df["weatherCond"][i] = 10
  else:
    df = df.drop([i]) #we don't consider other classes so we drop it

# Cast weatherCond column from string to int
df["weatherCond"] = df["weatherCond"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["weatherCond"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["weatherCond"][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["weatherCond"][i] = 2
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["weatherCond"][i] = 3
A value is trying to be set on a copy of a slice from a DataFrame

S

### Case1: Decision Tree Classifier

In [102]:
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

X = df.drop(columns=['weatherCond'])
y = df['weatherCond']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = DecisionTreeClassifier()
model.fit(X_train,y_train)
predictions = model.predict(X_test)

score = accuracy_score(y_test, predictions)
score

ValueError: ignored

### Case 2: Dense Neural Network

### Split Data

In [107]:
import numpy as np
from keras.utils import to_categorical

labels = to_categorical(df.pop('weatherCond')) #Create classes from the labels

features = np.array(df) #convert dataframe into ndarray, only array type that neural network takes as input

In [108]:
features

array([[  11,   67, 1015],
       [  11,   67, 1015],
       [  12,   68, 1015],
       ...,
       [  23,   48, 1005],
       [  23,   48, 1005],
       [  22,   49, 1005]])

In [59]:
labels

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [109]:
from sklearn.model_selection import train_test_split


#Split the dataset into training set 85% and test set 15%
train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.15,shuffle=True) 



## The Model

The model used is a densely connected neural network (DNN). The activation function used is ReLU (rectified linear unit), and the output layer uses the softmax activation function. The dropout regularization technique is applied with a rate of 0.4, which randomly drops connections between neurons to prevent overfitting.

The model is compiled using the Adam optimizer, categorical cross-entropy loss function, and accuracy metric. The summary method is called to display the model's architecture and parameters.

In [110]:
import tensorflow as tf
from tensorflow.keras import regularizers

#Parameters :
NB_classes = 11 #number of outputs
NB_neurones = 8 #main number of neurones
NB_features = 3 #number of inputs
activation_func = tf.keras.activations.relu #activation function used

#Densly connected neural network
model = tf.keras.Sequential([
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func,input_shape=(NB_features,)),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dense(NB_neurones,activation=activation_func),
                             tf.keras.layers.Dropout(0.4), #drop randomly some connection to avoid overfiting
                             #softmax will output an array containing probabilities of each classes
                             #the highest one is the predicted class
                             tf.keras.layers.Dense(NB_classes,activation=tf.keras.activations.softmax)
])

custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

model.compile(optimizer=custom_optimizer,loss=tf.keras.losses.categorical_crossentropy, metrics=['accuracy']) #compile the model

model.summary() #to see the paramter of our model

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_172 (Dense)           (None, 8)                 32        
                                                                 
 dense_173 (Dense)           (None, 8)                 72        
                                                                 
 dense_174 (Dense)           (None, 8)                 72        
                                                                 
 dense_175 (Dense)           (None, 8)                 72        
                                                                 
 dense_176 (Dense)           (None, 8)                 72        
                                                                 
 dense_177 (Dense)           (None, 8)                 72        
                                                                 
 dense_178 (Dense)           (None, 8)                

## Train the Model

In [None]:
model.fit(x=train_features,
          y=train_labels,
          epochs=800,
          validation_data=(test_features,test_labels),
          verbose=1,
          shuffle=True) #Train our model

Epoch 1/800
Epoch 2/800
Epoch 3/800
Epoch 4/800
Epoch 5/800
Epoch 6/800
Epoch 7/800
Epoch 8/800
Epoch 9/800
Epoch 10/800
Epoch 11/800
Epoch 12/800
Epoch 13/800
Epoch 14/800
Epoch 15/800
Epoch 16/800
Epoch 17/800
Epoch 18/800
Epoch 19/800
Epoch 20/800
Epoch 21/800
Epoch 22/800
Epoch 23/800
Epoch 24/800
Epoch 25/800
Epoch 26/800
Epoch 27/800
Epoch 28/800
Epoch 29/800
Epoch 30/800
Epoch 31/800
Epoch 32/800
Epoch 33/800
Epoch 34/800
Epoch 35/800
Epoch 36/800
Epoch 37/800
Epoch 38/800
Epoch 39/800
Epoch 40/800
Epoch 41/800
Epoch 42/800
Epoch 43/800
Epoch 44/800
Epoch 45/800
Epoch 46/800
Epoch 47/800
Epoch 48/800
Epoch 49/800
Epoch 50/800
Epoch 51/800
Epoch 52/800
Epoch 53/800
Epoch 54/800
Epoch 55/800
Epoch 56/800
Epoch 57/800
Epoch 58/800
Epoch 59/800
Epoch 60/800
Epoch 61/800
Epoch 62/800
Epoch 63/800
Epoch 64/800
Epoch 65/800
Epoch 66/800
Epoch 67/800
Epoch 68/800
Epoch 69/800
Epoch 70/800
Epoch 71/800
Epoch 72/800
Epoch 73/800
Epoch 74/800
Epoch 75/800
Epoch 76/800
Epoch 77/800
Epoch 78

## Evaluate

In [87]:
performance=model.evaluate(test_features,test_labels, batch_size=32, verbose=1, steps=None, )[1] * 100
print('Final accuracy : ', round(performance), '%')

Final accuracy :  47 %
