In [1]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

### The aim of this model is to show what my approach can be for predicting future weather conditions. To predict the weather conditions of an area, we must need all the previous weather conditions represented in  time series format. Along with that, the model also requires necessary feature values of that day like humidity, pressure, wind speed, precipitation, and cloud coverage. If these values are available, then we can build our model in a way that compares last year's information with current years' information. That means last year's weather values will have an important impact on predicting the condition. Basically, I will consider current trends of the weather and last year's trend to find out how much change has occurred in terms of  weather conditions based on time series data available. Another important part would be diving into each feature and figuring out which feature has how much effect on the given outputs.
Temparature depends on all the components of enviorment, so I tried to add all the components on the model training. Worked with a small amount of data to maek the model simple. To provide proper solutions I believe, I have to dive deep and look how much each feature affecting the temparature value. Then the model will provide better output

In [2]:
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": 23.7115253,
	"longitude": 90.4111451,
    "timezone": "Asia/Dacca",
	"hourly": [
        "temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation_probability", "precipitation", "rain", 
        "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "visibility", 
        "wind_speed_10m", "wind_speed_80m", "wind_speed_120m", "wind_speed_180m", "wind_direction_10m", "wind_direction_80m", "wind_direction_120m", 
        "wind_direction_180m", "wind_gusts_10m", "soil_temperature_0cm", "soil_temperature_6cm", "soil_temperature_18cm", "soil_temperature_54cm", 
        "soil_moisture_0_to_1cm", "soil_moisture_1_to_3cm", "soil_moisture_3_to_9cm", "soil_moisture_9_to_27cm", "soil_moisture_27_to_81cm"
    ],
	"past_days": 92
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation_probability = hourly.Variables(4).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(5).ValuesAsNumpy()
hourly_rain = hourly.Variables(6).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(8).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(9).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(11).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(12).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(13).ValuesAsNumpy()
hourly_visibility = hourly.Variables(14).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(15).ValuesAsNumpy()
hourly_wind_speed_80m = hourly.Variables(16).ValuesAsNumpy()
hourly_wind_speed_120m = hourly.Variables(17).ValuesAsNumpy()
hourly_wind_speed_180m = hourly.Variables(18).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(19).ValuesAsNumpy()
hourly_wind_direction_80m = hourly.Variables(20).ValuesAsNumpy()
hourly_wind_direction_120m = hourly.Variables(21).ValuesAsNumpy()
hourly_wind_direction_180m = hourly.Variables(22).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(23).ValuesAsNumpy()
hourly_soil_temperature_0cm = hourly.Variables(24).ValuesAsNumpy()
hourly_soil_temperature_6cm = hourly.Variables(25).ValuesAsNumpy()
hourly_soil_temperature_18cm = hourly.Variables(26).ValuesAsNumpy()
hourly_soil_temperature_54cm = hourly.Variables(27).ValuesAsNumpy()
hourly_soil_moisture_0_to_1cm = hourly.Variables(28).ValuesAsNumpy()
hourly_soil_moisture_1_to_3cm = hourly.Variables(29).ValuesAsNumpy()
hourly_soil_moisture_3_to_9cm = hourly.Variables(30).ValuesAsNumpy()
hourly_soil_moisture_9_to_27cm = hourly.Variables(31).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["apparent_temperature"] = hourly_apparent_temperature
hourly_data["precipitation_probability"] = hourly_precipitation_probability
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["pressure_msl"] = hourly_pressure_msl
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["visibility"] = hourly_visibility
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["wind_speed_80m"] = hourly_wind_speed_80m
hourly_data["wind_speed_120m"] = hourly_wind_speed_120m
hourly_data["wind_speed_180m"] = hourly_wind_speed_180m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_direction_80m"] = hourly_wind_direction_80m
hourly_data["wind_direction_120m"] = hourly_wind_direction_120m
hourly_data["wind_direction_180m"] = hourly_wind_direction_180m
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_data["soil_temperature_0cm"] = hourly_soil_temperature_0cm
hourly_data["soil_temperature_6cm"] = hourly_soil_temperature_6cm
hourly_data["soil_temperature_18cm"] = hourly_soil_temperature_18cm
hourly_data["soil_temperature_54cm"] = hourly_soil_temperature_54cm
hourly_data["soil_moisture_0_to_1cm"] = hourly_soil_moisture_0_to_1cm
hourly_data["soil_moisture_1_to_3cm"] = hourly_soil_moisture_1_to_3cm
hourly_data["soil_moisture_3_to_9cm"] = hourly_soil_moisture_3_to_9cm
hourly_data["soil_moisture_9_to_27cm"] = hourly_soil_moisture_9_to_27cm
hourly_dataframe = pd.DataFrame(data = hourly_data)
# print(hourly_dataframe)

In [3]:
hourly_dataframe.head()

Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,apparent_temperature,precipitation_probability,precipitation,rain,pressure_msl,surface_pressure,...,wind_direction_180m,wind_gusts_10m,soil_temperature_0cm,soil_temperature_6cm,soil_temperature_18cm,soil_temperature_54cm,soil_moisture_0_to_1cm,soil_moisture_1_to_3cm,soil_moisture_3_to_9cm,soil_moisture_9_to_27cm
0,2024-02-21 18:00:00+00:00,22.078499,94.0,21.067076,25.969887,0.0,0.0,0.0,1009.346741,20.0,...,10.799999,21.3785,23.178499,24.728498,22.928499,0.156,0.169,0.191,0.218,0.271
1,2024-02-21 19:00:00+00:00,21.828499,95.0,20.991089,25.720743,15.0,0.0,0.0,1008.945923,73.0,...,9.72,21.3785,22.828499,24.5285,22.978498,0.159,0.169,0.191,0.218,0.271
2,2024-02-21 20:00:00+00:00,21.8785,95.0,21.040775,25.984108,30.0,0.0,0.0,1008.64679,100.0,...,7.559999,21.578499,22.678499,24.2785,22.978498,0.161,0.17,0.191,0.218,0.271
3,2024-02-21 21:00:00+00:00,21.978498,95.0,21.140142,25.821695,45.0,0.0,0.0,1008.148254,100.0,...,10.08,21.928499,22.678499,24.078499,22.978498,0.162,0.17,0.191,0.218,0.271
4,2024-02-21 22:00:00+00:00,22.078499,97.0,21.579643,25.941376,45.0,0.1,0.1,1007.549866,100.0,...,13.32,22.0285,22.678499,23.8785,22.978498,0.165,0.171,0.191,0.218,0.271


In [22]:
hourly_dataframe.to_csv("last_3_months_data.csv")

In [23]:
hourly_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2376 entries, 0 to 2375
Data columns (total 32 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   date                       2376 non-null   datetime64[ns, UTC]
 1   temperature_2m             2376 non-null   float32            
 2   relative_humidity_2m       2376 non-null   float32            
 3   dew_point_2m               2376 non-null   float32            
 4   apparent_temperature       2376 non-null   float32            
 5   precipitation_probability  2376 non-null   float32            
 6   precipitation              2376 non-null   float32            
 7   rain                       2376 non-null   float32            
 8   pressure_msl               2376 non-null   float32            
 9   surface_pressure           2376 non-null   float32            
 10  cloud_cover                2376 non-null   float32            
 11  clou

### I tried to make 24 hours value into small sequence as model is predicting a days weather, it is very important to take into account last 24 hours values. I also normalized the feature values so that model does not become confused with the higher values of some features. It ensures that our model will train more effectively and efficiently by maintaining a consistent scale across all input features.

In [4]:
features = hourly_dataframe[[
    "date", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation_probability", "precipitation", "rain", 
    "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "visibility", 
    "wind_speed_10m", "wind_speed_80m", "wind_speed_120m", "wind_speed_180m", "wind_direction_10m", "wind_direction_80m", "wind_direction_120m", 
    "wind_direction_180m", "wind_gusts_10m", "soil_temperature_0cm", "soil_temperature_6cm", "soil_temperature_18cm", "soil_temperature_54cm", 
    "soil_moisture_0_to_1cm", "soil_moisture_1_to_3cm", "soil_moisture_3_to_9cm", "soil_moisture_9_to_27cm"
]]

In [5]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_features = scaler.fit_transform(features.iloc[:, 1:])

In [28]:
import numpy as np

scaled_data = np.concatenate((hourly_dataframe[["temperature_2m"]], scaled_features), axis=1)

In [31]:
def create_sequences(data, seq_length=24):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:(i + seq_length), 1:])
        y.append(data[i + seq_length, 0])
    return np.array(X), np.array(y)

In [32]:
X, y = create_sequences(scaled_data)

### As, all the values are in numpy array using train test sprlit function is not possible in this case, so manually splitted the values and targets

In [33]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

### Tried to make the model as simple as possible, Reason of choosing LSTM is memory its long and short term memory will be effective for our input data

In [34]:
from keras import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(24, X.shape[2])))
model.add(LSTM(50))
model.add(Dense(1))

  super().__init__(**kwargs)


In [35]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [36]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 620.5546 - val_loss: 438.5369
Epoch 2/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 291.3257 - val_loss: 319.1450
Epoch 3/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 206.5000 - val_loss: 245.1274
Epoch 4/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 151.0296 - val_loss: 189.3222
Epoch 5/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 107.6932 - val_loss: 146.6133
Epoch 6/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 80.3365 - val_loss: 114.2687
Epoch 7/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 63.6417 - val_loss: 90.3976
Epoch 8/20
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 50.1504 - val_loss: 72.4929
Epoch 9/20
[1m47/47[0m [3

<keras.src.callbacks.history.History at 0x1fb1de74ca0>

In [37]:
print(model.evaluate(X_test, y_test))

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 19.4338 
19.39682388305664


In [38]:
predictions = model.predict(X_test)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [39]:
predictions

array([[27.43399 ],
       [27.434027],
       [27.434048],
       [27.43406 ],
       [27.433998],
       [27.434029],
       [27.434069],
       [27.434052],
       [27.43395 ],
       [27.43399 ],
       [27.433975],
       [27.433949],
       [27.433945],
       [27.433958],
       [27.433962],
       [27.433962],
       [27.433952],
       [27.433943],
       [27.43394 ],
       [27.433939],
       [27.433937],
       [27.433908],
       [27.43387 ],
       [27.433867],
       [27.433872],
       [27.433891],
       [27.433916],
       [27.433943],
       [27.433886],
       [27.43389 ],
       [27.43392 ],
       [27.433954],
       [27.43397 ],
       [27.433964],
       [27.433918],
       [27.433842],
       [27.433784],
       [27.433857],
       [27.43386 ],
       [27.43388 ],
       [27.433857],
       [27.433897],
       [27.433943],
       [27.433918],
       [27.433975],
       [27.433882],
       [27.433931],
       [27.433874],
       [27.433897],
       [27.433945],


In [41]:
model.save_weights('lstm-checkpoints.weights.h5')