# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"><img src="../../images/icon102.png" width="38px"></img> **Hopsworks Feature Store** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 02: Feature Pipeline</span>

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/advanced_tutorials/air_quality/2_feature_pipeline.ipynb)


## 🗒️ This notebook is divided into the following sections:
1. Parse Data
2. Feature Group Insertion

### <span style='color:#ff5f27'> 📝 Imports</span>

In [11]:
import datetime
import time
import requests
import pandas as pd
import json

from functions import *
import features.air_quality

import warnings
warnings.filterwarnings("ignore")

In [50]:
# Load the continent -> {city_name: coordinates} mapping that drives the download loops below.
# JSON is UTF-8 by specification and the city list contains non-ASCII names
# (e.g. "Kraków", "München"), so the encoding is pinned instead of relying on the
# platform's default text encoding.
with open('target_cities.json', encoding='utf-8') as json_file:
    target_cities = json.load(json_file)

In [13]:
# Reference date of this pipeline run; its string form is the end date of every download window below.
today = datetime.date.today()

In [14]:
# Show the date object next to its string form (the string form is what the download helpers receive).
today, str(today)

(datetime.date(2023, 4, 20), '2023-04-20')

---

## <span style='color:#ff5f27'> 🌫 Filling gaps in Air Quality data (PM2.5)</span>

### On the first run, we determine each city's last update date from the backfill data
#### On subsequent runs, we will use the `feature view` method from the Hopsworks Feature Store instead

In [95]:
# Load the previously saved backfill data (PM2.5 readings and weather) from local CSV files.
df_air_quality = pd.read_csv("data/backfill_pm2_5.csv")
df_weather = pd.read_csv("data/backfill_weather.csv")

In [76]:
# Build a {city_name: last_date} lookup from the air-quality backfill:
# the latest date per city, stored as a string.
last_dates_aq = (
    df_air_quality
    .groupby("city_name")["date"]
    .max()
    .astype(str)
    .to_dict()
)

In [77]:
# Spot-check the lookup for two cities.
last_dates_aq["Berlin"], last_dates_aq["Columbus"]

('2023-04-17', '2023-04-17')

### <span style='color:#ff5f27'>  🧙🏼‍♂️ Parsing PM2.5 data</span>

In [80]:
# Download the missing PM2.5 readings for every target city, from the city's last
# known date up to today, and stack them into a single frame (`df_aq_raw`).
start_of_cell = time.time()

# Collect the per-city frames and concatenate once at the end: calling pd.concat
# inside the loop re-copies everything gathered so far on every iteration.
aq_frames = []

for continent in target_cities:
    for city_name, coords in target_cities[continent].items():
        df_ = get_aqi_data_from_open_meteo(city_name=city_name,
                                           coordinates=coords,
                                           start_date=last_dates_aq[city_name],
                                           end_date=str(today))
        aq_frames.append(df_)

# pd.concat raises on an empty list, so fall back to an empty frame when there are no cities.
df_aq_raw = pd.concat(aq_frames, ignore_index=True) if aq_frames else pd.DataFrame()

end_of_cell = time.time()
print("-" * 64)
print(f"Parsed new PM2.5 data for ALL locations up to {str(today)}.")
print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n")

Processed PM2_5 for Amsterdam since 2023-04-17 till 2023-04-20.
Took 1.23 sec.

Processed PM2_5 for Athina since 2023-04-17 till 2023-04-20.
Took 1.23 sec.

Processed PM2_5 for Berlin since 2023-04-17 till 2023-04-20.
Took 0.66 sec.

Processed PM2_5 for Gdansk since 2023-04-17 till 2023-04-20.
Took 0.48 sec.

Processed PM2_5 for Kraków since 2023-04-17 till 2023-04-20.
Took 0.7 sec.

Processed PM2_5 for London since 2023-04-17 till 2023-04-20.
Took 0.52 sec.

Processed PM2_5 for Madrid since 2023-04-17 till 2023-04-20.
Took 1.04 sec.

Processed PM2_5 for Marseille since 2023-04-17 till 2023-04-20.
Took 0.62 sec.

Processed PM2_5 for Milano since 2023-04-17 till 2023-04-20.
Took 0.4 sec.

Processed PM2_5 for München since 2023-04-17 till 2023-04-20.
Took 0.56 sec.

Processed PM2_5 for Napoli since 2023-04-17 till 2023-04-20.
Took 0.4 sec.

Processed PM2_5 for Paris since 2023-04-17 till 2023-04-20.
Took 0.34 sec.

Processed PM2_5 for Sevilla since 2023-04-17 till 2023-04-20.
Took 0.35 s

In [96]:
# Cut-off date: 30 days before today.
date_threshold = today - datetime.timedelta(days=30)

# Dates are compared as strings, so make sure the column holds strings first,
# then keep only the rows strictly after the cut-off.
df_air_quality["date"] = df_air_quality["date"].astype(str)
is_recent = df_air_quality["date"] > date_threshold.isoformat()
df_air_quality = df_air_quality[is_recent]

df_air_quality

Unnamed: 0,city_name,date,pm2_5
3702,Amsterdam,2023-02-20,9.0
3703,Amsterdam,2023-02-21,18.0
3704,Amsterdam,2023-02-22,29.0
3705,Amsterdam,2023-02-23,19.0
3706,Amsterdam,2023-02-24,6.0
...,...,...,...
158060,Tampa,2023-04-13,6.1
158061,Tampa,2023-04-14,7.4
158062,Tampa,2023-04-15,12.1
158063,Tampa,2023-04-16,12.2


In [97]:
# The aggregation features computed later need earlier readings, so the recent
# backfill rows are placed in front of the freshly downloaded ones. When a
# (city_name, date) pair appears more than once, only its first occurrence is kept.
df_aq_update = (
    pd.concat([df_air_quality, df_aq_raw], ignore_index=True)
    .drop_duplicates(subset=['city_name', 'date'])
)

In [98]:
# (rows, columns) after combining the backfill history with the new readings.
df_aq_update.shape

(2744, 3)

### <span style="color:#ff5f27;">🛠 Feature Engineering PM2.5</span>

In [99]:
# Convert the date column to pandas datetimes before the feature helpers are applied.
df_aq_update['date'] = pd.to_datetime(df_aq_update['date'])

In [100]:
# Lag features: the PM2.5 values of the 7 previous days.
features.air_quality.shift_pm_2_5(df_aq_update, days=7)

rolling_windows = (7, 14, 28)

# Moving averages for every window first, so the columns come out in the same order as before.
for window in rolling_windows:
    features.air_quality.moving_average(df_aq_update, window)

# Then, window by window: moving std, exponential moving average, exponential moving std.
window_stats = (
    features.air_quality.moving_std,
    features.air_quality.exponential_moving_average,
    features.air_quality.exponential_moving_std,
)
for window in rolling_windows:
    for add_stat in window_stats:
        add_stat(df_aq_update, window)

# Order by date (then PM2.5), drop every row that contains a missing value,
# and renumber the index.
df_aq_update = (
    df_aq_update
    .sort_values(by=["date", "pm2_5"])
    .dropna()
    .reset_index(drop=True)
)

In [101]:
# Apply the calendar / cyclical feature helpers one after another; the tuple order
# matches the order in which the original cell called them.
calendar_features = (
    features.air_quality.year,
    features.air_quality.day_of_month,
    features.air_quality.month,
    features.air_quality.day_of_week,
    features.air_quality.is_weekend,
    features.air_quality.sin_day_of_year,
    features.air_quality.cos_day_of_year,
    features.air_quality.sin_day_of_week,
    features.air_quality.cos_day_of_week,
)
for add_feature in calendar_features:
    add_feature(df_aq_update)

In [102]:
# Total number of missing values across the whole frame.
df_aq_update.isna().sum().sum()

0

In [103]:
# (rows, columns) once all features have been added and rows with missing values dropped.
df_aq_update.shape

(1484, 31)

In [105]:
# List the resulting feature columns.
df_aq_update.columns

Index(['city_name', 'date', 'pm2_5', 'pm_2_5_previous_1_day',
       'pm_2_5_previous_2_day', 'pm_2_5_previous_3_day',
       'pm_2_5_previous_4_day', 'pm_2_5_previous_5_day',
       'pm_2_5_previous_6_day', 'pm_2_5_previous_7_day', 'mean_7_days',
       'mean_14_days', 'mean_28_days', 'std_7_days', 'exp_mean_7_days',
       'exp_std_7_days', 'std_14_days', 'exp_mean_14_days', 'exp_std_14_days',
       'std_28_days', 'exp_mean_28_days', 'exp_std_28_days', 'year',
       'day_of_month', 'month', 'day_of_week', 'is_weekend', 'sin_day_of_year',
       'cos_day_of_year', 'sin_day_of_week', 'cos_day_of_week'],
      dtype='object')

---

## <span style='color:#ff5f27'> 🌦 Filling gaps in Weather data</span>

In [108]:
# Build a {city_name: last_date} lookup from the weather backfill:
# the latest date per city, stored as a string.
last_dates_weather = (
    df_weather
    .groupby("city_name")["date"]
    .max()
    .astype(str)
    .to_dict()
)

### <span style='color:#ff5f27'>  🧙🏼‍♂️ Parsing Weather data</span>

In [109]:
# Download the missing weather rows for every target city, from the city's last date
# in the WEATHER backfill up to today, and stack them into a single frame
# (`df_weather_update`).
start_of_cell = time.time()

# Collect the per-city frames and concatenate once at the end: calling pd.concat
# inside the loop re-copies everything gathered so far on every iteration.
weather_frames = []

for continent in target_cities:
    for city_name, coords in target_cities[continent].items():
        # The start date must come from the weather backfill (`last_dates_weather`);
        # the air-quality lookup (`last_dates_aq`) was used here by mistake, so a gap
        # between the two backfills would have been silently skipped or re-downloaded.
        df_ = get_weather_data_from_open_meteo(city_name=city_name,
                                               coordinates=coords,
                                               start_date=last_dates_weather[city_name],
                                               end_date=str(today),
                                               forecast=True)
        weather_frames.append(df_)

# pd.concat raises on an empty list, so fall back to an empty frame when there are no cities.
df_weather_update = pd.concat(weather_frames, ignore_index=True) if weather_frames else pd.DataFrame()

end_of_cell = time.time()
print("-" * 64)
print(f"Parsed new weather data for ALL cities up to {str(today)}.")
print(f"Took {round(end_of_cell - start_of_cell, 2)} sec.\n")

Parsed weather for Amsterdam since 2023-03-20 till 2023-04-20.
Took 0.78 sec.

Parsed weather for Athina since 2023-03-20 till 2023-04-20.
Took 0.52 sec.

Parsed weather for Berlin since 2023-03-20 till 2023-04-20.
Took 0.4 sec.

Parsed weather for Gdansk since 2023-03-20 till 2023-04-20.
Took 0.75 sec.

Parsed weather for Kraków since 2023-03-20 till 2023-04-20.
Took 0.7 sec.

Parsed weather for London since 2023-03-20 till 2023-04-20.
Took 1.06 sec.

Parsed weather for Madrid since 2023-03-20 till 2023-04-20.
Took 0.89 sec.

Parsed weather for Marseille since 2023-03-20 till 2023-04-20.
Took 1.24 sec.

Parsed weather for Milano since 2023-03-20 till 2023-04-20.
Took 0.51 sec.

Parsed weather for München since 2023-03-20 till 2023-04-20.
Took 0.46 sec.

Parsed weather for Napoli since 2023-03-20 till 2023-04-20.
Took 0.6 sec.

Parsed weather for Paris since 2023-03-20 till 2023-04-20.
Took 0.53 sec.

Parsed weather for Sevilla since 2023-03-20 till 2023-04-20.
Took 0.66 sec.

Parsed w

In [110]:
# Give both frames a datetime `date` column and a `unix_time` column computed from it
# with convert_date_to_unix.
for frame in (df_aq_update, df_weather_update):
    frame["date"] = pd.to_datetime(frame["date"])
    frame["unix_time"] = frame["date"].apply(convert_date_to_unix)

In [111]:
# Turn the `date` column of both frames back into strings.
for frame in (df_aq_update, df_weather_update):
    frame["date"] = frame["date"].astype(str)

---

## <span style="color:#ff5f27;">⬆️ Uploading new data to the Feature Store</span>

### <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [38]:
import hopsworks

# Log in to Hopsworks and obtain a handle to the project's feature store.
project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve (or create, if missing) version 1 of the two feature groups written below.
air_quality_fg = fs.get_or_create_feature_group(name='air_quality', version=1)
weather_fg = fs.get_or_create_feature_group(name='weather', version=1)

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/14502
Connected. Call `.close()` to terminate connection gracefully.


In [29]:
# Upload the engineered air-quality rows to the `air_quality` feature group.
# NOTE(review): "wait_for_job": False presumably returns without waiting for the
# offline backfill job to finish — confirm against the Hopsworks documentation.
air_quality_fg.insert(df_aq_update, write_options={"wait_for_job": False})

Uploading Dataframe: 0.00% |          | Rows 0/135 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/14502/jobs/named/air_quality_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f87bd562eb0>, None)

In [31]:
# Upload the new weather rows to the `weather` feature group.
# NOTE(review): "wait_for_job": True presumably blocks until the offline backfill
# job has finished — confirm against the Hopsworks documentation.
weather_fg.insert(df_weather_update, write_options={"wait_for_job": True})

Uploading Dataframe: 0.00% |          | Rows 0/45 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/14502/jobs/named/weather_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f87bd581c10>, None)

# TESTING

In [283]:
# Cities used for the test run in the cells below.
selected_cities_full_list = ["Albuquerque", "Varna", "Wien"]

In [284]:
# Join air-quality and weather rows on (city_name, date). Both frames carry a `unix_time`
# column, so the merge result contains `unix_time_x` and `unix_time_y`.
dataset = df_aq_update.merge(df_weather_update, on=['city_name', 'date'])

In [285]:
# Keep only the rows of the cities selected for the test run.
in_selected_cities = dataset["city_name"].isin(selected_cities_full_list)
dataset = dataset.loc[in_selected_cities]

In [286]:
# Remove the two suffixed `unix_time` columns left behind by the merge.
dataset = dataset.drop(columns=['unix_time_y', 'unix_time_x'])

In [287]:
# dataset.to_csv("modeling_streamlit.csv", index=False)

In [288]:
# Order the rows by city, then by date.
dataset = dataset.sort_values(["city_name", "date"])

In [57]:
# dict_for_streamlit = {}
# for continent in target_cities:
#         for city_name, coords in target_cities[continent].items():
#             dict_for_streamlit[city_name] = coords

In [45]:
# Day-by-day PM2.5 forecast for the selected cities: for every future day a placeholder
# row is appended to `dataset`, the features are recomputed, the day's weather is fetched,
# the model predicts PM2.5, and the predicted row is appended to `dataset` so that the
# next day's lag/rolling features can build on it.
#
# NOTE(review): this cell uses Streamlit-style calls (`st.number_input`, `st.write`) and
# `joblib`, neither of which is imported in the visible cells; `print_fancy_header` and
# `download_model` presumably come from `from functions import *` — confirm before running.
print_fancy_header("🧬 Modeling")
HOW_MANY_DAYS_PREDICT = st.number_input(label='',
                                        min_value=3,
                                        max_value=16,
                                        step=1,
                                        value=7)
HOW_MANY_DAYS_PREDICT = int(HOW_MANY_DAYS_PREDICT)
forecast_end = today + datetime.timedelta(days=HOW_MANY_DAYS_PREDICT)

st.write(3 * "-")
print_fancy_header(f'\n🌤📆 Getting weather forecast for {str(today)}-{str(forecast_end)}...')



st.write("✅ Done!")

st.write(3 * "-")
print_fancy_header('\n🤖 Getting the model...')

saved_model_dir = download_model(
    name="air_quality_xgboost_model",
    version=1
)
# NOTE(review): joblib.load unpickles the files; only load artifacts from a trusted model registry.
regressor = joblib.load(saved_model_dir + "/xgboost_regressor.pkl")
encoder = joblib.load(saved_model_dir + "/label_encoder.pkl")

st.write(3 * "-")
print_fancy_header('\n🧠 Predicting...')

# City -> coordinates lookup, flattened from the continent-grouped `target_cities`.
# It was previously only defined in a commented-out cell, so the lookup inside the
# loop could not succeed on a fresh kernel.
dict_for_streamlit = {
    city: city_coords
    for continent_cities in target_cities.values()
    for city, city_coords in continent_cities.items()
}

for city_name in selected_cities_full_list:
    st.write(f"Processing {city_name}...")
    temp_date = datetime.date.today()
    for _day in range(HOW_MANY_DAYS_PREDICT):
        temp_date += datetime.timedelta(days=1)

        # Placeholder row for the day to predict: every column is -1 except date and city.
        df_aq_temp = pd.DataFrame(columns=dataset.columns, data=[[-1] * dataset.shape[1]])
        df_aq_temp['date'] = temp_date
        df_aq_temp['city_name'] = city_name

        df_aq_temp = pd.concat([dataset, df_aq_temp], axis=0).reset_index(drop=True)

        df_aq_temp['date'] = pd.to_datetime(df_aq_temp['date'])
        features.air_quality.shift_pm_2_5(df_aq_temp, days=7) # add features about 7 previous PM2.5 values

        features.air_quality.moving_average(df_aq_temp, 7)
        features.air_quality.moving_average(df_aq_temp, 14)
        features.air_quality.moving_average(df_aq_temp, 28)

        # `window` (not `i`) so the inner loop no longer reuses the day-loop variable's name.
        for window in [7, 14, 28]:
            for func in [features.air_quality.moving_std,
                         features.air_quality.exponential_moving_average,
                         features.air_quality.exponential_moving_std
                         ]:
                func(df_aq_temp, window)

        df_aq_temp = df_aq_temp.sort_values(by=["date", "pm2_5"]).dropna()
        df_aq_temp = df_aq_temp.reset_index(drop=True)

        features.air_quality.year(df_aq_temp)
        features.air_quality.day_of_month(df_aq_temp)
        features.air_quality.month(df_aq_temp)
        features.air_quality.day_of_week(df_aq_temp)
        features.air_quality.is_weekend(df_aq_temp)
        features.air_quality.sin_day_of_year(df_aq_temp)
        features.air_quality.cos_day_of_year(df_aq_temp)
        features.air_quality.sin_day_of_week(df_aq_temp)
        features.air_quality.cos_day_of_week(df_aq_temp)

        # we need only the last row (one city, one day)
        df_aq_temp = df_aq_temp[df_aq_temp['city_name'] == city_name].tail(1)

        # get weather data for this specific day
        # Pass the coordinates looked up for THIS city; the previous code passed `coords`,
        # a leftover global from the download loops that holds the last iterated city's value.
        coordinates = dict_for_streamlit[city_name]
        df_weather_temp = get_weather_data_from_open_meteo(city_name=city_name,
                                                           coordinates=coordinates,
                                                           start_date=str(temp_date),
                                                           end_date=str(temp_date),
                                                           forecast=True)

        # Replace the placeholder weather columns with the fetched ones
        # (everything after the first two columns of the weather frame).
        df_aq_temp = df_aq_temp.drop(columns=df_weather_temp.columns[2:])
        X = df_aq_temp.merge(df_weather_temp, on=["city_name", "date"])
        encoded = encoder.transform(X['city_name'])

        # Attach the encoded city as an extra column named `city_name_encoded`
        X = pd.concat([X, pd.DataFrame(encoded)], axis=1)
        X = X.rename(columns={0: 'city_name_encoded'})

        # Select the columns listed by the booster, in the booster's order.
        feature_names = regressor.get_booster().feature_names
        X = X[feature_names]

        preds_temp = regressor.predict(X)

        df_temp = X.copy()
        df_temp['pm2_5'] = round(preds_temp[0], 1)
        df_temp['city_name'] = city_name
        df_temp['date'] = str(temp_date)
        df_temp = df_temp.drop(columns=['city_name_encoded'])

        # update dataset variable
        dataset = pd.concat([dataset, df_temp])

In [302]:
# Reset `dataset` from the `dataset_` snapshot.
# NOTE(review): `dataset_` is not assigned in any visible cell — confirm where it is created.
dataset = dataset_.copy()

In [311]:
# Largest "last update" value across all cities (the values are date strings, compared as text).
max(last_dates_aq.values())

'2023-04-17'

Parsed weather for Albuquerque since 2023-04-21 till 2023-04-21.
Took 0.13 sec.

Parsed weather for Albuquerque since 2023-04-22 till 2023-04-22.
Took 0.12 sec.

Parsed weather for Albuquerque since 2023-04-23 till 2023-04-23.
Took 0.12 sec.

Parsed weather for Albuquerque since 2023-04-24 till 2023-04-24.
Took 0.12 sec.

Parsed weather for Albuquerque since 2023-04-25 till 2023-04-25.
Took 0.13 sec.

Parsed weather for Albuquerque since 2023-04-26 till 2023-04-26.
Took 0.12 sec.

Parsed weather for Albuquerque since 2023-04-27 till 2023-04-27.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-21 till 2023-04-21.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-22 till 2023-04-22.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-23 till 2023-04-23.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-24 till 2023-04-24.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-25 till 2023-04-25.
Took 0.12 sec.

Parsed weather for Varna since 2023-04-26 till 2023-04-26.

In [307]:
# Print the first three rows of the key columns.
print(dataset[['city_name', 'date', 'pm2_5']].head(3))

       city_name        date  pm2_5
30   Albuquerque  2023-03-20   11.5
65   Albuquerque  2023-03-21    7.3
112  Albuquerque  2023-03-22    8.8


In [281]:
# Inspect the forecast frame.
# NOTE(review): `df_weather_forecast` is not assigned in any visible cell — confirm where it is created.
df_weather_forecast

Unnamed: 0,city_name,date,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant
0,Varna,2023-04-21,9.3,6.0,1.9,1.9,0.0,4.0,21.5,50.4,207


In [278]:
# Join the single air-quality feature row with the forecast weather on (city_name, date).
X = df_aq_temp.merge(df_weather_forecast, on=["city_name", "date"])

In [279]:
# Inspect the merged frame.
X

Unnamed: 0,city_name,date,pm2_5,pm_2_5_previous_1_day,pm_2_5_previous_2_day,pm_2_5_previous_3_day,pm_2_5_previous_4_day,pm_2_5_previous_5_day,pm_2_5_previous_6_day,pm_2_5_previous_7_day,...,cos_day_of_week,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant


In [263]:
# Manual check: fetch a single day of weather for London with forecast=True.
# NOTE(review): the other calls in this notebook also pass `coordinates=`; this one
# does not — confirm it matches the helper's current signature.
get_weather_data_from_open_meteo(city_name="London",
                                   start_date="2023-05-05",
                                   end_date="2023-05-05",
                                   forecast=True)

Parsed weather for London since 2023-05-05 till 2023-05-05.
Took 0.3 sec.



Unnamed: 0,city_name,date,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant
0,London,2023-05-05,16.8,10.0,0.0,0.0,0.0,0.0,17.3,29.2,13


In [241]:
# Use the retrieved model as the regressor for the cells below.
# NOTE(review): `retrieved_xgboost_model` is not assigned in any visible cell — confirm where it is created.
regressor = retrieved_xgboost_model

In [242]:
# Select the columns listed by the booster's `feature_names`, in that order, then predict.
feature_names = regressor.get_booster().feature_names
X = X[feature_names]

preds_temp = regressor.predict(X)

In [247]:
# Turn the feature row into a dataset row: store the first prediction (rounded to one
# decimal) as pm2_5, restore the city name and date, and drop the encoded city column.
# Relies on `city_name` and `temp_date` still holding the values from the prediction loop.
df_temp = X.copy()
df_temp['pm2_5'] = round(preds_temp[0], 1)
df_temp['city_name'] = city_name
df_temp['date'] = str(temp_date)
df_temp = df_temp.drop(columns=['city_name_encoded'])

In [253]:
# (rows, columns) of the predicted row.
df_temp.shape

(1, 40)

In [43]:
# HOW_MANY_DAYS_PREDICT = 7

In [312]:
# Debug: reload previously dumped frames from the `debug/` folder. This overwrites the
# in-memory `df_weather_update` and `df_aq_update`, then stacks all three frames
# row-wise into `dataset`.
batch_data = pd.read_csv("debug/batch_data.csv")
df_weather_update = pd.read_csv("debug/df_weather_update.csv")
df_aq_update = pd.read_csv("debug/df_aq_update.csv")
dataset = pd.concat([batch_data, df_weather_update, df_aq_update]).reset_index(drop=True)

In [313]:
# Inspect the reloaded batch data.
batch_data

Unnamed: 0,city_name,date,pm2_5,pm_2_5_previous_1_day,pm_2_5_previous_2_day,pm_2_5_previous_3_day,pm_2_5_previous_4_day,pm_2_5_previous_5_day,pm_2_5_previous_6_day,pm_2_5_previous_7_day,...,unix_time,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant
0,Albuquerque,2023-02-20,12.9,12.3,22.1,14.2,5.2,3.7,3.3,12.8,...,1676847600000,14.4,2.6,0.0,0.0,0.0,0.0,20.6,37.4,249
1,Albuquerque,2023-02-21,12.2,12.9,12.3,22.1,14.2,5.2,3.7,3.3,...,1676934000000,13.9,3.7,0.5,0.5,0.0,3.0,26.3,50.0,259
2,Albuquerque,2023-02-22,3.5,12.2,12.9,12.3,22.1,14.2,5.2,3.7,...,1677020400000,13.1,3.0,0.9,0.9,0.0,4.0,57.6,95.8,220
3,Albuquerque,2023-02-23,4.1,3.5,12.2,12.9,12.3,22.1,14.2,5.2,...,1677106800000,11.3,-1.6,0.0,0.0,0.0,0.0,33.1,54.4,252
4,Albuquerque,2023-02-24,9.3,4.1,3.5,12.2,12.9,12.3,22.1,14.2,...,1677193200000,12.1,-0.1,0.0,0.0,0.0,0.0,26.8,47.9,239
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1630,Wien,2023-04-10,18.0,15.0,10.0,20.0,11.0,8.0,7.0,6.0,...,1681077600000,14.1,5.0,0.0,0.0,0.0,0.0,12.4,28.4,324
1631,Wien,2023-04-11,17.0,18.0,15.0,10.0,20.0,11.0,8.0,7.0,...,1681164000000,12.4,3.6,2.0,1.9,0.0,5.0,20.9,45.7,272
1632,Wien,2023-04-12,10.9,17.0,18.0,15.0,10.0,20.0,11.0,8.0,...,1681250400000,14.5,4.7,0.2,0.0,0.0,1.0,12.8,28.8,185
1633,Wien,2023-04-13,8.2,10.9,17.0,18.0,15.0,10.0,20.0,11.0,...,1681336800000,10.0,7.0,10.9,10.9,0.0,15.0,14.7,34.9,303


In [314]:
# Inspect the reloaded weather update.
df_weather_update

Unnamed: 0,city_name,date,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant
0,Amsterdam,2023-04-17,14.1,6.9,0.0,0.0,0.0,0.0,16.3,35.3,25
1,Amsterdam,2023-04-18,13.5,6.3,0.0,0.0,0.0,0.0,21.1,46.4,63
2,Amsterdam,2023-04-19,14.0,8.1,0.0,0.0,0.0,0.0,24.6,51.8,59
3,Amsterdam,2023-04-20,11.7,6.4,2.7,2.6,0.0,4.0,23.2,52.2,54
4,Athina,2023-04-17,20.7,12.7,0.3,0.1,0.0,2.0,11.9,55.8,173
...,...,...,...,...,...,...,...,...,...,...,...
175,Tukwila Allentown,2023-04-20,10.5,1.0,0.8,0.8,0.0,2.0,22.1,43.2,201
176,Tulalip-Totem Beach Rd,2023-04-17,11.1,2.6,2.2,3.3,0.0,3.0,26.3,50.8,167
177,Tulalip-Totem Beach Rd,2023-04-18,8.1,4.0,2.2,3.3,0.0,2.0,28.5,54.4,188
178,Tulalip-Totem Beach Rd,2023-04-19,10.4,3.7,0.0,0.0,0.0,0.0,29.2,47.2,175


In [330]:
# Join the weather and air-quality updates on (city_name, date), append them below the
# batch data, and display the result without the `unix_time` column.
# No de-duplication happens here, so a (city_name, date) pair can appear more than once.
updates = df_weather_update.merge(df_aq_update, on=['city_name', 'date'])
pd.concat([batch_data, updates]).drop(columns=['unix_time'])

Unnamed: 0,city_name,date,pm2_5,pm_2_5_previous_1_day,pm_2_5_previous_2_day,pm_2_5_previous_3_day,pm_2_5_previous_4_day,pm_2_5_previous_5_day,pm_2_5_previous_6_day,pm_2_5_previous_7_day,...,cos_day_of_week,temperature_max,temperature_min,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_max,wind_gusts_max,wind_direction_dominant
0,Albuquerque,2023-02-20,12.9,12.3,22.1,14.2,5.2,3.7,3.3,12.8,...,1.000000,14.4,2.6,0.0,0.0,0.0,0.0,20.6,37.4,249
1,Albuquerque,2023-02-21,12.2,12.9,12.3,22.1,14.2,5.2,3.7,3.3,...,0.623490,13.9,3.7,0.5,0.5,0.0,3.0,26.3,50.0,259
2,Albuquerque,2023-02-22,3.5,12.2,12.9,12.3,22.1,14.2,5.2,3.7,...,-0.222521,13.1,3.0,0.9,0.9,0.0,4.0,57.6,95.8,220
3,Albuquerque,2023-02-23,4.1,3.5,12.2,12.9,12.3,22.1,14.2,5.2,...,-0.900969,11.3,-1.6,0.0,0.0,0.0,0.0,33.1,54.4,252
4,Albuquerque,2023-02-24,9.3,4.1,3.5,12.2,12.9,12.3,22.1,14.2,...,-0.900969,12.1,-0.1,0.0,0.0,0.0,0.0,26.8,47.9,239
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,Tampa,2023-04-17,8.4,6.1,7.2,8.0,6.4,6.7,8.0,6.4,...,1.000000,27.3,20.2,11.2,16.8,0.0,3.0,20.2,26.6,345
146,Tampa,2023-04-17,9.0,8.4,6.1,7.2,8.0,6.4,6.7,8.0,...,1.000000,27.3,20.2,11.2,16.8,0.0,3.0,20.2,26.6,345
147,Tampa,2023-04-18,4.5,9.0,8.4,6.1,7.2,8.0,6.4,6.7,...,0.623490,28.9,11.8,0.0,0.0,0.0,0.0,15.4,34.2,18
148,Tampa,2023-04-19,9.3,4.5,9.0,8.4,6.1,7.2,8.0,6.4,...,-0.222521,30.2,14.7,0.0,0.0,0.0,0.0,17.7,23.8,69


In [None]:
# Scratch call: fetch Berlin weather up to today.
# The cell previously had an empty `start_date=`, which is a SyntaxError. The last date
# recorded for the city in the weather backfill (`last_dates_weather`) is used as the
# start — NOTE(review): confirm this is the intended window.
# NOTE(review): the pipeline calls also pass `coordinates=`; add it here if the helper requires it.
get_weather_data_from_open_meteo(city_name="Berlin",
                                 start_date=last_dates_weather["Berlin"],
                                 end_date=str(today),
                                 forecast=True)