In [228]:
import datetime
import numpy as np
import pandas as pd
import pickle
import os
import duckdb

from dotenv import load_dotenv
from datetime import timedelta

from holidays_feature.holidays_feature import add_ukrainian_holidays
from holidays_feature.holidays_feature import add_russian_holidays

load_dotenv("../.path_env")

%load_ext sql
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

%sql duckdb:///:memory:

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [229]:
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [230]:
REPORTS_DATA_FILE = "./results/tfidf.csv"

OUTPUT_FOLDER = "results"
ISW_OUTPUT_DATA_FILE = "all_isw.csv"
WEATHER_EVENTS_OUTPUT_DATA_FILE = "all_hourly_weather_events.csv"

MODEL_FOLDER = "model"

tfidf_transformer_model = "tfidf_transformer"
count_vectorizer_model = "count_vectorizer"

tfidf_transformer_version = "v1"
count_vectorizer_version = "v1"

In [231]:
def isNaN(num):
    return num != num

## reading data

In [232]:
df_isw = pd.read_csv(f"{REPORTS_DATA_FILE}", sep=",")

In [233]:
df_isw.head(5)

Unnamed: 0,Name,Date,Keywords
0,assessment-2023-03-01,2023-03-01,"{'vučić': 0.24297900380426773, 'serbia': 0.213..."
1,assessment-2023-03-02,2023-03-02,"{'bakhmut': 0.1628544821285347, 'bryansk': 0.1..."
2,assessment-2023-03-03,2023-03-03,"{'election': 0.21089748677899847, 'bakhmut': 0..."
3,assessment-2023-03-04,2023-03-04,"{'kartapalov': 0.17017734958322397, 'bakhmut':..."
4,assessment-2023-03-05,2023-03-05,"{'bakhmut': 0.34564558640106186, 'offensive': ..."


## preparing ISW reports

## reading models

In [234]:
# #load the content
# tfidf = pickle.load(open(f"{MODEL_FOLDER}/{tfidf_transformer_model}_{tfidf_transformer_version}.pkl", "rb"))
# cv = pickle.load(open(f"{MODEL_FOLDER}/{count_vectorizer_model}_{count_vectorizer_version}.pkl", "rb"))

In [235]:
# df_isw['keywords'] = df_isw['text_preprocessed_lemm'].apply(lambda x: tf_idf.conver_doc_to_vector(x,cv,tfidf))

In [236]:
# df_isw.head(5)

In [237]:
df_isw["date_datetime"] = pd.to_datetime(df_isw["Date"])

In [238]:
df_isw["date_tomorrow_datetime"] = df_isw["date_datetime"].apply(
    lambda x: x + datetime.timedelta(days=1)
)
df_isw["event_time"] = np.nan

In [239]:
df_isw = df_isw.rename(columns={"date_datetime": "report_date"})
df_isw.to_csv(f"{OUTPUT_FOLDER}/{ISW_OUTPUT_DATA_FILE}", sep=";", index=False)

In [240]:
# Add holidays data to df_isw
add_ukrainian_holidays(df_isw, day_datetime_column='report_date', column_name='ukrainian_holiday')
add_russian_holidays(df_isw, day_datetime_column='report_date', column_name='russian_holiday')
# df_isw.loc[df_isw['ukrainian_holiday'] == 1]

In [241]:
# df_isw.where(df_isw['ukrainian_holiday'] == 1)
# df_isw.sample(5)
# df_isw.loc[df_isw['ukrainian_holiday'] == 0].shape
# df_isw.shape
# df_isw.loc[df_isw['russian_holiday'] == 1]
# df_isw.loc[df_isw['ukrainian_holiday'] == 1]

## prepare events data

In [242]:
EVENTS_DATA_FILE = "../" + os.getenv("ALARMS_DATA_FILE")

In [243]:
df_events = pd.read_csv(f"{EVENTS_DATA_FILE}", sep=";")

In [244]:
df_events_v2 = df_events.drop(["id", "region_id"], axis=1)
df_events_v2["event_time"] = np.nan

In [245]:
df_events_v2.head(5)
df_events_v2.shape

(19933, 8)

In [246]:
# df_events_v2["start_time"] = df_events_v2.apply(lambda x: x["start"] if not isNaN(x["start"]) else x["event_time"] , axis=1)
# df_events_v2["end_time"] = df_events_v2.apply(lambda x: x["end"] if not isNaN(x["end"]) else x["event_time"], axis=1)

In [247]:
df_events_v2["start_time"] = pd.to_datetime(df_events_v2["start"])
df_events_v2["end_time"] = pd.to_datetime(df_events_v2["end"])
df_events_v2["event_time"] = pd.to_datetime(df_events_v2["event_time"])

In [248]:
df_events_v2["start_hour"] = df_events_v2["start_time"].dt.floor("H")
df_events_v2["end_hour"] = df_events_v2["end_time"].dt.ceil("H")
df_events_v2["event_hour"] = df_events_v2["event_time"].dt.round("H")

In [249]:
df_events_v2["start_hour"] = df_events_v2.apply(
    lambda x: x["start_hour"] if not isNaN(x["start_hour"]) else x["event_hour"], axis=1
)
df_events_v2["end_hour"] = df_events_v2.apply(
    lambda x: x["end_hour"] if not isNaN(x["end_hour"]) else x["event_hour"], axis=1
)

In [250]:
df_events_v2["day_date"] = df_events_v2["start_time"].dt.date

df_events_v2.head(10)

df_events_v2["start_hour_datetimeEpoch"] = df_events_v2["start_hour"].apply(
    lambda x: int(x.timestamp()) if not isNaN(x) else None
)
df_events_v2["end_hour_datetimeEpoch"] = df_events_v2["end_hour"].apply(
    lambda x: int(x.timestamp()) if not isNaN(x) else None
)

# df_events_v2.head(10)

In [251]:
# df_events_v2[~(df_events_v2["type"]=="alarm")].shape

In [252]:
# df_events_v2[~(df_events_v2["type"]=="alarm")].head(5)

In [253]:
# df_events_v2[df_events_v2["type"]=="alarm"].shape

In [254]:
df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,NaT,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,NaT,2022-02-26,1645862400,1645873200
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,NaT,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,NaT,2022-02-26,1645869600,1645876800
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,NaT,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,NaT,2022-02-26,1645880400,1645887600
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,NaT,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645887600,1645894800
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645891200,1645894800
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,NaT,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,NaT,2022-02-26,1645891200,1645898400


## prepare weather

In [255]:
WEATHER_DATA_FILE = "../" + os.getenv("WEATHER_DATA_FILE")

In [256]:
df_weather = pd.read_csv(f"{WEATHER_DATA_FILE}")
df_weather["day_datetime"] = pd.to_datetime(df_weather["day_datetime"])

In [257]:
df_weather.shape

(182712, 67)

In [258]:
df_weather.head(15)

Unnamed: 0,city_latitude,city_longitude,city_resolvedAddress,city_address,city_timezone,city_tzoffset,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_feelslikemax,day_feelslikemin,day_feelslike,day_dew,day_humidity,day_precip,day_precipprob,day_precipcover,day_snow,day_snowdepth,day_windgust,day_windspeed,day_winddir,day_pressure,day_cloudcover,day_visibility,day_solarradiation,day_solarenergy,day_uvindex,day_severerisk,day_sunrise,day_sunriseEpoch,day_sunset,day_sunsetEpoch,day_moonphase,day_conditions,day_description,day_icon,day_source,day_preciptype,day_stations,hour_datetime,hour_datetimeEpoch,hour_temp,hour_feelslike,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,hour_icon,hour_source,hour_stations
0,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,00:00:00,1645653600,2.4,-1.6,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,snow,obs,remote
1,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,01:00:00,1645657200,2.4,-1.5,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,fog,obs,remote
2,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,02:00:00,1645660800,2.9,-0.8,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,cloudy,obs,33177099999
3,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,03:00:00,1645664400,2.3,-1.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,fog,obs,remote
4,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,04:00:00,1645668000,1.9,-1.8,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,cloudy,obs,remote
5,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,05:00:00,1645671600,1.9,-1.3,91.66,0.6,0.0,0.0,0.0,0.1,,23.4,10.8,296.0,1022.5,10.0,100.0,0.0,,0.0,10.0,Overcast,cloudy,obs,33177099999;33301099999
6,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,06:00:00,1645675200,2.0,-1.1,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],20.9,10.8,300.0,1021.0,10.0,100.0,0.0,,0.0,10.0,Overcast,cloudy,obs,UKLR;33301099999
7,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,07:00:00,1645678800,2.0,-1.1,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],19.1,10.8,300.0,1022.0,10.0,100.0,0.0,,0.0,10.0,Overcast,cloudy,obs,UKLR;33301099999
8,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,08:00:00,1645682400,1.8,-0.3,91.32,0.6,0.118,100.0,0.0,0.1,['snow'],16.9,7.2,303.0,1024.2,4.4,100.0,,,,10.0,"Snow, Overcast",rain,obs,33177099999;UKLR;33301099999
9,50.7469,25.3263,"Луцьк, Луцький район, Україна","Lutsk,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,0.7,2.6,4.0,-3.1,-0.2,0.0,83.7,0.118,100.0,4.17,0.1,0.1,32.4,15.5,252.7,1022.3,72.3,12.2,36.9,2.8,1.0,10.0,07:13:36,1645679616,17:51:06,1645717866,0.77,"Snow, Partially cloudy",Partly cloudy throughout the day with morning ...,snow,obs,snow,33177099999;UKLR;remote;33301099999,09:00:00,1645686000,2.0,-1.1,93.09,1.0,0.0,0.0,0.0,0.1,,15.5,10.8,300.0,1024.0,2.0,100.0,15.0,0.1,0.0,10.0,Overcast,cloudy,obs,UKLR;33301099999


In [259]:
# len(clmns)

In [260]:
# exclude
weather_exclude = [
    "day_feelslikemax",
    "day_feelslikemin",
    "day_sunriseEpoch",
    "day_sunsetEpoch",
    "day_description",
    "city_latitude",
    "city_longitude",
    "city_address",
    "city_timezone",
    "city_tzoffset",
    "day_feelslike",
    "day_precipprob",
    "day_snow",
    "day_snowdepth",
    "day_windgust",
    "day_windspeed",
    "day_winddir",
    "day_pressure",
    "day_cloudcover",
    "day_visibility",
    "day_severerisk",
    "day_conditions",
    "day_icon",
    "day_source",
    "day_preciptype",
    "day_stations",
    "hour_icon",
    "hour_source",
    "hour_stations",
    "hour_feelslike",
]

In [261]:
# new_list = [x for x in clmns if (x not in weather_exclude)]
# new_list

In [262]:
df_weather_v2 = df_weather.drop(weather_exclude, axis=1)

In [263]:
df_weather_v2["city"] = df_weather_v2["city_resolvedAddress"].apply(
    lambda x: x.split(",")[0]
)
df_weather_v2["city"] = df_weather_v2["city"].replace(
    "Хмельницька область", "Хмельницький"
)

In [264]:
df_weather_v2.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк


In [265]:
df_weather_v2.shape

(182712, 38)

## merging data

In [266]:
REGIONS_DATA_FOLDER = "../external_data/additions"
REGIONS_DATA_FILE = "regions.csv"
df_regions = pd.read_csv(f"{REGIONS_DATA_FOLDER}/{REGIONS_DATA_FILE}")

In [267]:
df_regions.head(5)

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5


In [268]:
df_weather_reg = pd.merge(
    df_weather_v2, df_regions, left_on="city", right_on="center_city_ua"
)

In [269]:
df_weather_reg.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
5,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,05:00:00,1645671600,1.9,91.66,0.6,0.0,0.0,0.0,0.1,,23.4,10.8,296.0,1022.5,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
6,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,06:00:00,1645675200,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],20.9,10.8,300.0,1021.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
7,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,07:00:00,1645678800,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],19.1,10.8,300.0,1022.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
8,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,08:00:00,1645682400,1.8,91.32,0.6,0.118,100.0,0.0,0.1,['snow'],16.9,7.2,303.0,1024.2,4.4,100.0,,,,10.0,"Snow, Overcast",Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
9,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,09:00:00,1645686000,2.0,93.09,1.0,0.0,0.0,0.0,0.1,,15.5,10.8,300.0,1024.0,2.0,100.0,15.0,0.1,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3


In [270]:
df_weather_reg.shape

(182712, 43)

In [271]:
df_weather_v2.shape

(182712, 38)

### Merging weather and events

In [272]:
# df_events_v2["start_hour_datetimeEpoch"] = df_events_v2['start_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)
# df_events_v2["end_hour_datetimeEpoch"] = df_events_v2['end_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)

In [273]:
df_events_v2.dtypes

region_title                        object
region_city                         object
all_region                           int64
start                               object
end                                 object
clean_end                           object
intersection_alarm_id              float64
event_time                  datetime64[ns]
start_time                  datetime64[ns]
end_time                    datetime64[ns]
start_hour                  datetime64[ns]
end_hour                    datetime64[ns]
event_hour                  datetime64[ns]
day_date                            object
start_hour_datetimeEpoch             int64
end_hour_datetimeEpoch               int64
dtype: object

In [274]:
df_events_v2.shape

(19933, 16)

In [275]:
df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,NaT,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,NaT,2022-02-26,1645862400,1645873200
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,NaT,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,NaT,2022-02-26,1645869600,1645876800
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,NaT,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,NaT,2022-02-26,1645880400,1645887600
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,NaT,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645887600,1645894800
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645891200,1645894800
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,NaT,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,NaT,2022-02-26,1645891200,1645898400


In [276]:
# df_events_v2_sample = df_events_v2.sample(10)
# df_events_v2_sample.shape

events_dict = df_events_v2.to_dict("records")
events_by_hour = []

In [277]:
events_dict[0]

{'region_title': 'Вінниччина',
 'region_city': 'Вінниця',
 'all_region': 0,
 'start': '2022-02-25 22:55:42',
 'end': '2022-02-25 23:41:53',
 'clean_end': '2022-02-25 23:41:53',
 'intersection_alarm_id': nan,
 'event_time': NaT,
 'start_time': Timestamp('2022-02-25 22:55:42'),
 'end_time': Timestamp('2022-02-25 23:41:53'),
 'start_hour': Timestamp('2022-02-25 22:00:00'),
 'end_hour': Timestamp('2022-02-26 00:00:00'),
 'event_hour': NaT,
 'day_date': datetime.date(2022, 2, 25),
 'start_hour_datetimeEpoch': 1645826400,
 'end_hour_datetimeEpoch': 1645833600}

In [278]:
for event in events_dict:
    for d in pd.date_range(start=event["start_hour"], end=event["end_hour"], freq="1H"):
        et = event.copy()
        et["hour_level_event_time"] = d
        events_by_hour.append(et)

In [279]:
df_events_v3 = pd.DataFrame.from_dict(events_by_hour)
df_events_v3["hour_level_event_datetimeEpoch"] = df_events_v3[
    "hour_level_event_time"
].apply(lambda x: int(x.timestamp()) if not isNaN(x) else None)

In [280]:
df_events_v3.shape

(58860, 18)

In [281]:
df_events_v3.head(15)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,event_time,start_time,end_time,start_hour,end_hour,event_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-25 22:00:00,1645826400
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-25 23:00:00,1645830000
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-26 00:00:00,1645833600
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 06:00:00,1645855200
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 07:00:00,1645858800
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645862400
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400,2022-02-26 07:00:00,1645858800
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400,2022-02-26 08:00:00,1645862400
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600,2022-02-26 08:00:00,1645862400
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600,2022-02-26 09:00:00,1645866000


In [282]:
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3


In [283]:
df_weather_reg.shape

(182712, 43)

In [284]:
df_events_v3.head(10)
df_weather_reg.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,00:00:00,1645653600,2.4,89.18,0.8,0.0,0.0,0.1,0.2,['snow'],31.3,15.5,275.6,1020.0,0.0,91.5,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
1,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,01:00:00,1645657200,2.4,87.9,0.6,0.0,0.0,0.0,0.2,['snow'],27.7,14.8,280.3,1021.0,0.2,88.2,0.0,,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
2,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,02:00:00,1645660800,2.9,88.58,1.2,0.0,0.0,0.0,0.1,['snow'],29.2,14.4,310.0,1022.0,10.0,100.0,,,,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
3,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,03:00:00,1645664400,2.3,86.63,0.3,0.0,0.0,0.0,0.1,['snow'],23.8,13.3,295.1,1021.0,0.1,92.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
4,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,04:00:00,1645668000,1.9,87.85,0.1,0.0,0.0,0.0,0.1,['snow'],24.5,13.3,305.8,1021.0,0.0,93.8,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
5,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,05:00:00,1645671600,1.9,91.66,0.6,0.0,0.0,0.0,0.1,,23.4,10.8,296.0,1022.5,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
6,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,06:00:00,1645675200,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],20.9,10.8,300.0,1021.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
7,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,07:00:00,1645678800,2.0,93.09,1.0,0.0,0.0,0.0,0.1,['snow'],19.1,10.8,300.0,1022.0,10.0,100.0,0.0,,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
8,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,08:00:00,1645682400,1.8,91.32,0.6,0.118,100.0,0.0,0.1,['snow'],16.9,7.2,303.0,1024.2,4.4,100.0,,,,10.0,"Snow, Overcast",Луцьк,Волинська,Луцьк,Lutsk,Волинь,3
9,"Луцьк, Луцький район, Україна",2022-02-24,1645653600,4.9,0.7,2.6,0.0,83.7,0.118,4.17,36.9,2.8,1.0,07:13:36,17:51:06,0.77,09:00:00,1645686000,2.0,93.09,1.0,0.0,0.0,0.0,0.1,,15.5,10.8,300.0,1024.0,2.0,100.0,15.0,0.1,0.0,10.0,Overcast,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3


In [285]:
df_events_v4 = df_events_v3.copy().add_prefix("event_")
df_events_v4.head(10)

Unnamed: 0,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_event_time,event_start_time,event_end_time,event_start_hour,event_end_hour,event_event_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-25 22:00:00,1645826400
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-25 23:00:00,1645830000
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,NaT,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,NaT,2022-02-25,1645826400,1645833600,2022-02-26 00:00:00,1645833600
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 06:00:00,1645855200
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 07:00:00,1645858800
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,NaT,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645862400
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400,2022-02-26 07:00:00,1645858800
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645858800,1645862400,2022-02-26 08:00:00,1645862400
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600,2022-02-26 08:00:00,1645862400
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,NaT,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,NaT,2022-02-26,1645862400,1645869600,2022-02-26 09:00:00,1645866000


In [286]:
df_weather_v4 = df_weather_reg.merge(
    df_events_v4,
    how="left",
    left_on=["region_alt", "hour_datetimeEpoch"],
    right_on=["event_region_title", "event_hour_level_event_datetimeEpoch"],
)

In [287]:
# Alarm data
print(df_weather_v4.loc[~ isNaN(df_weather_v4['event_start'])].shape)
print(df_weather_v4.loc[isNaN(df_weather_v4['event_start'])].shape)
df_weather_v4['is_alarm'] = df_weather_v4.apply(lambda x: 0 if isNaN(x['event_start']) else 1, axis=1)
no_alarms = df_weather_v4.loc[df_weather_v4['is_alarm'] == 0].size
alarms = df_weather_v4.loc[df_weather_v4['is_alarm'] == 1].size
print(f"Alarm chane: {alarms / df_weather_v4.size}")
print(f"No alarm: {no_alarms / df_weather_v4.size}")
# df_weather_v4.sample(5)

(49249, 61)
(145835, 61)
Alarm chane: 0.2524502265690677
No alarm: 0.7475497734309323


In [288]:
# Merge isw data to df_weather_v4
df_weather_v5 = pd.merge(
    df_weather_v4,
    df_isw[['Keywords', 'report_date', 'date_tomorrow_datetime', 'ukrainian_holiday', 'russian_holiday']],
    left_on="day_datetime", right_on="report_date"
)
df_weather_v6 = None
df_weather_v5[df_weather_v5['event_start_time'] > pd.to_datetime(0)].head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_event_time,event_start_time,event_end_time,event_start_hour,event_end_hour,event_event_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch,is_alarm,Keywords,report_date,date_tomorrow_datetime,ukrainian_holiday,russian_holiday
1122,"Луцьк, Луцький район, Україна",2022-02-26,1645826400,6.3,-1.5,1.9,-2.7,73.4,0.0,0.0,116.2,8.4,4.0,07:09:30,17:54:34,0.84,18:00:00,1645891200,3.9,58.94,-3.4,0.0,0.0,0.0,0.0,,12.2,4.3,325.5,1030.0,24.1,69.5,164.0,0.6,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3,Волинь,Волинська обл.,1.0,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645891000.0,1645895000.0,2022-02-26 16:00:00,1645891000.0,1,"{'kyiv': 0.44723465586511074, 'zaprozhia': 0.1...",2022-02-26,2022-02-27,0,0
1123,"Луцьк, Луцький район, Україна",2022-02-26,1645826400,6.3,-1.5,1.9,-2.7,73.4,0.0,0.0,116.2,8.4,4.0,07:09:30,17:54:34,0.84,19:00:00,1645894800,2.6,64.61,-3.4,0.0,0.0,0.0,0.0,,7.2,3.2,327.1,1030.0,24.1,75.3,131.0,0.5,0.0,10.0,Partially cloudy,Луцьк,Волинська,Луцьк,Lutsk,Волинь,3,Волинь,Волинська обл.,1.0,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,NaT,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,NaT,2022-02-26,1645891000.0,1645895000.0,2022-02-26 17:00:00,1645895000.0,1,"{'kyiv': 0.44723465586511074, 'zaprozhia': 0.1...",2022-02-26,2022-02-27,0,0
1281,"Одеса, Україна",2022-02-26,1645826400,10.4,1.6,5.3,-0.1,68.6,0.0,0.0,142.8,12.9,4.0,06:42:15,17:38:26,0.84,09:00:00,1645858800,3.7,74.03,-0.5,0.0,0.0,0.0,0.0,,25.9,13.7,344.5,1021.0,24.1,88.0,249.0,0.9,2.0,10.0,Partially cloudy,Одеса,Одеська,Одеса,Odesa,Одещина,15,Одещина,Одеса,0.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645859000.0,1645862000.0,2022-02-26 07:00:00,1645859000.0,1,"{'kyiv': 0.44723465586511074, 'zaprozhia': 0.1...",2022-02-26,2022-02-27,0,0
1282,"Одеса, Україна",2022-02-26,1645826400,10.4,1.6,5.3,-0.1,68.6,0.0,0.0,142.8,12.9,4.0,06:42:15,17:38:26,0.84,10:00:00,1645862400,4.8,69.04,-0.4,0.0,0.0,0.0,0.0,,27.4,14.8,344.7,1022.0,24.1,95.2,305.0,1.1,3.0,10.0,Overcast,Одеса,Одеська,Одеса,Odesa,Одещина,15,Одещина,Одеса,0.0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,NaT,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,NaT,2022-02-26,1645859000.0,1645862000.0,2022-02-26 08:00:00,1645862000.0,1,"{'kyiv': 0.44723465586511074, 'zaprozhia': 0.1...",2022-02-26,2022-02-27,0,0
1362,"Хмельницька область, Україна",2022-02-26,1645826400,5.2,-1.3,2.3,-3.1,69.6,0.0,0.0,120.4,10.7,4.0,07:01:04,17:49:47,0.84,18:00:00,1645891200,3.6,52.98,-5.1,0.0,0.0,0.0,0.0,,21.6,5.4,11.2,1029.0,24.1,95.1,218.0,0.8,0.0,10.0,Overcast,Хмельницький,Хмельницька,Хмельницький,Khmelnytskyi,Хмельниччина,22,Хмельниччина,Деражня,0.0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,NaT,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,NaT,2022-02-26,1645891000.0,1645898000.0,2022-02-26 16:00:00,1645891000.0,1,"{'kyiv': 0.44723465586511074, 'zaprozhia': 0.1...",2022-02-26,2022-02-27,0,0


## Feature engineering

### Number of alarms for this region during the last 24 hours
Use DuckDB for analytics as running with Pandas and Python taking too long.

In [289]:
%%sql
df_weather_v6 << select df.*, alarm_count.events_last_24_hrs
from df_weather_v5 df
         inner join (select out.region_id,
                            out.hour_datetimeEpoch,
                            count(*) as events_last_24_hrs
                     from df_weather_v5 out
                              inner join df_weather_v5 inn
                                         on out.region_id = inn.region_id
                     where inn.is_alarm = 1
                       and inn.event_start_time::timestamp
                        between
                        (epoch_ms(out.hour_datetimeEpoch * 1000) - '24 HOURS'::interval)
                            and epoch_ms(out.hour_datetimeEpoch * 1000)
                     group by out.region_id, out.hour_datetimeEpoch) as alarm_count
        on df.region_id = alarm_count.region_id and df.hour_datetimeEpoch = alarm_count.hour_datetimeEpoch;


FloatProgress(value=0.0, layout=Layout(width='100%'), style=ProgressStyle(bar_color='black'))

In [290]:
df_weather_v6.tail(100)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_event_time,event_start_time,event_end_time,event_start_hour,event_end_hour,event_event_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch,is_alarm,Keywords,report_date,date_tomorrow_datetime,ukrainian_holiday,russian_holiday,events_last_24_hrs
133231,"Черкаси, Україна",2023-01-20,1674165600,11.1,2.5,7.4,3.8,78.9,0.0,0.0,46.8,4.0,2.0,07:38:23,16:27:29,0.93,23:00:00,1674248400,2.5,86.65,0.5,0.0,0.0,0.0,0.0,,26.6,15.8,97.8,1017.0,24.1,100.0,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,4
133232,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,00:00:00,1674165600,5.1,78.11,1.6,0.0,0.0,0.0,0.3,,38.9,19.8,158.4,1017.0,24.1,100.0,0.0,,0.0,10.0,Overcast,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,3
133233,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,01:00:00,1674169200,4.6,76.91,0.9,0.0,0.0,0.0,0.3,,38.2,19.1,158.4,1016.0,24.1,93.3,0.0,,0.0,10.0,Overcast,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,3
133234,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,02:00:00,1674172800,3.8,73.98,-0.4,0.0,0.0,0.0,0.0,,37.4,24.9,131.0,1016.8,10.0,60.0,0.0,,0.0,10.0,Partially cloudy,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,3
133235,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,04:00:00,1674180000,3.8,79.04,0.5,0.0,0.0,0.0,0.0,,35.6,17.3,152.7,1017.0,24.1,99.4,0.0,,0.0,10.0,Overcast,Суми,Сумська,Суми,Sumy,Сумщина,18,Сумщина,Сумська обл.,1.0,2023-01-20 01:02:42,2023-01-20 01:28:50,2023-01-20 01:28:50,,,2023-01-20 01:02:42,2023-01-20 01:28:50,2023-01-20 01:00:00,2023-01-20 02:00:00,,2023-01-20,1674176000.0,1674180000.0,2023-01-20 02:00:00,1674180000.0,1,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5
133236,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,05:00:00,1674183600,3.1,74.99,-0.9,0.0,0.0,0.0,0.0,,36.0,28.5,141.0,1017.4,10.0,78.0,0.0,,0.0,10.0,Partially cloudy,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5
133237,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,06:00:00,1674187200,3.1,81.85,0.3,0.0,0.0,0.0,0.0,,32.0,16.2,153.9,1017.0,24.1,58.1,0.0,,0.0,10.0,Partially cloudy,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5
133238,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,07:00:00,1674190800,2.7,82.99,0.1,0.0,0.0,0.0,0.0,,31.3,16.2,154.1,1018.0,24.1,49.2,0.0,,0.0,10.0,Partially cloudy,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5
133239,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,08:00:00,1674194400,2.8,73.88,-1.4,0.0,0.0,0.0,0.0,,39.6,24.9,141.0,1018.9,10.3,41.0,3.0,0.0,0.0,10.0,Partially cloudy,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5
133240,"Суми, Україна",2023-01-20,1674165600,7.1,-0.6,3.6,-0.5,74.8,0.0,0.0,39.9,3.5,2.0,07:33:03,16:10:58,0.93,10:00:00,1674201600,4.0,73.01,-0.4,0.0,0.0,0.0,0.0,,34.2,17.3,152.4,1021.0,24.1,100.0,106.0,0.4,1.0,10.0,Overcast,Суми,Сумська,Суми,Sumy,Сумщина,18,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,0,"{'wagner': 0.24394059186846612, 'pavlov': 0.18...",2023-01-20,2023-01-21,0,0,5


In [291]:
# Add day of week name
df_weather_v6["day_of_week"] = df_weather_v6["day_datetime"].apply(lambda date: pd.to_datetime(date).day_name())

df_weather_v6[["day_datetime", "day_of_week"]].head(10)

Unnamed: 0,day_datetime,day_of_week
0,2022-02-26,Saturday
1,2022-02-26,Saturday
2,2022-02-26,Saturday
3,2022-02-26,Saturday
4,2022-02-26,Saturday
5,2022-02-26,Saturday
6,2022-02-26,Saturday
7,2022-02-26,Saturday
8,2022-02-26,Saturday
9,2022-02-26,Saturday


In [292]:
# Encode days of week into one hot encoding for linear regression
df_weather_v6 = pd.get_dummies(df_weather_v6, columns=["day_of_week"], prefix=["day_of_week"])

df_weather_v6[["day_datetime", "day_of_week_Monday", "day_of_week_Tuesday", "day_of_week_Wednesday",
               "day_of_week_Thursday", "day_of_week_Friday", "day_of_week_Saturday", "day_of_week_Sunday"]].head(10)

Unnamed: 0,day_datetime,day_of_week_Monday,day_of_week_Tuesday,day_of_week_Wednesday,day_of_week_Thursday,day_of_week_Friday,day_of_week_Saturday,day_of_week_Sunday
0,2022-02-26,False,False,False,False,False,True,False
1,2022-02-26,False,False,False,False,False,True,False
2,2022-02-26,False,False,False,False,False,True,False
3,2022-02-26,False,False,False,False,False,True,False
4,2022-02-26,False,False,False,False,False,True,False
5,2022-02-26,False,False,False,False,False,True,False
6,2022-02-26,False,False,False,False,False,True,False
7,2022-02-26,False,False,False,False,False,True,False
8,2022-02-26,False,False,False,False,False,True,False
9,2022-02-26,False,False,False,False,False,True,False


In [293]:
# Add holidays to df_weather_v6
add_ukrainian_holidays(df_weather_v6, day_datetime_column='day_datetime', column_name='ukrainian_holiday')
add_russian_holidays(df_weather_v6, day_datetime_column='day_datetime', column_name='russian_holiday')
#
print(df_weather_v6.shape)
print(df_weather_v6.loc[df_weather_v6['ukrainian_holiday'] == 1].shape)
print(df_weather_v6.loc[df_weather_v6['russian_holiday'] == 1].shape)

(133331, 75)
(5938, 75)
(4668, 75)


### Handle categorical data

In [294]:

df_weather_v6["hour_conditions"] = pd.Categorical(df_weather_v6['hour_conditions'])
# df_weather_v5["hour_conditions"].astype('category')
# weather_condition_categories = df_weather_v5["hour_conditions"].cat.categories.values
# weather_condition_categories
df_weather_v6["hour_conditions_code"] = df_weather_v6["hour_conditions"].cat.codes
df_weather_v6[["hour_conditions", "hour_conditions_code"]].head(5)

Unnamed: 0,hour_conditions,hour_conditions_code
0,Partially cloudy,3
1,Partially cloudy,3
2,Clear,0
3,Partially cloudy,3
4,Overcast,2


### Save final merged dataframe

In [295]:
df_weather_v6.to_csv(
    f"{OUTPUT_FOLDER}/{WEATHER_EVENTS_OUTPUT_DATA_FILE}", sep=";", index=False
)

In [296]:
# Uncomment to save final merged dataset ~4G
# df_weather_v5.to_csv(
#     f"{OUTPUT_FOLDER}/all_hourly_weather_events_final.csv", sep=";", index=False
# )

### Test LinearRegression

In [297]:
import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

df_final = df_weather_v6

# Fill NaN values
df_final[['event_all_region']] = df_final[['event_all_region']].fillna(value=0)

# Separating the data into independent and dependent variables
# Converting each dataframe into a numpy array
X = np.array(df_final[['region_id', 'event_all_region', 'day_datetimeEpoch', 'hour_datetimeEpoch', 'ukrainian_holiday',
                       'russian_holiday', 'hour_temp', 'hour_snow', 'hour_visibility', 'hour_conditions_code',
                       "day_of_week_Monday", "day_of_week_Tuesday", "day_of_week_Wednesday",
                       "day_of_week_Thursday", "day_of_week_Friday", "day_of_week_Saturday", "day_of_week_Sunday"]])
y = np.array(df_final['is_alarm'])

# Dropping any rows with Nan values
# df_final.dropna(inplace = True)

# Splitting the data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=101)

# LinerRegression
regr = LinearRegression()
#
regr.fit(X_train, y_train)
#
print(regr.score(X_test, y_test))
# df_weather_v4 -      0.7357812097367479
# df_weather_v5 -      0.728836911878402
# hours with no alarm - 0.7475497734309323

0.6961430310615528
