In [1]:
import datetime
import calendar
import numpy as np
import pandas as pd

import pickle

from sklearn.feature_extraction.text import TfidfTransformer

In [2]:
# Lift pandas' display limits so that frames are rendered with all of their
# columns and rows (None means "no limit" for both options).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
# Input: preprocessed ISW reports (read further down as a ";"-separated CSV).
INPUT_DATA_FOLDER = "data/2_isw_preprocessed"
REPORTS_DATA_FILE = "all_days.csv"

# Output folder and file names for the datasets written by this notebook.
OUTPUT_FOLDER = "data/4_all_data_preprocessed"
ISW_OUTPUT_DATA_FILE = "all_isw.csv"
WEATHER_EVENTS_OUTPUT_DATA_FILE = "all_hourly_weather_events.csv"

# Folder that holds the pickled text models.
MODEL_FOLDER = "model"

# Model pickle files are addressed as "<MODEL_FOLDER>/<model>_<version>.pkl".
tfidf_transformer_model = "tfidf_transformer"
count_vectorizer_model = "count_vectorizer"

tfidf_transformer_version = "v2"
count_vectorizer_version = "v2"

In [4]:
def isNaN(num):
    """Return the result of ``num != num``.

    The comparison is truthy only for values that do not equal themselves,
    such as ``float("nan")``; for ordinary values it is falsy.

    NOTE(review): the notebook also applies this to timestamp values, which
    relies on pandas' ``NaT`` comparing unequal to itself as well.
    """
    return num != num

## reading data

In [5]:
# Load the preprocessed ISW reports from a semicolon-separated CSV file.
df_isw = pd.read_csv(f"{INPUT_DATA_FOLDER}/{REPORTS_DATA_FILE}", sep=";")

In [6]:
df_isw.head(5)

Unnamed: 0,date,text,lemm,stemm
0,2022-03-01,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march ukrai...,snaz unit throughout kherson citi march ukrai...
1,2022-03-02,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march inclu...,snaz unit throughout kherson citi march inclu...
2,2022-03-03,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...
3,2022-03-04,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...
4,2022-03-05,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...


## preparing ISW reports

## reading models

In [7]:
# Load the pickled TF-IDF transformer and count vectorizer.
# The files are opened in `with` blocks so the handles are closed right after
# loading instead of being left open for the lifetime of the kernel.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files that come from a trusted source.
with open(f"{MODEL_FOLDER}/{tfidf_transformer_model}_{tfidf_transformer_version}.pkl", "rb") as tfidf_file:
    tfidf = pickle.load(tfidf_file)
with open(f"{MODEL_FOLDER}/{count_vectorizer_model}_{count_vectorizer_version}.pkl", "rb") as cv_file:
    cv = pickle.load(cv_file)

In [8]:
def sort_coo(matrix):
    """Return the ``(column, value)`` pairs of ``matrix`` from highest to lowest.

    Pairs are built from ``matrix.col`` and ``matrix.data``. They are ordered
    by value first and by column index second, both descending.
    """
    def by_value_then_column(pair):
        column, value = pair
        return (value, column)

    pairs = list(zip(matrix.col, matrix.data))
    pairs.sort(key=by_value_then_column, reverse=True)
    return pairs

def extract_top_n_from_vector(feature_names, sorted_items, n):
    """Map the first ``n`` scored entries to their feature names.

    Args:
        feature_names: Indexable collection; ``feature_names[idx]`` is the
            name belonging to index ``idx``.
        sorted_items: Sequence of ``(idx, score)`` pairs, already ordered by
            the caller.
        n: Number of leading pairs to keep.

    Returns:
        dict of feature name -> score rounded to 3 decimals, in the order of
        ``sorted_items``.
    """
    return {
        feature_names[idx]: round(score, 3)
        for idx, score in sorted_items[:n]
    }

def convert_to_vector(doc, tfidf_transformer, count_vectorized, n=100):
    """Extract the highest-scoring TF-IDF terms of a single document.

    Args:
        doc: Text of one document.
        tfidf_transformer: Object whose ``transform`` turns the vectorizer's
            output into a TF-IDF matrix.
        count_vectorized: Vectorizer providing ``transform`` and
            ``get_feature_names_out``.
        n: Maximum number of terms to keep. Defaults to 100, the value that
            used to be hard-coded in the body.

    Returns:
        dict mapping term -> TF-IDF score rounded to 3 decimals, highest
        scores first.
    """
    feature_names = count_vectorized.get_feature_names_out()
    counts = count_vectorized.transform([doc])
    tfidf_vector = tfidf_transformer.transform(counts)

    # COO format exposes the .col/.data arrays that sort_coo pairs up.
    sorted_items = sort_coo(tfidf_vector.tocoo())

    return extract_top_n_from_vector(feature_names, sorted_items, n)

In [9]:
# Attach a {term: score} dict of top TF-IDF keywords to every report,
# computed from the "lemm" text column.
df_isw['keywords'] = df_isw['lemm'].apply(convert_to_vector, args=(tfidf, cv))

In [10]:
df_isw.head(5)


Unnamed: 0,date,text,lemm,stemm,keywords
0,2022-03-01,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march ukrai...,snaz unit throughout kherson citi march ukrai...,"{'snaz': 0.345, 'commenc': 0.295, 'desna': 0.2..."
1,2022-03-02,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march inclu...,snaz unit throughout kherson citi march inclu...,"{'mykolayiv': 0.494, 'toward': 0.264, 'march':..."
2,2022-03-03,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'march': 0.501, 'kyiv': 0.361, 'chernihiv': 0..."
3,2022-03-04,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'kyiv': 0.352, 'mykolayiv': 0.205, 'march': 0..."
4,2022-03-05,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'march': 0.335, 'kyiv': 0.317, 'hour': 0.211,..."


In [11]:
# Parse the textual "date" column into pandas datetime values.
df_isw["date_datetime"] = pd.to_datetime(df_isw["date"])

In [12]:
# Day after each report date. Vectorised datetime arithmetic replaces the
# per-row Python lambda and produces the same datetime64 column.
df_isw['date_tomorrow_datetime'] = df_isw['date_datetime'] + pd.Timedelta(days=1)

In [13]:
# Publish the parsed date under its final name, then write the ISW table
# as a semicolon-separated CSV without the index column.
df_isw = df_isw.rename({"date_datetime": "report_date"}, axis="columns")
df_isw.to_csv(
    path_or_buf=f"{OUTPUT_FOLDER}/{ISW_OUTPUT_DATA_FILE}",
    index=False,
    sep=";",
)

In [14]:
df_isw.head(5)

Unnamed: 0,date,text,lemm,stemm,keywords,report_date,date_tomorrow_datetime
0,2022-03-01,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march ukrai...,snaz unit throughout kherson citi march ukrai...,"{'snaz': 0.345, 'commenc': 0.295, 'desna': 0.2...",2022-03-01,2022-03-02
1,2022-03-02,"SNAZ units throughout Kherson City on March 1,...",snaz unit throughout kherson citi march inclu...,snaz unit throughout kherson citi march inclu...,"{'mykolayiv': 0.494, 'toward': 0.264, 'march':...",2022-03-02,2022-03-03
2,2022-03-03,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'march': 0.501, 'kyiv': 0.361, 'chernihiv': 0...",2022-03-03,2022-03-04
3,2022-03-04,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'kyiv': 0.352, 'mykolayiv': 0.205, 'march': 0...",2022-03-04,2022-03-05
4,2022-03-05,\n\n\n\n\n\n\n\n\nRussian Offensive Campaign A...,russian offens campaign ass march institut st...,russian offen campaign assess march institut ...,"{'march': 0.335, 'kyiv': 0.317, 'hour': 0.211,...",2022-03-05,2022-03-06


## prepare events data

In [15]:
# Location of the alarm-events CSV that is loaded in the next cell.
EVENTS_DATA_FOLDER = "data/1_events"
EVENTS_DATA_FILE = "4FeaturesGenerated.csv"

In [16]:
# Load the alarm events from a semicolon-separated CSV file.
df_events = pd.read_csv(f"{EVENTS_DATA_FOLDER}/{EVENTS_DATA_FILE}", sep=";")
#df_events.drop(['delete_2', 'delete_3'], axis=1, inplace=True)

In [17]:
# Work on a copy of the events without the identifier columns.
df_events_v2 = df_events.drop(columns=["id", "region_id"])

In [18]:
df_events_v2.head(5)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,1,89.627778,True


In [19]:
#df_events_v2["start_time"] = df_events_v2.apply(lambda x: x["start"] if not isNaN(x["start"]) else x["event_time"] , axis=1)
#df_events_v2["end_time"] = df_events_v2.apply(lambda x: x["end"] if not isNaN(x["end"]) else x["event_time"], axis=1)

In [20]:
# Parse the textual alarm start/end columns into pandas datetime values.
df_events_v2["start_time"] = pd.to_datetime(df_events_v2["start"])
df_events_v2["end_time"] = pd.to_datetime(df_events_v2["end"])
#df_events_v2["event_time"] = pd.to_datetime(df_events_v2["event_time"])

In [21]:
# Widen each alarm to whole hours: the start is rounded down and the end is
# rounded up to the hour boundary.
# NOTE(review): recent pandas releases deprecate the upper-case 'H' alias in
# favour of 'h' — confirm against the pandas version this notebook targets.
df_events_v2["start_hour"] = df_events_v2['start_time'].dt.floor('H')
df_events_v2["end_hour"] = df_events_v2['end_time'].dt.ceil('H')
#df_events_v2["event_hour"] = df_events_v2['event_time'].dt.round('H')

In [22]:
# Fall back to "event_hour" only where the rounded start/end hour is missing.
# The previous row-wise lambdas indexed x["event_hour"] whenever a value was
# missing, but no visible cell creates that column (its assignment is
# commented out), so any NaT row raised KeyError. Guarding on the column keeps
# the fallback for inputs that provide it and leaves NaT in place otherwise.
if "event_hour" in df_events_v2.columns:
    df_events_v2["start_hour"] = df_events_v2["start_hour"].fillna(df_events_v2["event_hour"])
    df_events_v2["end_hour"] = df_events_v2["end_hour"].fillna(df_events_v2["event_hour"])
df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near,start_time,end_time,start_hour,end_hour
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,1,89.627778,True,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,2,0,2.616667,True,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,1,0,64.877778,True,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,0,2,,True,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,0,2,,True,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,0,3,,True,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00


In [23]:
# Calendar date on which each alarm started.
df_events_v2["day_date"] = df_events_v2["start_time"].dt.date

# Epoch seconds of the rounded start/end hours; missing timestamps map to None.
# NOTE(review): calendar.timegm interprets the naive timestamp as UTC, whereas
# the weather table's hour_datetimeEpoch values shown further down correspond
# to local time at UTC+2 (hour 00:00:00 on 2022-02-24 -> 1645653600). If the
# alarm times are local, an epoch-based join would be shifted by the UTC
# offset — confirm the timezone of "start"/"end".
df_events_v2["start_hour_datetimeEpoch"] = df_events_v2['start_hour'].apply(lambda x: int(calendar.timegm(x.timetuple()))  if not isNaN(x) else None)
df_events_v2["end_hour_datetimeEpoch"] = df_events_v2['end_hour'].apply(lambda x: int(calendar.timegm(x.timetuple()))  if not isNaN(x) else None)

df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,1,89.627778,True,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,2022-02-26,1645862400,1645873200
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,2,0,2.616667,True,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,2022-02-26,1645869600,1645876800
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,1,0,64.877778,True,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,2022-02-26,1645880400,1645887600
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,0,2,,True,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,2022-02-26,1645887600,1645894800
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,0,2,,True,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,2022-02-26,1645891200,1645894800
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,0,3,,True,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,2022-02-26,1645891200,1645898400


In [24]:
#df_events_v2[~(df_events_v2["type"]=="alarm")].shape

In [25]:
#df_events_v2[~(df_events_v2["type"]=="alarm")].head(5)

In [26]:
#df_events_v2[df_events_v2["type"]=="alarm"].shape

## prepare weather

In [27]:
# Location of the hourly weather CSV that is loaded in the next cell.
WEATHER_DATA_FOLDER = "data/1_weather"
WEATHER_DATA_FILE = "all_weather_by_hour.csv"

In [28]:
# Load the hourly weather table and parse its "day_datetime" column in place.
df_weather = pd.read_csv(f"{WEATHER_DATA_FOLDER}/{WEATHER_DATA_FILE}").assign(
    day_datetime=lambda frame: pd.to_datetime(frame["day_datetime"])
)

In [29]:
df_weather.shape

(190656, 67)

In [30]:
df_weather.head(15)

Unnamed: 0,city_latitude,city_longitude,city_resolvedAddress,city_address,city_timezone,city_tzoffset,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_feelslikemax,day_feelslikemin,day_feelslike,day_dew,day_humidity,day_precip,day_precipprob,day_precipcover,day_snow,day_snowdepth,day_windgust,day_windspeed,day_winddir,day_pressure,day_cloudcover,day_visibility,day_solarradiation,day_solarenergy,day_uvindex,day_severerisk,day_sunrise,day_sunriseEpoch,day_sunset,day_sunsetEpoch,day_moonphase,day_conditions,day_description,day_icon,day_source,day_preciptype,day_stations,hour_datetime,hour_datetimeEpoch,hour_temp,hour_feelslike,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,hour_icon,hour_source,hour_stations
0,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,00:00:00,1645653600,0.9,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
1,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,01:00:00,1645657200,0.6,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
2,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,02:00:00,1645660800,0.4,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,cloudy,obs,remote
3,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,03:00:00,1645664400,0.2,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
4,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,04:00:00,1645668000,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,partly-cloudy-night,obs,remote
5,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,05:00:00,1645671600,-0.1,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,cloudy,obs,remote
6,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,06:00:00,1645675200,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,cloudy,obs,remote
7,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,07:00:00,1645678800,0.0,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,fog,obs,remote
8,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,08:00:00,1645682400,0.1,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,fog,obs,remote
9,49.4407,32.0637,"Черкаси, Україна","Cherkasy,Ukraine",Europe/Kiev,2.0,2022-02-24,1645653600,4.9,-0.1,1.9,4.4,-0.1,1.6,-0.6,83.4,0.0,0.0,0.0,0.0,0.0,14.4,8.3,47.6,1023.2,69.4,12.2,47.4,4.4,2.0,10.0,06:44:41,1645677881,17:26:05,1645716365,0.77,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,obs,rain,remote,09:00:00,1645686000,1.4,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,fog,obs,remote


In [31]:
# len(clmns)

In [32]:
# Weather columns that are removed from the weather table in a later cell:
# a selection of day-level fields, city coordinate/address/timezone fields,
# and a few hour-level fields.
weather_exclude = [
"day_feelslikemax",
"day_feelslikemin",
"day_sunriseEpoch",
"day_sunsetEpoch",
"day_description",
"city_latitude",
"city_longitude",
"city_address",
"city_timezone",
"city_tzoffset",
"day_feelslike",
"day_precipprob",
"day_snow",
"day_snowdepth",
"day_windgust",
"day_windspeed",
"day_winddir",
"day_pressure",
"day_cloudcover",
"day_visibility",
"day_severerisk",
"day_conditions",
"day_icon",
"day_source",
"day_preciptype",
"day_stations",
"hour_icon",
"hour_source",
"hour_stations",
"hour_feelslike"
]

In [33]:
# new_list = [x for x in clmns if (x not in weather_exclude)]
# new_list

In [34]:
# Keep only the weather columns that are not listed in weather_exclude.
df_weather_v2 = df_weather.drop(columns=weather_exclude)

In [35]:
# City name = text before the first comma of the resolved address
# (e.g. "Черкаси, Україна" -> "Черкаси"). The vectorised .str accessor
# replaces the per-row lambda and propagates a missing address as NaN instead
# of raising AttributeError on a non-string value.
df_weather_v2["city"] = df_weather_v2["city_resolvedAddress"].str.split(",").str[0]
# This address yields a region name rather than a city; map it to the city
# name (presumably so it matches the regions table's center city — verify).
df_weather_v2["city"] = df_weather_v2["city"].replace('Хмельницька область', "Хмельницький")

In [36]:
df_weather_v2.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси


In [37]:
df_weather_v2.shape

(190656, 38)

## merging data

In [38]:
# Load the region metadata. The path has no placeholders, so a plain string
# literal is used instead of an f-string.
df_regions = pd.read_csv("data/0_meta/regions.csv")

In [39]:
df_regions.head(5)

Unnamed: 0,region,center_city_ua,center_city_en,region_alt,region_id
0,АР Крим,Сімферополь,Simferopol,Крим,1
1,Вінницька,Вінниця,Vinnytsia,Вінниччина,2
2,Волинська,Луцьк,Lutsk,Волинь,3
3,Дніпропетровська,Дніпро,Dnipro,Дніпропетровщина,4
4,Донецька,Донецьк,Donetsk,Донеччина,5


In [40]:
# Attach region metadata to every weather row by matching the weather city
# to the region's center city (default inner join).
df_weather_reg = df_weather_v2.merge(
    df_regions,
    left_on="city",
    right_on="center_city_ua",
)

In [41]:
df_weather_reg.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [42]:
df_weather_reg.shape

(190656, 43)

In [43]:
df_weather_v2.shape

(190656, 38)

### Merging weather and events

In [44]:
# df_events_v2["start_hour_datetimeEpoch"] = df_events_v2['start_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)
# df_events_v2["end_hour_datetimeEpoch"] = df_events_v2['end_hour'].apply(lambda x: int(x.strftime('%s'))  if not isNaN(x) else 0)

In [45]:
df_events_v2.dtypes

region_title                        object
region_city                         object
all_region                           int64
start                               object
end                                 object
clean_end                           object
intersection_alarm_id              float64
alarms_past_24                       int64
simultaneous_alarms                  int64
hours_from_last_alarm              float64
holiday_is_near                       bool
start_time                  datetime64[ns]
end_time                    datetime64[ns]
start_hour                  datetime64[ns]
end_hour                    datetime64[ns]
day_date                            object
start_hour_datetimeEpoch             int64
end_hour_datetimeEpoch               int64
dtype: object

In [46]:
df_events_v2.shape

(19933, 18)

In [47]:
df_events_v2.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600
1,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400
2,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400
3,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600
4,Вінниччина,Вінниця,0,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 10:42:41,,1,1,89.627778,True,2022-02-26 08:39:39,2022-02-26 10:42:41,2022-02-26 08:00:00,2022-02-26 11:00:00,2022-02-26,1645862400,1645873200
5,Вінниччина,Вінниця,0,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 11:59:40,,2,0,2.616667,True,2022-02-26 10:58:23,2022-02-26 11:59:40,2022-02-26 10:00:00,2022-02-26 12:00:00,2022-02-26,1645869600,1645876800
6,Львівщина,Львів,0,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 14:27:25,,1,0,64.877778,True,2022-02-26 13:44:44,2022-02-26 14:27:25,2022-02-26 13:00:00,2022-02-26 15:00:00,2022-02-26,1645880400,1645887600
7,Рівненщина,Рівненська обл.,1,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 16:14:46,,0,2,,True,2022-02-26 15:54:53,2022-02-26 16:14:46,2022-02-26 15:00:00,2022-02-26 17:00:00,2022-02-26,1645887600,1645894800
8,Волинь,Волинська обл.,1,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:39:26,,0,2,,True,2022-02-26 16:08:26,2022-02-26 16:39:26,2022-02-26 16:00:00,2022-02-26 17:00:00,2022-02-26,1645891200,1645894800
9,Хмельниччина,Деражня,0,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 17:19:57,,0,3,,True,2022-02-26 16:10:29,2022-02-26 17:19:57,2022-02-26 16:00:00,2022-02-26 18:00:00,2022-02-26,1645891200,1645898400


In [48]:
# df_events_v2_sample = df_events_v2.sample(10)
# df_events_v2_sample.shape

# One dict per alarm row, plus the (initially empty) list that will collect
# the per-hour records.
events_dict = df_events_v2.to_dict(orient='records')
events_by_hour = list()

In [49]:
events_dict[0]

{'region_title': 'Вінниччина',
 'region_city': 'Вінниця',
 'all_region': 0,
 'start': '2022-02-25 22:55:42',
 'end': '2022-02-25 23:41:53',
 'clean_end': '2022-02-25 23:41:53',
 'intersection_alarm_id': nan,
 'alarms_past_24': 0,
 'simultaneous_alarms': 0,
 'hours_from_last_alarm': nan,
 'holiday_is_near': True,
 'start_time': Timestamp('2022-02-25 22:55:42'),
 'end_time': Timestamp('2022-02-25 23:41:53'),
 'start_hour': Timestamp('2022-02-25 22:00:00'),
 'end_hour': Timestamp('2022-02-26 00:00:00'),
 'day_date': datetime.date(2022, 2, 25),
 'start_hour_datetimeEpoch': 1645826400,
 'end_hour_datetimeEpoch': 1645833600}

In [50]:
# Expand every alarm into one record per hour between its start_hour and
# end_hour (both inclusive), tagging each copy with "hour_level_event_time".
# The list is rebuilt from scratch here, so re-running this cell no longer
# appends duplicate rows to a list that was created in a different cell.
# NOTE(review): end_hour is already rounded up, so the inclusive range also
# emits the hour that begins at or after the alarm's end — confirm intended.
events_by_hour = [
    {**event, "hour_level_event_time": hour}
    for event in events_dict
    for hour in pd.date_range(start=event["start_hour"], end=event["end_hour"], freq='1H')
]

In [51]:
# Turn the list of per-hour event records into a DataFrame (one row per record).
df_events_v3 = pd.DataFrame(events_by_hour)

In [53]:
# Epoch seconds of each hourly record; missing timestamps map to None.
# NOTE(review): calendar.timegm interprets the naive timestamp as UTC —
# confirm this matches the timezone convention of the data it is joined with.
df_events_v3["hour_level_event_datetimeEpoch"] = df_events_v3["hour_level_event_time"].apply(lambda x: int(calendar.timegm(x.timetuple()))  if not isNaN(x) else None)

In [54]:
df_events_v3.shape

(58860, 20)

In [55]:
df_events_v3.head(15)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 22:00:00,1645826400
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 23:00:00,1645830000
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-26 00:00:00,1645833600
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 06:00:00,1645855200
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 07:00:00,1645858800
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645862400
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 07:00:00,1645858800
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 08:00:00,1645862400
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 08:00:00,1645862400
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 09:00:00,1645866000


In [56]:
# Preview the hourly weather frame that is used as the left side of the join below.
df_weather_reg.head(5)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23


In [57]:
# Row count before the join; compare with df_weather_v4.shape afterwards to see
# whether the merge added rows.
df_weather_reg.shape

(190656, 43)

In [58]:
# NOTE(review): second preview of df_events_v3 — the frame is not modified between
# the earlier head(15) cell and this one, so one of the two previews can likely go.
df_events_v3.head(10)

Unnamed: 0,region_title,region_city,all_region,start,end,clean_end,intersection_alarm_id,alarms_past_24,simultaneous_alarms,hours_from_last_alarm,holiday_is_near,start_time,end_time,start_hour,end_hour,day_date,start_hour_datetimeEpoch,end_hour_datetimeEpoch,hour_level_event_time,hour_level_event_datetimeEpoch
0,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 22:00:00,1645826400
1,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-25 23:00:00,1645830000
2,Вінниччина,Вінниця,0,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 23:41:53,,0,0,,True,2022-02-25 22:55:42,2022-02-25 23:41:53,2022-02-25 22:00:00,2022-02-26 00:00:00,2022-02-25,1645826400,1645833600,2022-02-26 00:00:00,1645833600
3,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 06:00:00,1645855200
4,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 07:00:00,1645858800
5,Львівщина,Львів,0,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 07:15:28,,0,0,,True,2022-02-26 06:26:17,2022-02-26 07:15:28,2022-02-26 06:00:00,2022-02-26 08:00:00,2022-02-26,1645855200,1645862400,2022-02-26 08:00:00,1645862400
6,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 07:00:00,1645858800
7,Одещина,Одеса,0,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:47:03,,0,0,,True,2022-02-26 07:16:58,2022-02-26 07:47:03,2022-02-26 07:00:00,2022-02-26 08:00:00,2022-02-26,1645858800,1645862400,2022-02-26 08:00:00,1645862400
8,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 08:00:00,1645862400
9,Житомирщина,Житомир,0,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 09:36:36,,0,1,,True,2022-02-26 08:05:54,2022-02-26 09:36:36,2022-02-26 08:00:00,2022-02-26 10:00:00,2022-02-26,1645862400,1645869600,2022-02-26 09:00:00,1645866000


In [59]:
# Prefix every event column with "event_" so the alarm columns stay distinguishable
# from the weather columns after the join. add_prefix already returns a new
# DataFrame and leaves df_events_v3 untouched, so no explicit .copy() is needed.
df_events_v4 = df_events_v3.add_prefix("event_")

In [60]:
# Attach alarm events to the hourly weather rows: a weather row is matched to an
# event row when the region name and the hour-level epoch timestamp agree.
# how="left" keeps every weather row; rows without a matching event get NaN/NaT
# in all event_* columns.
# NOTE(review): the result has more rows than df_weather_reg (203414 vs 190656
# in the recorded outputs), so some (region, hour) keys match several event rows
# and the corresponding weather row is repeated — confirm this fan-out is intended.
df_weather_v4 = pd.merge(
    left=df_weather_reg,
    right=df_events_v4,
    left_on=["region_alt", "hour_datetimeEpoch"],
    right_on=["event_region_title", "event_hour_level_event_datetimeEpoch"],
    how="left",
)

In [61]:
# Inspect the joined frame; weather hours without a matching alarm carry NaN/NaT
# in every event_* column because the merge above is a left join.
df_weather_v4.head(10)

Unnamed: 0,city_resolvedAddress,day_datetime,day_datetimeEpoch,day_tempmax,day_tempmin,day_temp,day_dew,day_humidity,day_precip,day_precipcover,day_solarradiation,day_solarenergy,day_uvindex,day_sunrise,day_sunset,day_moonphase,hour_datetime,hour_datetimeEpoch,hour_temp,hour_humidity,hour_dew,hour_precip,hour_precipprob,hour_snow,hour_snowdepth,hour_preciptype,hour_windgust,hour_windspeed,hour_winddir,hour_pressure,hour_visibility,hour_cloudcover,hour_solarradiation,hour_solarenergy,hour_uvindex,hour_severerisk,hour_conditions,city,region,center_city_ua,center_city_en,region_alt,region_id,event_region_title,event_region_city,event_all_region,event_start,event_end,event_clean_end,event_intersection_alarm_id,event_alarms_past_24,event_simultaneous_alarms,event_hours_from_last_alarm,event_holiday_is_near,event_start_time,event_end_time,event_start_hour,event_end_hour,event_day_date,event_start_hour_datetimeEpoch,event_end_hour_datetimeEpoch,event_hour_level_event_time,event_hour_level_event_datetimeEpoch
0,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,00:00:00,1645653600,0.9,75.73,-2.9,0.0,0.0,0.0,0.0,,7.2,4.0,132.0,1020.0,24.1,89.0,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
1,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,01:00:00,1645657200,0.6,77.96,-2.8,0.0,0.0,0.0,0.0,,6.5,3.6,147.9,1020.0,24.1,71.1,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
2,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,02:00:00,1645660800,0.4,82.08,-2.3,0.0,0.0,0.0,0.0,,8.3,3.2,198.2,1020.0,24.1,95.4,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
3,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,03:00:00,1645664400,0.2,83.28,-2.3,0.0,0.0,0.0,0.0,,5.0,2.9,202.3,1020.0,24.1,74.6,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
4,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,04:00:00,1645668000,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,5.0,2.9,203.6,1021.0,24.1,67.8,0.0,,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
5,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,05:00:00,1645671600,-0.1,83.86,-2.5,0.0,0.0,0.0,0.0,,4.3,1.8,197.5,1021.0,24.1,94.1,0.0,,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
6,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,06:00:00,1645675200,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.9,1.1,199.2,1022.0,24.1,100.0,0.0,0.0,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
7,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,07:00:00,1645678800,0.0,83.87,-2.4,0.0,0.0,0.0,0.0,,2.2,1.4,221.7,1023.0,0.2,97.9,0.0,0.3,0.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
8,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,08:00:00,1645682400,0.1,87.68,-1.7,0.0,0.0,0.0,0.0,,5.4,3.2,245.3,1023.0,0.1,90.8,179.4,0.6,2.0,10.0,Overcast,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,
9,"Черкаси, Україна",2022-02-24,1645653600,4.9,-0.1,1.9,-0.6,83.4,0.0,0.0,47.4,4.4,2.0,06:44:41,17:26:05,0.77,09:00:00,1645686000,1.4,85.91,-0.7,0.0,0.0,0.0,0.0,,5.0,2.5,247.0,1024.0,0.1,73.3,42.0,0.2,0.0,10.0,Partially cloudy,Черкаси,Черкаська,Черкаси,Cherkasy,Черкащина,23,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,NaT,


In [62]:
# Row count after the join — compare against df_weather_reg.shape: any increase
# means some (region, hour) keys matched more than one event row.
df_weather_v4.shape

(203414, 63)

In [63]:
# Write the joined hourly weather + events table to the output folder as a
# semicolon-separated CSV, leaving out the DataFrame index.
df_weather_v4.to_csv(
    path_or_buf=f"{OUTPUT_FOLDER}/{WEATHER_EVENTS_OUTPUT_DATA_FILE}",
    index=False,
    sep=";",
)