In [1]:
import pandas as pd
import sktime
import numpy as np
import matplotlib
import datetime
import utils

In [2]:
weather_center = pd.read_excel("Погода_центры_2020.xls")
accident_weather = pd.read_excel("Аварии_погода_САЦ_2020.xls")

In [3]:
weather_center.columns
weather_cols = ['lon', 'lat', 'temperature', 'winddirection',
       'windspeedms', 'precipitation', 'snowstorm', 'mist', 'hail',
       'glaze_ice', 'blast', 'squall', 'storm_rainfall', 'electric_storm']

In [4]:
weather_temp = weather_center.copy()
for col in weather_cols:
    weather_temp = weather_temp[weather_temp[col] != -9999]

In [5]:
accident_weather = accident_weather.drop([0], axis=0)
#accident_weather['Субъект РФ'].apply(lambda x: utils.SUBJECTS_TO_CITIES.get(x, "RU-"))

In [6]:
def change_sub_to_city(subject):
    if subject in utils.SUBJECTS_TO_CITIES.keys():
        return utils.SUBJECTS_TO_CITIES[subject]

    subjects = subject.split(',')
    for sub in subjects:
        sub = sub.strip()
        if sub in utils.SUBJECTS_TO_CITIES.keys():
            return utils.SUBJECTS_TO_CITIES[sub]
    others = {
        'Город Москва':'MOSKVA',
        'Кемеровская область - Кузбасс':'KEMEROVO',
         "Республика Крым": "SIMFEROPOL'",
        "Республика Татарстан": "KAZAN'",
        'Ханты-Мансийский АО - Югра': 'HANTY-MANSIJSK'
    }
    return others.get(subject, "None")


In [11]:
accident_weather['city'] = accident_weather['Субъект РФ'].apply(change_sub_to_city)

In [8]:
accident_weather = accident_weather[accident_weather['city'] != "None"]

In [14]:
accident_weather['date'] = accident_weather['Дата'].apply(lambda x:x.date())

In [29]:
def get_event(row):

    time = datetime.datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S.%f")
    city = row[1]
    day = time.date()
    city_cond = (accident_weather['city'] == city)
    date_cond = (accident_weather['date'] == day)
    d = accident_weather[city_cond & city_cond]
    if len(d) == 0:
        return 'Норма'
    else:
        return d['Причина'].iloc[0]


In [33]:
weather_temp['event'] = weather_temp[['datetime_d', 'name']].apply(get_event, axis=1)

In [34]:
sum(weather_temp['event'] == 2)

0

In [35]:
weather_temp['event']

0        Норма
2        Норма
4        Норма
6        Норма
8        Норма
         ...  
62124    Норма
62159    Норма
62195    Норма
62210    Норма
62218    Норма
Name: event, Length: 1438, dtype: object

In [38]:
classes = set(weather_temp['event'])

In [39]:
X_columns = ['lon', 'lat', 'temperature', 'winddirection',
       'windspeedms', 'precipitation', 'snowstorm', 'mist', 'hail',
       'glaze_ice', 'blast', 'squall', 'storm_rainfall', 'electric_storm']
Y_columns = ['event']

In [40]:
X = weather_temp[X_columns]
Y = weather_temp[Y_columns]

# PREDICTION

In [42]:
from sktime.classification.interval_based import TimeSeriesForestClassifier

In [62]:
classifier = TimeSeriesForestClassifier()
X_t = X.values.tolist()
#classifier.fit(X_t, Y['event'])

In [65]:
X_t = list(map(lambda x: pd.DataFrame(x), X_t))

In [75]:
X_t = pd.DataFrame(X.apply(lambda x: np.array(x), axis = 1))

In [76]:
classifier.fit(X_t, Y['event'])

TimeSeriesForestClassifier()

In [79]:
y_p = classifier.predict(X_t)

{'Гололёдообразование', 'Норма', 'Падение деревьев (природные)', 'Пожары'}

In [81]:
from sklearn.metrics import accuracy_score
accuracy_score(Y['event'], y_p)

1.0

In [82]:
X_t

Unnamed: 0,0
0,"[177.567, 64.783, 14.0, 180.0, 6.0, 0.0, 0.0, ..."
2,"[177.567, 64.783, 11.0, 40.0, 2.0, 4.0, 0.0, 0..."
4,"[177.567, 64.783, 9.0, 210.0, 3.0, 0.0, 0.0, 0..."
6,"[177.567, 64.783, 7.0, 140.0, 9.0, 0.0, 0.0, 0..."
8,"[177.567, 64.783, 5.0, 130.0, 8.0, 2.0, 0.0, 0..."
...,...
62124,"[33.533, 44.617, 29.0, 0.0, 4.0, 0.0, 0.0, 0.0..."
62159,"[33.533, 44.617, 19.0, 240.0, 4.0, 0.0, 0.0, 0..."
62195,"[33.533, 44.617, 1.0, 60.0, 3.0, 3.0, 0.0, 0.0..."
62210,"[33.533, 44.617, 23.0, 70.0, 1.0, 0.0, 0.0, 0...."


In [83]:
import joblib
joblib.dump(classifier, "classifier.joblib")

['classifier.joblib']

In [95]:
std = np.mean([
    tree.feature_importances_ for tree in classifier.estimators_], axis=0)
classifier.estimators_[0].max_features_

9