# Ukryte Modele Markova

## Przygotowanie danych

- [x] zdyskretyzować dane na 4 lub 3 przedziały
- [ ] tworzenie modelu Markova - algorytm Viterbiego
- odległość Hamminga po nastrojeniu modelu - wybieramy najbardziej podobną obserwację
- wstępne strojenie macierzy A

In [65]:
import pandas as pd
import numpy as np

# Read the header-less CSV, name the columns that are used later and discard
# the remaining positional columns, all in one chained expression.
df = (
    pd.read_csv('szczecin_sep_2009.txt', header=None)
    .rename(columns={0: "czas", 1: "temperatura", 3: "wilgotnosc", 4: "cisnienie", 5: "wiatr", 11: "condition"})
    .drop(columns=[2, 6, 7, 8, 9, 10])
)

In [66]:
# Replace every numeric measurement with one of four equal-width bins labelled
# 1..4. Only the wind column carries the extra `duplicates='drop'` option.
for column, cut_options in (
    ('temperatura', {}),
    ('wilgotnosc', {}),
    ('cisnienie', {}),
    ('wiatr', {'duplicates': 'drop'}),
):
    df[column] = pd.cut(df[column], bins=4, labels=[1, 2, 3, 4], **cut_options)

In [67]:
# Preview the first rows of the frame after discretization.
df.head()

Unnamed: 0,czas,temperatura,wilgotnosc,cisnienie,wiatr,condition
0,12:00 AM,2,3,3,1,Clear
1,12:30 AM,2,3,3,1,Clear
2,1:00 AM,2,3,3,1,Clear
3,1:30 AM,2,3,3,1,Clear
4,2:00 AM,2,3,3,1,Clear


In [68]:
# Per-column summary: row count, number of distinct values, most frequent
# value and how often it occurs.
df.describe()

Unnamed: 0,czas,temperatura,wilgotnosc,cisnienie,wiatr,condition
count,1438,1438,1438,1438,1438,1438
unique,48,4,4,4,2,14
top,12:30 AM,2,4,4,1,Clear
freq,30,735,709,596,807,807


In [69]:
# List the distinct weather-condition labels present in the data, in sorted order.
np.unique(df['condition'].values)

array(['Clear', 'Fog', 'Light Rain', 'Light Rain Showers',
       'Light Thunderstorms and Rain', 'Mist', 'Mostly Cloudy',
       'Partly Cloudy', 'Rain', 'Rain Showers', 'Scattered Clouds',
       'Shallow Fog', 'Thunderstorms and Rain', 'Unknown'], dtype=object)

In [70]:
# One row per sample holding the four discretized measurements
# (temperature, humidity, pressure, wind).
states = df[['temperatura', 'wilgotnosc', 'cisnienie', 'wiatr']].to_numpy()
# The weather-condition label of every sample, as a plain Python list.
observations = df['condition'].to_list()

In [71]:
# Collapse each row of four bin labels into a single code string such as
# '2331' (temperature, humidity, pressure, wind - in that order).
states = [''.join(map(str, row)) for row in states]

In [72]:
# Convert the list of condition labels into a NumPy array.
observations = np.array(observations)

In [73]:
# Display the observation array to check its contents and dtype.
observations

array(['Clear', 'Clear', 'Clear', ..., 'Clear', 'Clear', 'Light Rain'],
      dtype='<U28')

## Tworzenie Ukrytego Modelu Markova

In [83]:
import mchmm as mc
# The two sequences are aligned sample by sample: weather-condition labels are
# passed as the observations and the 4-digit measurement codes as the states.
obs_seq, sts_seq = observations, states
# Build the hidden Markov model from the paired sequences.
a = mc.HiddenMarkovModel().from_seq(obs_seq, sts_seq)

In [84]:
# State labels held by the fitted model (the 4-digit measurement codes).
a.states

array(['1321', '1421', '1431', '1434', '1441', '1444', '2221', '2231',
       '2234', '2241', '2311', '2314', '2321', '2324', '2331', '2334',
       '2341', '2344', '2411', '2414', '2421', '2424', '2431', '2434',
       '2441', '2444', '3111', '3121', '3131', '3134', '3141', '3144',
       '3211', '3214', '3221', '3231', '3234', '3241', '3244', '3311',
       '3314', '3321', '3324', '3331', '3334', '3341', '3344', '3411',
       '3414', '3421', '3424', '3431', '3434', '3444', '4121', '4131',
       '4141', '4221', '4231', '4234', '4241'], dtype='<U4')

In [85]:
# Observation labels held by the fitted model (the weather conditions).
a.observations

array(['Clear', 'Fog', 'Light Rain', 'Light Rain Showers',
       'Light Thunderstorms and Rain', 'Mist', 'Mostly Cloudy',
       'Partly Cloudy', 'Rain', 'Rain Showers', 'Scattered Clouds',
       'Shallow Fog', 'Thunderstorms and Rain', 'Unknown'], dtype='<U28')

In [86]:
import pandas as pd
# Show the model's `ep` matrix as a labelled table: one row per state code,
# one column per weather condition.
pd.DataFrame(
    data=a.ep,
    index=a.states,
    columns=a.observations,
)

Unnamed: 0,Clear,Fog,Light Rain,Light Rain Showers,Light Thunderstorms and Rain,Mist,Mostly Cloudy,Partly Cloudy,Rain,Rain Showers,Scattered Clouds,Shallow Fog,Thunderstorms and Rain,Unknown
1321,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
1421,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
1431,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
1434,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.000000,0.0,0.0,0.035714,0.857143,0.0,0.071429
1441,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4141,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
4221,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
4231,1.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000
4234,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.333333,0.0,0.0,0.666667,0.000000,0.0,0.000000


## Viterbi - odkrycie ścieżki stanów pogodowych mając daną sekwencję obserwacji

In [87]:
# Decode the most likely state sequence for the full observation sequence.
# `vs` holds the state labels; `vsi` is the second value returned by
# `viterbi` (presumably the matching state indices - confirm against mchmm).
# NOTE(review): the recorded run emitted a warning on
# `t1[:, i] /= t1[:, i].sum()` and the printed path ends in a long run of
# '1321', which is the first entry of `a.states`. This looks like the
# normalization dividing by zero part-way through the sequence - verify the
# decoded path before relying on it.
vs, vsi = a.viterbi(obs_seq)
# Decoded state sequence, concatenated into one string of 4-digit codes.
print("VI", "".join(vs))
# The observation sequence that was decoded.
print("NO", obs_seq)

  t1[:, i] /= t1[:, i].sum()


VI 2431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243424342434243424342431243424342434243424342434243424342434243424342434243424342434243424342434243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243124312431243114311421132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211321132113211

## Podział danych na sekwencje uczące i testujące