In [1]:
from utils.viz import plot_time_series, plot_balance, plot_balance_vs_price
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Print NumPy floats with three decimals.
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

# Apply the style sheets first. Matplotlib 3.6 renamed the bundled seaborn
# styles to "seaborn-v0_8-*" (the old names were deprecated and later removed),
# so prefer the new name when this Matplotlib knows it and fall back otherwise.
for style_suffix in ('whitegrid', 'poster', 'dark-palette'):
    renamed_style = 'seaborn-v0_8-' + style_suffix
    legacy_style = 'seaborn-' + style_suffix
    plt.style.use(renamed_style if renamed_style in plt.style.available else legacy_style)

# Set fonts AFTER the style sheets: a style sheet overwrites any rcParams it
# defines, so a font.family chosen before plt.style.use() can be silently
# replaced by the style's own font setting.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["mathtext.fontset"] = "cm"

In [3]:
import pandas as pd
from tqdm import tqdm
from statsmodels.tsa.statespace.varmax import VARMAX

In [4]:
# Step 1: Data Loading
# Select a single ant's track from the location table.
colony_id = 1
ant_id = 1
# NOTE(review): ant_num is the SUM of the two ids, so different (colony, ant)
# pairs can share the same number -- confirm this is the intended identifier.
ant_num = colony_id + ant_id

data_df = pd.read_csv('../dataset/insect/ant/location_in_mm.csv')
data_df = data_df.loc[
    (data_df['colony_id'] == colony_id) & (data_df['ant_id'] == ant_id)
].reset_index()  # keeps the original row labels in an 'index' column

# Two-column frame of coordinates used for modelling.
data = data_df.loc[:, ['location_x', 'location_y']]

In [5]:
# Step 2: Data Preparation
# Chronological split: the first half of the rows is used for fitting,
# the remaining rows for validation.
split_at = int(0.5 * len(data))
train = data.iloc[:split_at]
valid = data.iloc[split_at:]
print(train)

      location_x  location_y
0          61.73       15.44
1          61.55       15.48
2          61.55       15.40
3          61.55       15.44
4          61.55       15.44
...          ...         ...
1435       55.01       14.61
1436       55.01       14.61
1437       55.01       14.61
1438       55.01       14.61
1439       55.01       14.61

[1440 rows x 2 columns]


In [6]:
# Grid search over VARMA(p, q) orders with p, q in 0..4, scored by BIC on the
# training half. A single progress bar over all 25 combinations replaces the
# nested bars, which interleave badly with the fit warnings.
tmp = []
candidate_orders = [(p, q) for p in range(5) for q in range(5)]
for p, q in tqdm(candidate_orders):
    try:
        bic = VARMAX(train, order = (p, q)).fit().bic
    except Exception:
        # A failed fit (the recorded run shows (0, 0) failing) is recorded as
        # NaN so the 'bic' column stays numeric. Catching Exception rather than
        # using a bare except lets Ctrl-C / kernel interrupt stop the search.
        bic = np.nan
    tmp.append([bic, p, q])
tmp = pd.DataFrame(tmp, columns = ['bic', 'p', 'q'])
print(tmp)

# One-row frame (more rows only on an exact tie) holding the lowest-BIC order;
# NaN rows never match the comparison.
order = tmp[tmp['bic'] == tmp['bic'].min()]
print(order)

  0%|          | 0/5 [00:00<?, ?it/s]




100%|██████████| 5/5 [00:15<00:00,  3.06s/it][A
 20%|██        | 1/5 [00:15<01:01, 15.31s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

100%|██████████| 5/5 [00:17<00:00,  3.49s/it][A
 40%|████      | 2/5 [00:32<00:49, 16.58s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

100%|██████████| 5/5 [00:24<00:00,  4.99s/it][A
 60%|██████    | 3/5 [00:57<00:40, 20.41s/it]
  0%|          | 0/5 [00:00<?, ?it/s][


100%|██████████| 5/5 [00:35<00:00,  7.04s/it][A
 80%|████████  | 4/5 [01:32<00:26, 26.25s/it]
  0%|          | 0/5 [00:00<?, ?it/s][A
  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

  warn('Estimation of VARMA(p,q) models is not generically robust,'

100%|██████████| 5/5 [00:43<00:00,  8.72s/it][A
100%|██████████| 5/5 [02:16<00:00, 27.31s/it]

            bic  p  q
0           NaN  0  0
1   9303.364376  0  1
2   8953.766606  0  2
3   8417.407251  0  3
4   7960.427762  0  4
5   -282.866952  1  0
6   -535.895960  1  1
7   -572.135760  1  2
8   -602.876744  1  3
9   -624.362918  1  4
10  -504.617674  2  0
11  -557.951950  2  1
12  -563.382951  2  2
13  -602.109456  2  3
14  -603.351085  2  4
15  -519.718852  3  0
16  -536.503994  3  1
17  -552.124392  3  2
18  -631.172350  3  3
19  -639.599976  3  4
20  -656.217363  4  0
21  -628.517182  4  1
22  -647.993489  4  2
23  -655.209757  4  3
24  -635.740072  4  4
           bic  p  q
20 -656.217363  4  0





In [None]:
# Walk-forward, one-step-ahead VARMA forecast over the validation half.

# `order` is the frame of lowest-BIC rows from the grid search; VARMAX expects
# plain integer lag orders, so take the first row and convert explicitly
# instead of passing one-element Series.
best_p = int(order['p'].iloc[0])
best_q = int(order['q'].iloc[0])

# Seed the prediction lists with the observed training values so that they
# line up index-for-index with the full series.
prediction_x = train['location_x'].tolist()
prediction_y = train['location_y'].tolist()

start_t = len(train)
for t_i in tqdm(range(len(valid))):
    current_t = t_i + start_t
    # Sliding window of len(train) observations ending just before current_t;
    # the model is refitted from scratch at every step.
    model = VARMAX(data[t_i:current_t], order = (best_p, best_q))
    fitted_model = model.fit()
    prediction = fitted_model.forecast().reset_index(drop=True)
    # forecast() yields a one-row frame; store scalars, not one-element Series.
    prediction_x.append(float(prediction['location_x'].iloc[0]))
    prediction_y.append(float(prediction['location_y'].iloc[0]))
    


  8%|▊         | 108/1440 [05:20<42:32,  1.92s/it] 

In [None]:
# Overlay the VARMA series on the observed track, one figure per coordinate.
# The second series is the recorded ant location, so it is labelled as ground
# truth ('Close' was a leftover price-series label).
for axis_name, predicted in (('x', prediction_x), ('y', prediction_y)):
    plot_time_series(
        ts_1 = predicted,
        ts_label_1 = 'VARMA Model',
        ts_2 = data_df['location_%s' % axis_name],
        ts_label_2 = 'Ground truth',
        title = 'VARMA predictions vs. ground truth of location %s' % axis_name,
    )


In [None]:
def num_chamber(list_x, list_y):
    """Map each (x, y) position to a chamber number in 1..5.

    Coordinates are truncated with ``int()`` before classification.

    Args:
        list_x: indexable sequence of x coordinates.
        list_y: indexable sequence of y coordinates, at least as long as
            ``list_x``.

    Returns:
        A list with one chamber number per entry of ``list_x``:
        5 when y > 178 or when both x <= 0 and y <= 0, otherwise
        1 for y <= 46, 2 for y <= 92, 3 for y <= 138 and 4 for the rest.
    """
    def classify(x, y):
        # Guard clauses, checked in the same order as the thresholds above.
        if y > 178:
            return 5
        if y <= 0 and x <= 0:
            return 5
        if y <= 46:
            return 1
        if y <= 92:
            return 2
        if y <= 138:
            return 3
        return 4

    return [
        classify(int(list_x[pos]), int(list_y[pos]))
        for pos in range(len(list_x))
    ]

In [None]:
# Convert the predicted coordinates to chamber numbers and save them as a
# one-column CSV. The column name and file name use `ant_num`, the identifier
# defined in the data-loading cell (`num_ant` was never defined and raised
# NameError).
prediction_c = num_chamber(prediction_x, prediction_y)
prediction_c = pd.DataFrame(prediction_c, columns = ['%i'%ant_num])
prediction_c.to_csv('../dataset/insect/ant/prediction_%i.csv'%ant_num, index = False)