In [1]:
!pip install numpy

Collecting numpy
  Downloading numpy-1.22.4-cp310-cp310-win_amd64.whl (14.7 MB)
Installing collected packages: numpy
Successfully installed numpy-1.22.4


In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.4.2-cp310-cp310-win_amd64.whl (10.6 MB)
Collecting pytz>=2020.1
  Using cached pytz-2022.1-py2.py3-none-any.whl (503 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.4.2 pytz-2022.1


In [3]:
!pip install plotly

Collecting plotly
  Using cached plotly-5.8.0-py2.py3-none-any.whl (15.2 MB)
Collecting tenacity>=6.2.0
  Using cached tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.8.0 tenacity-8.0.1


In [156]:
import numpy as np
import pandas as pd
import plotly.express as px
from datetime import datetime
import math

In [50]:
data_dir = "PEMS"

X_train = np.load(f"data/{data_dir}/X_train.npy")
y_train = np.load(f"data/{data_dir}/y_train.npy")
X_test = np.load(f"data/{data_dir}/X_test.npy")
y_test = np.load(f"data/{data_dir}/y_test.npy")

In [51]:
print(X_train.shape)
X_train = X_train.reshape((X_train.shape[1],X_train.shape[0],X_train.shape[2]))
print(X_train.shape)

(267, 963, 144)
(963, 267, 144)


In [106]:
media_totala_per_senzor = X_train.mean((2,1))

In [107]:
media_totala_per_senzor = pd.DataFrame(zip(range(len(media_totala_per_senzor)),media_totala_per_senzor),columns=['Sensor Number','Occupancy rate'])
media_totala_per_senzor

Unnamed: 0,Sensor Number,Occupancy rate
0,0,0.055795
1,1,0.059336
2,2,0.056913
3,3,0.054410
4,4,0.059471
...,...,...
958,958,0.046054
959,959,0.054816
960,960,0.065454
961,961,0.063663


In [108]:
fig = px.bar(media_totala_per_senzor, x='Sensor Number',y='Occupancy rate',color='Occupancy rate')
fig.show()

In [118]:
print(X_train.shape)
X_train = X_train.reshape((X_train.shape[1],X_train.shape[0],X_train.shape[2]))
print(X_train.shape)

(963, 267, 144)
(267, 963, 144)


In [125]:
media_totala_per_day = X_train.mean(axis=(2,1))

In [126]:
media_totala_per_all_days = pd.DataFrame(zip(media_totala_per_day,y_train),columns=['Mean Day','Associated Day'])
media_totala_per_all_days.head()

Unnamed: 0,Mean Day,Associated Day
0,0.056604,3
1,0.057454,4
2,0.055478,3
3,0.058971,5
4,0.039951,1


In [135]:
mean_per_day = media_totala_per_all_days.groupby(['Associated Day']).agg({'Mean Day':'mean'}).reset_index()
mean_per_day

Unnamed: 0,Associated Day,Mean Day
0,1,0.03889
1,2,0.054032
2,3,0.056662
3,4,0.057385
4,5,0.059
5,6,0.059693
6,7,0.044528


In [136]:
mean_per_day.loc[mean_per_day["Associated Day"] == 1, 'Associated Day'] = 'Monday'
mean_per_day.loc[mean_per_day["Associated Day"] == 2, 'Associated Day'] = 'Tuesday'
mean_per_day.loc[mean_per_day["Associated Day"] == 3, 'Associated Day'] = 'Wednesday'
mean_per_day.loc[mean_per_day["Associated Day"] == 4, 'Associated Day'] = 'Thursday'
mean_per_day.loc[mean_per_day["Associated Day"] == 5, 'Associated Day'] = 'Friday'
mean_per_day.loc[mean_per_day["Associated Day"] == 6, 'Associated Day'] = 'Saturday'
mean_per_day.loc[mean_per_day["Associated Day"] == 7, 'Associated Day'] = 'Sunday'
mean_per_day

Unnamed: 0,Associated Day,Mean Day
0,Monday,0.03889
1,Tuesday,0.054032
2,Wednesday,0.056662
3,Thursday,0.057385
4,Friday,0.059
5,Saturday,0.059693
6,Sunday,0.044528


In [137]:
fig = px.bar(mean_per_day, x='Associated Day',y='Mean Day')
fig.show()

In [139]:
print(X_train.shape)
X_train = X_train.reshape((X_train.shape[1],X_train.shape[0],X_train.shape[2]))
print(X_train.shape)

(267, 963, 144)
(963, 267, 144)


In [150]:
mean_per_sensor_10th_minute = []
for i in range(X_train.shape[0]):
    mean_per_sensor_10th_minute.append(X_train[i].mean(axis=0))

In [154]:
mean_per_sensor_10th_minute = np.asarray(mean_per_sensor_10th_minute)
mean_per_sensor_10th_minute.shape

(963, 144)

In [164]:
start_time = '00:00:00'

end_time = '23:59:59'
start_times = pd.date_range(start= pd.Timestamp(start_time).floor('10T'), end = pd.Timestamp(end_time).floor('10T'), freq="10T")
start_times = start_times.strftime('%H:%M:%S')
start_times

Index(['00:00:00', '00:10:00', '00:20:00', '00:30:00', '00:40:00', '00:50:00',
       '01:00:00', '01:10:00', '01:20:00', '01:30:00',
       ...
       '22:20:00', '22:30:00', '22:40:00', '22:50:00', '23:00:00', '23:10:00',
       '23:20:00', '23:30:00', '23:40:00', '23:50:00'],
      dtype='object', length=144)

In [166]:
mean_per_all_10th_minute = mean_per_sensor_10th_minute.mean(axis=0)
mean_per_all_10th_minute.shape

(144,)

In [167]:
mean_10th_minute = pd.DataFrame(zip(start_times,mean_per_all_10th_minute),columns=['Minute mark','Mean value'])
mean_10th_minute

Unnamed: 0,Minute mark,Mean value
0,00:00:00,0.018951
1,00:10:00,0.017911
2,00:20:00,0.017267
3,00:30:00,0.015633
4,00:40:00,0.015342
...,...,...
139,23:10:00,0.027341
140,23:20:00,0.025507
141,23:30:00,0.023378
142,23:40:00,0.023085


In [170]:
fig = px.line(mean_10th_minute, x='Minute mark', y='Mean value')
fig.show()

In [172]:
print(X_train.shape)
X_train = X_train.reshape((X_train.shape[1],X_train.shape[0],X_train.shape[2]))
print(X_train.shape)

(963, 267, 144)
(267, 963, 144)


In [207]:
mean_per_day_10th_minute = []
for i in range(X_train.shape[0]):
    mean_per_day_10th_minute.append(X_train[i].mean(axis=0))

In [208]:
mean_per_day_10th_minute = np.asarray(mean_per_day_10th_minute)
mean_per_day_10th_minute.shape

(267, 144)

In [209]:
mean_per_day_10th_minute_df = pd.DataFrame(zip(mean_per_day_10th_minute,y_train),columns=['Mean per every 10th minute','Associated Day'])
mean_per_day_10th_minute_df

Unnamed: 0,Mean per every 10th minute,Associated Day
0,"[0.016200415368639653, 0.01513946002076845, 0....",3
1,"[0.015660851505711313, 0.015385150571131859, 0...",4
2,"[0.015888058151609524, 0.015549636552440321, 0...",3
3,"[0.016554724818276235, 0.016256490134994812, 0...",5
4,"[0.027075493250259614, 0.025550986500519225, 0...",1
...,...,...
262,"[0.01671692627206647, 0.01633987538940807, 0.0...",2
263,"[0.01688878504672899, 0.015591900311526513, 0....",5
264,"[0.016701453790238803, 0.016844444444444467, 0...",3
265,"[0.024568847352024914, 0.022383904465212844, 0...",7


In [210]:
mean_per_day_10th_minute_df = mean_per_day_10th_minute_df.groupby(['Associated Day'])['Mean per every 10th minute'].agg(lambda x: x.mean(axis=0)).reset_index()
mean_per_day_10th_minute_df

Unnamed: 0,Associated Day,Mean per every 10th minute
0,1,"[0.025321232476635543, 0.023584339304257518, 0..."
1,2,"[0.016666111760124634, 0.015573688992731028, 0..."
2,3,"[0.016122593867847174, 0.015593794064600772, 0..."
3,4,"[0.016277723384265433, 0.015705454185828038, 0..."
4,5,"[0.016844963655244048, 0.015708626144074974, 0..."
5,6,"[0.018311751471097273, 0.017341937810084217, 0..."
6,7,"[0.023558380534315117, 0.02221150523930898, 0...."


In [211]:
start_times_list = [start_times]*7
mean_per_day_10th_minute_df['Minute mark'] = start_times_list
mean_per_day_10th_minute_df

Unnamed: 0,Associated Day,Mean per every 10th minute,Minute mark
0,1,"[0.025321232476635543, 0.023584339304257518, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
1,2,"[0.016666111760124634, 0.015573688992731028, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
2,3,"[0.016122593867847174, 0.015593794064600772, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
3,4,"[0.016277723384265433, 0.015705454185828038, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
4,5,"[0.016844963655244048, 0.015708626144074974, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
5,6,"[0.018311751471097273, 0.017341937810084217, 0...","Index(['00:00:00', '00:10:00', '00:20:00', '00..."
6,7,"[0.023558380534315117, 0.02221150523930898, 0....","Index(['00:00:00', '00:10:00', '00:20:00', '00..."


In [212]:
mean_per_day_10th_minute_df = mean_per_day_10th_minute_df.explode(column=['Mean per every 10th minute','Minute mark'])
mean_per_day_10th_minute_df

Unnamed: 0,Associated Day,Mean per every 10th minute,Minute mark
0,1,0.025321,00:00:00
0,1,0.023584,00:10:00
0,1,0.022736,00:20:00
0,1,0.020331,00:30:00
0,1,0.019756,00:40:00
...,...,...,...
6,7,0.03424,23:10:00
6,7,0.032441,23:20:00
6,7,0.029791,23:30:00
6,7,0.029187,23:40:00


In [213]:
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 1, 'Associated Day'] = 'Monday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 2, 'Associated Day'] = 'Tuesday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 3, 'Associated Day'] = 'Wednesday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 4, 'Associated Day'] = 'Thursday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 5, 'Associated Day'] = 'Friday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 6, 'Associated Day'] = 'Saturday'
mean_per_day_10th_minute_df.loc[mean_per_day_10th_minute_df["Associated Day"] == 7, 'Associated Day'] = 'Sunday'
mean_per_day_10th_minute_df

Unnamed: 0,Associated Day,Mean per every 10th minute,Minute mark
0,Monday,0.025321,00:00:00
0,Monday,0.023584,00:10:00
0,Monday,0.022736,00:20:00
0,Monday,0.020331,00:30:00
0,Monday,0.019756,00:40:00
...,...,...,...
6,Sunday,0.03424,23:10:00
6,Sunday,0.032441,23:20:00
6,Sunday,0.029791,23:30:00
6,Sunday,0.029187,23:40:00


In [214]:
fig = px.line(mean_per_day_10th_minute_df, x='Minute mark', y='Mean per every 10th minute',color='Associated Day')
fig.show()

In [223]:
top_10_sensor_by_occupancy = media_totala_per_senzor.sort_values('Occupancy rate',ascending=False)[:10]['Sensor Number'].to_list()
top_10_sensor_by_occupancy

[510, 509, 953, 812, 960, 783, 221, 415, 578, 416]

In [224]:
least_10_sensor_by_occupancy = media_totala_per_senzor.sort_values('Occupancy rate',ascending=True)[:10]['Sensor Number'].to_list()
least_10_sensor_by_occupancy

[482, 481, 752, 480, 338, 924, 751, 695, 926, 520]

In [243]:
print(X_train.shape)
X_train = X_train.reshape((X_train.shape[1],X_train.shape[0],X_train.shape[2]))
print(X_train.shape)

(267, 963, 144)
(963, 267, 144)


In [267]:
dummy_top = np.asarray([X_train[i,:,:] for i in top_10_sensor_by_occupancy])
dummy_least = np.asarray([X_train[i,:,:] for i in least_10_sensor_by_occupancy])

In [268]:
print(dummy_top.shape)
print(dummy_least.shape)

(10, 267, 144)
(10, 267, 144)


In [269]:
top_10_mean_per_day_10th_minute = []
least_10_mean_per_day_10th_minute = []
for i in range(dummy_top.shape[0]):
    top_10_mean_per_day_10th_minute.append(dummy_top[i].mean(axis=0))
    
for i in range(dummy_least.shape[0]):
    least_10_mean_per_day_10th_minute.append(dummy_least[i].mean(axis=0))

In [270]:
top_10_mean_per_day_10th_minute = np.asarray(top_10_mean_per_day_10th_minute)
least_10_mean_per_day_10th_minute = np.asarray(least_10_mean_per_day_10th_minute)

print(top_10_mean_per_day_10th_minute.shape)
print(least_10_mean_per_day_10th_minute.shape)

(10, 144)
(10, 144)


In [271]:
top_10_mean_per_day_10th_minute = top_10_mean_per_day_10th_minute.mean(axis=0)
least_10_mean_per_day_10th_minute = least_10_mean_per_day_10th_minute.mean(axis=0)

print(top_10_mean_per_day_10th_minute.shape)
print(least_10_mean_per_day_10th_minute.shape)

(144,)
(144,)


In [274]:
top_least_10_mean_per_day_10th_minute = pd.DataFrame(zip(start_times,top_10_mean_per_day_10th_minute,least_10_mean_per_day_10th_minute),columns=['Minute mark','Top 10','Least 10'])
top_least_10_mean_per_day_10th_minute

Unnamed: 0,Minute mark,Top 10,Least 10
0,00:00:00,0.019105,0.022848
1,00:10:00,0.018167,0.021371
2,00:20:00,0.017648,0.020765
3,00:30:00,0.015862,0.018658
4,00:40:00,0.015611,0.017963
...,...,...,...
139,23:10:00,0.033219,0.021541
140,23:20:00,0.030287,0.020467
141,23:30:00,0.027592,0.018388
142,23:40:00,0.027381,0.018213


In [276]:
fig = px.line(top_least_10_mean_per_day_10th_minute, x=top_least_10_mean_per_day_10th_minute['Minute mark'], y=top_least_10_mean_per_day_10th_minute.columns[1:])
fig.show()

In [277]:
media_totala_per_senzor.sort_values('Occupancy rate',ascending=False)[:10]

Unnamed: 0,Sensor Number,Occupancy rate
510,510,0.067913
509,509,0.067251
953,953,0.066599
812,812,0.065902
960,960,0.065454
783,783,0.065198
221,221,0.064478
415,415,0.06444
578,578,0.063781
416,416,0.063706


In [280]:
media_totala_per_senzor.sort_values('Occupancy rate',ascending=True)[:10]

Unnamed: 0,Sensor Number,Occupancy rate
482,482,0.034057
481,481,0.034769
752,752,0.035353
480,480,0.035582
338,338,0.035866
924,924,0.036229
751,751,0.036242
695,695,0.036541
926,926,0.03682
520,520,0.036846


In [282]:
len(media_totala_per_senzor.loc[media_totala_per_senzor['Occupancy rate']<0.05,:])

284

In [283]:
len(media_totala_per_senzor.loc[media_totala_per_senzor['Occupancy rate']>0.05,:])

679