In [1]:
import os
from datetime import time as datetime_time
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so use whichever name this
# installation actually ships (falling through leaves the default style).
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

In [2]:
# The log file has no header row: each line is a timestamp followed by the
# washer count and the dryer count.
headers = ['datetime', 'wash', 'dry']
dtypes = {'datetime': 'str', 'wash': 'int', 'dry': 'int'}
parse_dates = ['datetime']
# `date_parser=` is deprecated since pandas 2.0; `parse_dates` alone already
# converts the column with pandas' own datetime parsing.
raw_data = pd.read_csv("./data", sep=',', header=None, names=headers, dtype=dtypes, parse_dates=parse_dates)
print(raw_data.dtypes)
print(raw_data)

datetime    datetime64[ns]
wash                 int64
dry                  int64
dtype: object
                  datetime  wash  dry
0      2018-12-16 21:40:05     0    2
1      2018-12-16 21:45:05     0    2
2      2018-12-16 21:50:05     0    2
3      2018-12-16 21:55:06     1    2
4      2018-12-16 22:00:05     1    2
...                    ...   ...  ...
182298 2020-09-21 14:40:05     0    3
182299 2020-09-21 14:45:04     0    3
182300 2020-09-21 14:50:05     0    3
182301 2020-09-21 14:55:04     2    3
182302 2020-09-21 15:00:04     2    3

[182303 rows x 3 columns]


In [3]:
def shift_forward(data, hours=4):
    """Return a copy of `data` with its 'datetime' column moved later in time.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime-typed 'datetime' column. It is not modified.
    hours : int or float, default 4
        Size of the shift in hours.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with every 'datetime' value `hours` hours later.
    """
    local_data = data.copy()
    # Vectorised arithmetic: no per-row Python loop, and the column keeps its
    # datetime dtype even when the frame is empty.
    local_data['datetime'] = local_data['datetime'] + pd.Timedelta(hours=hours)
    return local_data

def shift_back(data, hours=4):
    """Return a copy of `data` with its 'datetime' column moved earlier in time.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime-typed 'datetime' column. It is not modified.
    hours : int or float, default 4
        Size of the shift in hours.

    Returns
    -------
    pandas.DataFrame
        A copy of `data` with every 'datetime' value `hours` hours earlier.
    """
    local_data = data.copy()
    # Vectorised arithmetic: no per-row Python loop, and the column keeps its
    # datetime dtype even when the frame is empty.
    local_data['datetime'] = local_data['datetime'] - pd.Timedelta(hours=hours)
    return local_data
    
# Round trip: shift the log back and then forward again, which should
# reproduce the original timestamps.
shifted_data = raw_data.pipe(shift_back).pipe(shift_forward)
shifted_data

Unnamed: 0,datetime,wash,dry
0,2018-12-16 21:40:05,0,2
1,2018-12-16 21:45:05,0,2
2,2018-12-16 21:50:05,0,2
3,2018-12-16 21:55:06,1,2
4,2018-12-16 22:00:05,1,2
...,...,...,...
182298,2020-09-21 14:40:05,0,3
182299,2020-09-21 14:45:04,0,3
182300,2020-09-21 14:50:05,0,3
182301,2020-09-21 14:55:04,2,3


In [4]:
def get_by_months(data):
    """Split `data` into a list of DataFrames, one per calendar month.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime-typed 'datetime' column.

    Returns
    -------
    list of pandas.DataFrame
        Month groups in chronological order. Each frame has 'datetime' back as
        a regular column and a fresh 0..n-1 index.
    """
    # Pass a MonthEnd offset object rather than the 'M' string alias: pandas 2.2
    # deprecated 'M' in favour of 'ME', while the offset object is accepted by
    # every pandas version and bins the rows the same way.
    by_month = pd.Grouper(freq=pd.offsets.MonthEnd())
    indexed = data.set_index('datetime')
    return [group.reset_index() for _, group in indexed.groupby(by_month)]

# Split the unshifted log into per-month frames, then print the frames at
# positions 1 and 2 stacked into one table as a quick check of the split.
months = get_by_months(raw_data)
print(pd.concat(months[1:3]))

                datetime  wash  dry
0    2019-01-01 00:00:05     4    3
1    2019-01-01 00:05:27     4    3
2    2019-01-01 00:10:05     4    3
3    2019-01-01 00:15:05     4    3
4    2019-01-01 00:20:26     4    3
...                  ...   ...  ...
8032 2019-02-28 23:35:05     4    1
8033 2019-02-28 23:40:05     4    1
8034 2019-02-28 23:45:05     4    1
8035 2019-02-28 23:50:06     4    1
8036 2019-02-28 23:55:05     4    1

[16269 rows x 3 columns]


In [5]:
def get_weekdays(data):
    """Return the rows of `data` whose 'datetime' falls on Monday through Friday."""
    # pandas numbers the days Monday=0 ... Sunday=6.
    day_of_week = data['datetime'].dt.dayofweek
    is_weekday = (day_of_week < 5).to_numpy()
    return data[is_weekday]

def get_weekends(data):
    """Return the rows of `data` whose 'datetime' falls on a Saturday or Sunday."""
    # pandas numbers the days Monday=0 ... Sunday=6.
    day_of_week = data['datetime'].dt.dayofweek
    is_weekend = (day_of_week >= 5).to_numpy()
    return data[is_weekend]

# Check the weekday/weekend filters on the month frame at position 1:
# first its weekday rows, then its weekend rows.
print(get_weekdays(months[1]))
print(get_weekends(months[1]))

                datetime  wash  dry
0    2019-01-01 00:00:05     4    3
1    2019-01-01 00:05:27     4    3
2    2019-01-01 00:10:05     4    3
3    2019-01-01 00:15:05     4    3
4    2019-01-01 00:20:26     4    3
...                  ...   ...  ...
8227 2019-01-31 23:35:05     0    3
8228 2019-01-31 23:40:05     0    3
8229 2019-01-31 23:45:05     2    3
8230 2019-01-31 23:50:05     2    3
8231 2019-01-31 23:55:05     2    3

[6463 rows x 3 columns]
                datetime  wash  dry
1151 2019-01-05 00:00:05     3    3
1152 2019-01-05 00:05:05     3    3
1153 2019-01-05 00:10:05     4    3
1154 2019-01-05 00:15:04     4    3
1155 2019-01-05 00:20:04     4    3
...                  ...   ...  ...
7078 2019-01-27 23:35:06     0    2
7079 2019-01-27 23:40:06     0    3
7080 2019-01-27 23:45:06     0    3
7081 2019-01-27 23:50:05     2    3
7082 2019-01-27 23:55:05     1    3

[1769 rows x 3 columns]


In [6]:
def average_range(data):
    """Average every non-'datetime' column per 5-minute time-of-day slot.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a datetime-typed 'datetime' column plus numeric columns.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per distinct 5-minute slot, sorted by slot. 'datetime' holds
        `datetime.time` values and the other columns hold the mean over all
        rows that fall in that slot, regardless of date.
    """
    local_data = data.copy()
    # Floor each timestamp to its 5-minute bin and keep only the time of day,
    # using vectorised .dt calls instead of a per-row Python loop.
    local_data['datetime'] = local_data['datetime'].dt.floor('5min').dt.time
    return local_data.groupby('datetime').mean().reset_index()

# Print the per-slot averages for the month frame at position 1.
print(average_range(months[1]))

     datetime      wash       dry
0    00:00:00  3.551724  2.551724
1    00:05:00  3.482759  2.586207
2    00:10:00  3.310345  2.482759
3    00:15:00  3.310345  2.482759
4    00:20:00  3.250000  2.464286
..        ...       ...       ...
283  23:35:00  2.586207  2.103448
284  23:40:00  2.620690  2.275862
285  23:45:00  2.896552  2.310345
286  23:50:00  3.103448  2.310345
287  23:55:00  3.137931  2.448276

[288 rows x 3 columns]


In [7]:
def shift_time_forward(data):
    """Return a copy of `data` with 'datetime' moved 4 hours later on the clock.

    Only the `.hour` and `.minute` of each value are read. The result is a
    `datetime.time` with the hour wrapped modulo 24; seconds and smaller units
    are dropped.
    """
    shifted = data.copy()
    wrapped_times = []
    for stamp in shifted['datetime']:
        new_hour = (stamp.hour + 4) % 24
        wrapped_times.append(datetime_time(new_hour, stamp.minute))
    shifted['datetime'] = wrapped_times
    return shifted

def make_pretty(data_ref):
    """Return a copy of `data_ref` with human-readable column names.

    The original assigned to a `names` attribute, which does not rename
    DataFrame columns, and returned nothing. This version renames the
    'datetime', 'wash' and 'dry' columns and returns the result.

    Parameters
    ----------
    data_ref : pandas.DataFrame
        Frame to relabel. It is not modified; columns other than the three
        listed above are left as they are.

    Returns
    -------
    pandas.DataFrame
        Copy with columns "Time", "Washers Available", "Dryers Available".
    """
    return data_ref.rename(columns={
        'datetime': "Time",
        'wash': "Washers Available",
        'dry': "Dryers Available",
    })

def plot_data(data_ref, title, filename, output_dir="plots"):
    """Plot the 'wash' and 'dry' columns against 'datetime' and save the figure.

    Parameters
    ----------
    data_ref : pandas.DataFrame
        Frame with 'datetime', 'wash' and 'dry' columns. The x ticks are
        `datetime.time` values, so 'datetime' is expected to hold times of day
        (as produced by `average_range`). The frame is not modified.
    title : str
        Figure title.
    filename : str
        File name of the saved image inside `output_dir`.
    output_dir : str, default "plots"
        Directory the image is written to; created if it does not exist.
    """
    # One tick every four hours, labelled HH:MM. The step is given as a
    # Timedelta because the 'H' frequency alias is deprecated in pandas 2.2+.
    ticks = [stamp.time() for stamp in pd.date_range(start='1/1/2018 4:00', end='1/3/2018', freq=pd.Timedelta(hours=4))]
    tick_labels = [tick.strftime("%H:%M") for tick in ticks]
    ax = data_ref.plot(x="datetime", y=["wash", "dry"], figsize=(12, 6), xticks=ticks, title=title)
    ax.set_xticklabels(tick_labels)
    ax.legend(["Washers Available", "Dryers Available"])
    ax.set_xlabel("Time")
    fig = ax.get_figure()
    # savefig does not create missing directories, so make sure it exists first.
    os.makedirs(output_dir, exist_ok=True)
    fig.savefig(os.path.join(output_dir, filename))
    # Close the figure so repeated calls in a loop do not accumulate open figures.
    plt.close(fig)


#plot_data(average_range(get_weekdays(get_by_months(shift_back(raw_data))[1])), "Weekdays Jan 2019")

In [8]:
def process_range(data, label, prefix):
    """Save one weekday and one weekend average plot for `data`.

    `label` is appended to the plot titles and, lower-cased with spaces turned
    into underscores, becomes part of the file names. `prefix` starts both
    file names.
    """
    slug = label.replace(" ", "_").lower()

    weekday_avg = average_range(get_weekdays(data))
    plot_data(weekday_avg, "Weekdays " + label, prefix + "_weekday_" + slug + ".png")

    weekend_avg = average_range(get_weekends(data))
    plot_data(weekend_avg, "Weekend " + label, prefix + "_weekend_" + slug + ".png")

In [13]:
# Rebuild `months` from the log shifted back by 4 hours, replacing the list made
# from the unshifted data earlier. The first month group is dropped, presumably
# because the log only starts on 2018-12-16 and that month is incomplete.
months = get_by_months(shift_back(raw_data))[1:]

[                datetime  wash  dry
0    2019-01-01 00:00:05     4    3
1    2019-01-01 00:05:05     4    3
2    2019-01-01 00:10:05     4    3
3    2019-01-01 00:15:05     4    3
4    2019-01-01 00:20:05     4    3
...                  ...   ...  ...
8227 2019-01-31 23:35:05     4    2
8228 2019-01-31 23:40:04     4    2
8229 2019-01-31 23:45:05     4    2
8230 2019-01-31 23:50:26     4    3
8231 2019-01-31 23:55:05     4    3

[8232 rows x 3 columns],                 datetime  wash  dry
0    2019-02-01 00:00:04     4    3
1    2019-02-01 00:05:05     4    3
2    2019-02-01 00:10:04     4    3
3    2019-02-01 00:15:05     4    3
4    2019-02-01 00:20:05     4    3
...                  ...   ...  ...
8032 2019-02-28 23:35:05     4    3
8033 2019-02-28 23:40:26     4    3
8034 2019-02-28 23:45:06     4    3
8035 2019-02-28 23:50:05     3    3
8036 2019-02-28 23:55:26     1    3

[8037 rows x 3 columns],                 datetime  wash  dry
0    2019-03-01 00:00:05     0    3
1    2019-0

In [14]:
# Save a weekday and a weekend plot for every month, titled like "January 2019"
# and with file names prefixed "YYYY.MM".
for month in months:
    # A month frame with no rows has no first timestamp to build a title from
    # and nothing to plot, so skip it instead of failing.
    if month.empty:
        continue
    # .iloc[0] is positional, so it works whatever the frame's index labels are.
    first_stamp = month['datetime'].iloc[0]
    title = first_stamp.strftime("%B %Y")
    pref = str(first_stamp.year) + "." + str(first_stamp.month).zfill(2)
    process_range(month, title, pref)

In [11]:
# Save the weekday and weekend averages over the whole shifted-back log.
# The "0.0" prefix starts both output file names.
process_range(shift_back(raw_data), "all time Average", "0.0")