In [40]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from statistics import mode
import requests
import re
import time
from IPython.display import clear_output
from tqdm.notebook import tqdm

pd.set_option('display.max_rows', 200)

In [5]:
# Get a list of the files that we're grabbing
def get_list_of_files():
    """Return the URLs of every data file linked from the MTA turnstile page.

    Downloads the turnstile index page, looks up every
    ``<div class="span-84 last">`` container and collects the ``href`` of each
    anchor inside it, prefixed with the developers base URL.

    Returns:
        list[str]: File URLs in document order. Empty when the page contains
        no matching container or no links.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    base_url = "http://web.mta.info/developers/"
    res = requests.get(base_url + "turnstile.html", timeout=30)
    # Fail loudly here; returning None would only surface later as a
    # confusing TypeError when the caller indexes the result.
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    page = BeautifulSoup(res.content, "html.parser")

    file_list = []
    # Walk every matching container (not just the last one); an empty match
    # simply yields an empty list.
    for div in page.find_all("div", {"class": "span-84 last"}):
        # href=True skips anchors that carry no link target.
        for anchor in div.find_all("a", href=True):
            file_list.append(base_url + anchor["href"])
    return file_list


In [None]:
# NOTE(review): `links` is not read by any later cell shown here;
# get_data() fetches the file list itself.
links=get_list_of_files()


In [48]:
def get_data(weeks_num):
    """Download and combine the first ``weeks_num`` weekly turnstile files.

    Args:
        weeks_num (int): How many files to take from the start of the list
            returned by ``get_list_of_files()``. Values below 1 still load
            the first file.

    Returns:
        pandas.DataFrame: The rows of all loaded files stacked under a fresh
        0..n-1 index. Files that come later in the list are placed before
        the ones that come earlier.

    Raises:
        IndexError: If fewer than ``weeks_num`` files are available.
    """
    links = get_list_of_files()
    weeks_to_load = max(weeks_num, 1)
    # Check up front instead of failing after most downloads have finished.
    if weeks_to_load > len(links):
        raise IndexError(
            f"Requested {weeks_to_load} weekly files but only {len(links)} are available"
        )

    frames = []
    for week, link in enumerate(links[:weeks_to_load], start=1):
        print(f"Downloading {week} week of {weeks_num}")
        frames.append(pd.read_csv(link, sep=','))
        # Brief pause between downloads before the progress line is cleared.
        time.sleep(1)
        clear_output()
    print("Done")

    # Concatenate once at the end: concatenating inside the loop re-copies the
    # growing frame every week. Reversing keeps later list entries first.
    return pd.concat(frames[::-1], axis=0, ignore_index=True)


In [49]:
# Combine 52 weekly files into one frame; the output recorded further down
# spans 2021-10-16 to 2022-10-14.
df=get_data(52)

In [56]:
df.shape

(10956784, 11)

In [57]:
df.isnull().sum()

C/A                                                                     0
UNIT                                                                    0
SCP                                                                     0
STATION                                                                 0
LINENAME                                                                0
DIVISION                                                                0
DATE                                                                    0
TIME                                                                    0
DESC                                                                    0
ENTRIES                                                                 0
EXITS                                                                   0
dtype: int64

No null values

In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10956784 entries, 0 to 10956783
Data columns (total 11 columns):
 #   Column                                                                Dtype 
---  ------                                                                ----- 
 0   C/A                                                                   object
 1   UNIT                                                                  object
 2   SCP                                                                   object
 3   STATION                                                               object
 4   LINENAME                                                              object
 5   DIVISION                                                              object
 6   DATE                                                                  object
 7   TIME                                                                  object
 8   DESC                                                        

All columns except DATE and TIME already have a suitable type. Let's combine those two columns into a single column and convert it to datetime.

In [59]:
# Lower-case every header and remove all spaces from it
df.columns = [header.lower().replace(" ", "") for header in df.columns]
# Join the date and time strings and parse them into one timestamp column
df["datetime"] = pd.to_datetime(df["date"] + " " + df["time"])
# The separate string columns are no longer needed
df = df.drop(columns=["date", "time"])




In [60]:
df.head(2)

Unnamed: 0,c/a,unit,scp,station,linename,division,desc,entries,exits,datetime
0,A002,R051,02-00-00,59 ST,NQR456W,BMT,REGULAR,7653692,2620310,2021-10-16 00:00:00
1,A002,R051,02-00-00,59 ST,NQR456W,BMT,REGULAR,7653704,2620320,2021-10-16 04:00:00


Now features are fine.

----

This code creates a MultiIndex (turnstile id, timestamp) that makes it possible to group the dataframe by turnstile and calculate the number of entries and exits in each interval instead of the cumulative counter readings.

In [61]:
# One key per turnstile: station name, SCP and control area joined by spaces
turnstile_key = df["station"] + " " + df["scp"] + " " + df["c/a"]
df["stat_id"] = turnstile_key
# Index by (turnstile, timestamp) and sort so each turnstile's rows are in time order
df = df.set_index(["stat_id", "datetime"])
df = df.sort_index()

df

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1 AV 00-00-00 H007,2021-10-16 00:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644134,18145813
1 AV 00-00-00 H007,2021-10-16 04:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644151,18146053
1 AV 00-00-00 H007,2021-10-16 08:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644173,18146229
1 AV 00-00-00 H007,2021-10-16 12:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644247,18146605
1 AV 00-00-00 H007,2021-10-16 16:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644355,18147155
...,...,...,...,...,...,...,...,...,...,...
ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289
ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289
ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289
ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289


Some of the turnstile counters were replaced or reset. We could correct for those jumps, but that would take a lot of time, and since the incorrect data is less than 0.5 percent of the dataset, the more efficient solution is simply to drop it.

In [65]:
df.loc["125 ST 00-06-00 R258"].iloc[-60:-40]



Unnamed: 0_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2022-10-02 17:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,1863346609,1410543169
2022-10-02 21:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,1863346661,1410543282
2022-10-03 13:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275772569,324330907
2022-10-03 17:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275772802,324331132
2022-10-04 01:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275772931,324331363
2022-10-04 05:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275772935,324331394
2022-10-04 09:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275772980,324331921
2022-10-04 13:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275773000,324332194
2022-10-05 05:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275773227,324332636
2022-10-05 09:00:00,R258,R132,00-06-00,125 ST,456,IRT,REGULAR,275773365,324332903


Some turnstiles report their counters in reverse (the readings decrease over time). The code below handles such cases. The function receives the rows of one turnstile from the grouped dataframe and returns them with the per-interval counts for entries and exits instead of the cumulative readings.

In [67]:
# NOTE(review): `res` is not read by any later cell shown here; kept in case
# other cells rely on it.
res=pd.DataFrame()


def _interval_counts(readings):
    """Turn one column of cumulative counter readings into per-interval counts.

    The first element is NaN because it has no previous reading. When the net
    change over the whole series is negative the counter is treated as
    counting down and the differences are sign-flipped, so every count stays
    attached to the timestamp it was measured at.
    """
    step = readings.diff()
    # sum of the differences == last reading - first reading. A counter reset
    # that leaves the last reading below the first also triggers this flip;
    # the resulting negative counts are filtered out in a later cell.
    if step.sum() < 0:
        step = -step
    return step


def calc(temp):
    """Add per-interval ``entries_abs`` / ``exits_abs`` columns for one turnstile.

    Args:
        temp (pandas.DataFrame): Rows of a single turnstile in time order,
            with cumulative ``entries`` and ``exits`` columns.

    Returns:
        pandas.DataFrame: A copy of ``temp`` with two extra float columns,
        ``entries_abs`` and ``exits_abs``. The first row of each is NaN.
    """
    # Work on a copy: groupby.apply passes in the group, which should not be
    # mutated by the applied function.
    out = temp.copy()
    out["entries_abs"] = _interval_counts(temp["entries"])
    out["exits_abs"] = _interval_counts(temp["exits"])
    return out

In [68]:
# Compute the per-interval counts turnstile by turnstile (index level 0 is stat_id).
# group_keys=False keeps the (stat_id, datetime) index unchanged; newer pandas
# versions would otherwise prepend a second stat_id level, which makes the
# later reset_index() fail.
df = df.groupby(level=0, group_keys=False).apply(calc)


In [69]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1 AV 00-00-00 H007,2021-10-16 00:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644134,18145813,,
1 AV 00-00-00 H007,2021-10-16 04:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644151,18146053,17.0,240.0
1 AV 00-00-00 H007,2021-10-16 08:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644173,18146229,22.0,176.0
1 AV 00-00-00 H007,2021-10-16 12:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644247,18146605,74.0,376.0
1 AV 00-00-00 H007,2021-10-16 16:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644355,18147155,108.0,550.0
...,...,...,...,...,...,...,...,...,...,...,...,...
ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0


Dropping rows with negative values in diff()

In [70]:
# Keep only rows whose interval counts are not negative. A boolean mask
# removes exactly the offending rows; dropping by index label would also
# remove any other row that shares the same (stat_id, datetime) label.
# NaN compares as False, so rows with NaN counts are kept here.
has_negative_count = (df["entries_abs"] < 0) | (df["exits_abs"] < 0)
df = df[~has_negative_count]


In [71]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1 AV 00-00-00 H007,2021-10-16 00:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644134,18145813,,
1 AV 00-00-00 H007,2021-10-16 04:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644151,18146053,17.0,240.0
1 AV 00-00-00 H007,2021-10-16 08:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644173,18146229,22.0,176.0
1 AV 00-00-00 H007,2021-10-16 12:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644247,18146605,74.0,376.0
1 AV 00-00-00 H007,2021-10-16 16:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644355,18147155,108.0,550.0
...,...,...,...,...,...,...,...,...,...,...,...,...
ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0


In [72]:
df[df["entries_abs"]<0]

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [73]:
df[df["exits_abs"]<0]

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


Dropping all NaN values and changing size of dataset to one week.

In [74]:
df.isnull().sum()

c/a               0
unit              0
scp               0
station           0
linename          0
division          0
desc              0
entries           0
exits             0
entries_abs    5085
exits_abs      5085
dtype: int64

In [75]:
# Remove, in place, every row with a NaN in any column; per the counts shown
# above only entries_abs and exits_abs contain NaN at this point.
df.dropna(inplace=True)

In [76]:
df.isnull().sum()

c/a            0
unit           0
scp            0
station        0
linename       0
division       0
desc           0
entries        0
exits          0
entries_abs    0
exits_abs      0
dtype: int64

In [77]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
stat_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1 AV 00-00-00 H007,2021-10-16 04:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644151,18146053,17.0,240.0
1 AV 00-00-00 H007,2021-10-16 08:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644173,18146229,22.0,176.0
1 AV 00-00-00 H007,2021-10-16 12:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644247,18146605,74.0,376.0
1 AV 00-00-00 H007,2021-10-16 16:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644355,18147155,108.0,550.0
1 AV 00-00-00 H007,2021-10-16 20:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644522,18147790,167.0,635.0
...,...,...,...,...,...,...,...,...,...,...,...,...
ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0
ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R419,R326,00-05-01,ZEREGA AV,6,IRT,REGULAR,39,289,0.0,0.0


----

There is another problem. As you can see in the "entries" column, for the turnstile 01-03-04 at WORLD TRADE CTR station the recorded number of entries was 2146956625, and in eight hours it changes to 8098. The turnstile was probably reset or replaced for some reason, but this seriously distorts the per-interval value calculated from it. I'll replace such values with the mean for that station. The threshold for detecting such jumps is a per-interval value greater than 15000. I follow the logic that it is physically impossible to pass through a turnstile faster than once per second. For a period of 4 hours this gives a theoretical maximum of 4 * 60 * 60 = 14400 people.

In [117]:
df.loc["WORLD TRADE CTR 01-03-04 N094"].iloc[1855:1860]

Unnamed: 0_level_0,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-08-21 08:00:00,N094,R029,01-03-04,WORLD TRADE CTR,ACE23,IND,REGULAR,527005,183608,3.0,4.0
2022-08-21 12:00:00,N094,R029,01-03-04,WORLD TRADE CTR,ACE23,IND,REGULAR,2146956625,183611,2146430000.0,3.0
2022-08-21 20:00:00,N094,R029,01-03-04,WORLD TRADE CTR,ACE23,IND,REGULAR,527116,183624,45.0,6.0
2022-08-22 00:00:00,N094,R029,01-03-04,WORLD TRADE CTR,ACE23,IND,REGULAR,527129,183626,13.0,2.0
2022-08-22 04:00:00,N094,R029,01-03-04,WORLD TRADE CTR,ACE23,IND,REGULAR,527130,183627,1.0,1.0


I'll reset the index so that these incorrect values can be found and replaced with the mean of their respective station.

In [118]:
# Turn the stat_id / datetime index levels back into ordinary columns (in place),
# leaving a plain integer row index.
df.reset_index(inplace=True)

In [119]:
# Interval counts above this value are treated as counter glitches and are
# replaced with the mean of the plausible counts of the same station.
SPIKE_THRESHOLD = 15000

# Plausible readings only (everything else becomes NaN and is ignored by mean()).
entries_plausible = df["entries_abs"].where(df["entries_abs"] < SPIKE_THRESHOLD)
# Per-station mean, aligned to every row and truncated to a whole number.
# All means are computed before any value is replaced, so the result does
# not depend on row order.
entries_station_mean = np.trunc(
    entries_plausible.groupby(df["station"]).transform("mean")
)
entries_spike = df["entries_abs"] > SPIKE_THRESHOLD
# A station without a single plausible reading has no mean to fall back on.
if entries_station_mean[entries_spike].isna().any():
    raise ValueError(
        "Cannot replace entries_abs spikes: at least one station has no "
        "reading below the threshold to average"
    )
df.loc[entries_spike, "entries_abs"] = entries_station_mean[entries_spike]

In [120]:
# Interval counts above this value are treated as counter glitches and are
# replaced with the mean of the plausible counts of the same station.
SPIKE_THRESHOLD = 15000

# Plausible readings only (everything else becomes NaN and is ignored by mean()).
exits_plausible = df["exits_abs"].where(df["exits_abs"] < SPIKE_THRESHOLD)
# Per-station mean, aligned to every row and truncated to a whole number.
# All means are computed before any value is replaced, so the result does
# not depend on row order.
exits_station_mean = np.trunc(
    exits_plausible.groupby(df["station"]).transform("mean")
)
exits_spike = df["exits_abs"] > SPIKE_THRESHOLD
# A station without a single plausible reading has no mean to fall back on.
if exits_station_mean[exits_spike].isna().any():
    raise ValueError(
        "Cannot replace exits_abs spikes: at least one station has no "
        "reading below the threshold to average"
    )
df.loc[exits_spike, "exits_abs"] = exits_station_mean[exits_spike]

No incorrect data.

In [121]:
df[df["entries_abs"]>15000]

Unnamed: 0,stat_id,datetime,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs


In [122]:
df[df["exits_abs"]>15000]

Unnamed: 0,stat_id,datetime,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs


Changing entries_abs and exits_abs to int

In [123]:
# Both count columns hold whole numbers by now, so store them as integers
df = df.astype({"entries_abs": int, "exits_abs": int})

In [124]:
df.head(1)

Unnamed: 0,stat_id,datetime,c/a,unit,scp,station,linename,division,desc,entries,exits,entries_abs,exits_abs
0,1 AV 00-00-00 H007,2021-10-16 04:00:00,H007,R248,00-00-00,1 AV,L,BMT,REGULAR,15644151,18146053,17,240


Now, let's drop: 
- entries and exits (we already used them)
- division - this feature contains information about station owner. This won't be useful for this project.
- c/a - the control area (station booth) identifier. Its value is already part of stat_id, so the separate column won't be useful for this project either.
- desc - represent the "REGULAR" scheduled audit event. This won't be useful for this project.


In [125]:
# Raw counters and descriptive fields that the rest of the analysis does not use
unused_columns = ["entries", "exits", "c/a", "division", "desc"]
df = df.drop(columns=unused_columns)
df

Unnamed: 0,stat_id,datetime,unit,scp,station,linename,entries_abs,exits_abs
0,1 AV 00-00-00 H007,2021-10-16 04:00:00,R248,00-00-00,1 AV,L,17,240
1,1 AV 00-00-00 H007,2021-10-16 08:00:00,R248,00-00-00,1 AV,L,22,176
2,1 AV 00-00-00 H007,2021-10-16 12:00:00,R248,00-00-00,1 AV,L,74,376
3,1 AV 00-00-00 H007,2021-10-16 16:00:00,R248,00-00-00,1 AV,L,108,550
4,1 AV 00-00-00 H007,2021-10-16 20:00:00,R248,00-00-00,1 AV,L,167,635
...,...,...,...,...,...,...,...,...
10227247,ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R326,00-05-01,ZEREGA AV,6,0,0
10227248,ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R326,00-05-01,ZEREGA AV,6,0,0
10227249,ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R326,00-05-01,ZEREGA AV,6,0,0
10227250,ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R326,00-05-01,ZEREGA AV,6,0,0


In [139]:
# Calendar order for the weekday categories (Monday first, Sunday last)
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
# Name of the weekday of every reading, stored as an ordered categorical
df['weekday'] = pd.Categorical(
    df['datetime'].dt.day_name(),
    categories=cats,
    ordered=True,
)


df

Unnamed: 0,stat_id,datetime,unit,scp,station,linename,entries_abs,exits_abs,weekday
0,1 AV 00-00-00 H007,2021-10-16 04:00:00,R248,00-00-00,1 AV,L,17,240,Saturday
1,1 AV 00-00-00 H007,2021-10-16 08:00:00,R248,00-00-00,1 AV,L,22,176,Saturday
2,1 AV 00-00-00 H007,2021-10-16 12:00:00,R248,00-00-00,1 AV,L,74,376,Saturday
3,1 AV 00-00-00 H007,2021-10-16 16:00:00,R248,00-00-00,1 AV,L,108,550,Saturday
4,1 AV 00-00-00 H007,2021-10-16 20:00:00,R248,00-00-00,1 AV,L,167,635,Saturday
...,...,...,...,...,...,...,...,...,...
10227247,ZEREGA AV 00-05-01 R419,2022-10-14 05:00:00,R326,00-05-01,ZEREGA AV,6,0,0,Friday
10227248,ZEREGA AV 00-05-01 R419,2022-10-14 09:00:00,R326,00-05-01,ZEREGA AV,6,0,0,Friday
10227249,ZEREGA AV 00-05-01 R419,2022-10-14 13:00:00,R326,00-05-01,ZEREGA AV,6,0,0,Friday
10227250,ZEREGA AV 00-05-01 R419,2022-10-14 17:00:00,R326,00-05-01,ZEREGA AV,6,0,0,Friday


Saving file to csv.

In [140]:
# Write the cleaned frame to a path relative to the notebook, without the
# integer row index. NOTE(review): CSV stores only text, so the ordered
# categorical dtype of `weekday` and the datetime dtype are presumably not
# preserved on reload — confirm when reading the file back.
df.to_csv('../data/df_cleaned_year.csv',index=False)

In [141]:
# Read the saved file back as a check.
# parse_dates turns the datetime index into real timestamps instead of strings.
# linename holds both digit-only codes ("6") and letter codes ("L"), so it is
# read explicitly as text rather than left to type inference.
dd = pd.read_csv(
    '../data/df_cleaned_year.csv',
    index_col="datetime",
    parse_dates=["datetime"],
    dtype={"linename": str},
)
dd

  dd=pd.read_csv('../data/df_cleaned_year.csv',index_col="datetime")


Unnamed: 0_level_0,stat_id,unit,scp,station,linename,entries_abs,exits_abs,weekday
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-10-16 04:00:00,1 AV 00-00-00 H007,R248,00-00-00,1 AV,L,17,240,Saturday
2021-10-16 08:00:00,1 AV 00-00-00 H007,R248,00-00-00,1 AV,L,22,176,Saturday
2021-10-16 12:00:00,1 AV 00-00-00 H007,R248,00-00-00,1 AV,L,74,376,Saturday
2021-10-16 16:00:00,1 AV 00-00-00 H007,R248,00-00-00,1 AV,L,108,550,Saturday
2021-10-16 20:00:00,1 AV 00-00-00 H007,R248,00-00-00,1 AV,L,167,635,Saturday
...,...,...,...,...,...,...,...,...
2022-10-14 05:00:00,ZEREGA AV 00-05-01 R419,R326,00-05-01,ZEREGA AV,6,0,0,Friday
2022-10-14 09:00:00,ZEREGA AV 00-05-01 R419,R326,00-05-01,ZEREGA AV,6,0,0,Friday
2022-10-14 13:00:00,ZEREGA AV 00-05-01 R419,R326,00-05-01,ZEREGA AV,6,0,0,Friday
2022-10-14 17:00:00,ZEREGA AV 00-05-01 R419,R326,00-05-01,ZEREGA AV,6,0,0,Friday


- Add day of week
- Download by year