# Timestamp parsing for time-series data analysis with Pandas and Python

Source code from Medium's article ["Timestamp parsing for time-series data analysis with Pandas and Python"](https://towardsdatascience.com/timestamp-parsing-with-python-ec185536bcfc), written by [Sudeep Gowrishankar](https://sudeep-g.medium.com/).

# Import Libraries

In [1]:
from datetime import datetime, timedelta

from time import strptime
from time import strftime

import pandas as pd

# 1. time.strptime

In [2]:
# Parse a single timestamp string with the standard-library ``strptime``.
ts_format = "%Y-%m-%d %H:%M:%S"
ts_str = "2000-03-10 15:43:10"

# ``strptime`` from the ``time`` module yields a ``time.struct_time``.
ts = strptime(ts_str, ts_format)
print(ts)

time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=43, tm_sec=10, tm_wday=4, tm_yday=70, tm_isdst=-1)


# 2. Pandas.to_datetime without inferring

In [3]:
# Hand one timestamp string to pandas without telling it the layout;
# the result is a single ``Timestamp``.
ts_str = "2000-03-10 15:43:10"
ts = pd.to_datetime(arg=ts_str)
print(ts)

2000-03-10 15:43:10


In [4]:
# Two timestamps written in *different* layouts: dashes with seconds
# versus slashes without seconds.
ts_str_list = ["2000-03-10 15:43:10", "2000/03/10 15:44"]

# Parse every string on its own so that no single format has to fit all
# elements. Passing the whole list to ``pd.to_datetime`` raises ``ValueError``
# on pandas >= 2.0, which applies the format guessed from the first element
# to every other element.
ts_list = pd.DatetimeIndex([pd.to_datetime(raw) for raw in ts_str_list])

print(list(ts_list))

[Timestamp('2000-03-10 15:43:10'), Timestamp('2000-03-10 15:44:00')]


# 3. Pandas.to_datetime with inferring

In [5]:
# Format inference guesses the layout from the first element and applies it to
# the rest, so every string must share one layout (here with seconds).
ts_str_list = ["2000-03-10 15:43:10", "2000-03-10 15:44:00", "2000-03-10 15:45:00"]

# pandas < 2.0 only infers the format when asked to; pandas >= 2.0 always
# infers it and deprecates the ``infer_datetime_format`` flag, so pass the
# flag only where it is still needed.
needs_infer_flag = int(pd.__version__.split(".")[0]) < 2
infer_kwargs = {"infer_datetime_format": True} if needs_infer_flag else {}

ts_list = pd.to_datetime(ts_str_list, **infer_kwargs)

print(list(ts_list))

[Timestamp('2000-03-10 15:43:10'), Timestamp('2000-03-10 15:44:00'), Timestamp('2000-03-10 15:45:00')]


# 4. Pandas.to_datetime with a specified format argument

In [6]:
# Spell the layout out explicitly so pandas does not have to guess it.
ts_format = "%Y-%m-%d %H:%M:%S"
ts_str_list = [
    "2000-03-10 15:43:10",
    "2000-03-10 15:44:05",
    "2000-03-10 15:45:54",
]

ts_list = pd.to_datetime(ts_str_list, format=ts_format)
print([*ts_list])

[Timestamp('2000-03-10 15:43:10'), Timestamp('2000-03-10 15:44:05'), Timestamp('2000-03-10 15:45:54')]


# 5. time.strptime with memoization

In [7]:
# Only three distinct strings appear below, each of them twice.
ts_str_list = [
    "2000-03-10 15:43", "2000-03-10 15:44", "2000-03-10 15:45",
    "2000-03-10 15:43", "2000-03-10 15:44", "2000-03-10 15:45",
]
minute_format = "%Y-%m-%d %H:%M"

memos = {}    # raw string -> already parsed struct_time
ts_list = []  # parsed values, in input order

for raw in ts_str_list:
    try:
        # Cache hit: reuse the earlier result instead of parsing again.
        parsed = memos[raw]
    except KeyError:
        # First occurrence: parse once and remember the result.
        parsed = memos[raw] = strptime(raw, minute_format)
    ts_list.append(parsed)

print(ts_list)
print(memos)

[time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=43, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=44, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=45, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=43, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=44, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=45, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1)]
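{'2000-03-10 15:43': time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=43, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), '2000-03-10 15:44': time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=44, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1), '2000-03-10 15:45': time.struct_time(tm_year=2000, tm_mon=3, tm_mday=10, tm_hour=15, tm_min=45, tm_sec=0, tm_wday=4, tm_yday=70, tm_isdst=-1)}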

# 6. Pre-built lookup mapping

In [13]:
# Strings to resolve. "2000-03-19 00:02" lies outside the window covered by
# the lookup table built below, so it resolves to ``None``.
ts_str_list = [
    "2000-03-10 00:02", "2000-03-10 00:03", "2000-03-10 00:05",
    "2000-03-10 00:07", "2000-03-10 00:11", "2000-03-10 00:13",
    "2000-03-10 00:17", "2000-03-19 00:02", "2000-03-10 00:23",
]

start_time = datetime(year=2000, month=3, day=10, hour=0, minute=0)
end_time = datetime(year=2000, month=3, day=10, hour=0, minute=30)

# Build the table up front: one entry per minute from ``start_time`` to
# ``end_time`` (both inclusive), keyed by the formatted string.
one_minute = timedelta(minutes=1)
slot_count = (end_time - start_time) // one_minute + 1

lookup_map = {}
for offset in range(slot_count):
    moment = start_time + offset * one_minute
    lookup_map[moment.strftime("%Y-%m-%d %H:%M")] = moment

# Resolving a string is now a plain dictionary lookup; misses become ``None``.
ts_list = [lookup_map.get(raw) for raw in ts_str_list]

print(ts_list)

[datetime.datetime(2000, 3, 10, 0, 2), datetime.datetime(2000, 3, 10, 0, 3), datetime.datetime(2000, 3, 10, 0, 5), datetime.datetime(2000, 3, 10, 0, 7), datetime.datetime(2000, 3, 10, 0, 11), datetime.datetime(2000, 3, 10, 0, 13), datetime.datetime(2000, 3, 10, 0, 17), None, datetime.datetime(2000, 3, 10, 0, 23)]
