## A file that convert business hours data timestamp into unix format

In [1]:
import pandas as pd
from datetime import datetime

In [2]:
input_filepath = "storestatus.csv"
output_filepath = 'data/storestatus.csv'

In [3]:
file = pd.read_csv(input_filepath)
file.head()

Unnamed: 0,store_id,status,timestamp_utc
0,8419537941919820732,active,2023-01-22 12:09:39.388884 UTC
1,54515546588432327,active,2023-01-24 09:06:42.605777 UTC
2,8377465688456570187,active,2023-01-24 09:07:26.441407 UTC
3,5955337179846162144,active,2023-01-24 09:08:07.634899 UTC
4,1169347689335808384,active,2023-01-24 09:08:18.436854 UTC


In [4]:
# Function to convert the custom timestamp to Unix timestamp
def convert_to_unix(timestamp_str):
    # List of potential formats
    formats = ['%Y-%m-%d %H:%M:%S.%f %Z', '%Y-%m-%d %H:%M:%S %Z']

    for fmt in formats:
        try:
            timestamp_obj = datetime.strptime(timestamp_str, fmt)
            return timestamp_obj.timestamp()
        except ValueError:
            continue

    raise ValueError(f"Time data '{timestamp_str}' does not match known formats.")

In [5]:
result = []
start_id = 1

for chunk in pd.read_csv(input_filepath, chunksize=50000):
    # Convert the 'timestamp_utc' column
    chunk['timestamp_utc'] = chunk['timestamp_utc'].apply(convert_to_unix)

    # Add the 'id' column with incrementing values
    chunk['id'] = range(start_id, start_id + len(chunk))
    start_id += len(chunk)

    # Reorder columns to place 'id' at the beginning
    chunk = chunk[['id'] + [col for col in chunk if col != 'id']]
    
    result.append(chunk)

In [6]:
df = pd.concat(result, axis=0)
df.head()

Unnamed: 0,id,store_id,status,timestamp_utc
0,1,8419537941919820732,active,1674370000.0
1,2,54515546588432327,active,1674531000.0
2,3,8377465688456570187,active,1674531000.0
3,4,5955337179846162144,active,1674531000.0
4,5,1169347689335808384,active,1674531000.0


In [None]:
df.to_csv(output_filepath, index=False)