# This code downloads pollutant, traffic toll, and weather data, and then transforms all the data to a consistent format

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import datetime
import glob
import warnings
import requests
import json
from bs4 import BeautifulSoup
import zipfile
import io
import xmltodict
from MesoPy import Meso
from bisect import bisect_left
from scipy import interpolate
import pickle

# Directory passed to the retrieve_*/transform_* calls below: raw downloads are read
# from it and the transformed CSV files are written to it.
data_path = './data'

In [2]:
class save_load_pkl:
    """
    Namespace of helpers for saving and loading .pkl (pickle) files.

    Both helpers are static methods, so they work when called on the class
    (``save_load_pkl.save_obj(obj, name)``) as well as on an instance.
    """

    @staticmethod
    def save_obj(obj, name):
        """
        Function for saving .pkl files.
        obj = object that you want to save.
        name = filepath/filename to which you want to save the object.
        """
        with open(name, 'wb') as f:
            pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def load_obj(name):
        """
        Function for loading data from .pkl files.
        name = filepath/filename from which you want to load the object.
        Returns the unpickled object.

        NOTE: unpickling can execute arbitrary code, so only load files that
        you created yourself or otherwise trust.
        """
        with open(name, 'rb') as f:
            return pickle.load(f)

# 1. Deal with pollutant data

In [3]:
def retrieve_pollutant_obs(data_destination):
    """
    Print step-by-step instructions for downloading the pollutant data by hand.

    Nothing is downloaded here; the function only writes the instructions to stdout.
    INPUT:
    data_destination = directory in which the user should place the downloaded files;
                       printed as the last line of the instructions.
    OUTPUT:
    None
    """
    # One entry per printed line; an empty string produces a blank line.
    instruction_lines = (
        'I cannot download through URL API requests at the moment (API is down). I will have to do it manually.',
        '',
        'Instructions for manual download:',
        ' 1. Navigate to: https://aqs.epa.gov/api',
        ' 2. Set up an account. Enter your email and you will be emailed a password.',
        ' 3. Enter the following information:',
        '    a. Username/Password',
        '    b. Query Type: rawDataNotify',
        '    c. Output Format: DMCSV',
        '    d. Parameter Class: AQI Pollutants',
        '    e. Parameter Code: see above--each value in the pollutant_code list above',
        '    f. Begin Date: 20020101',
        '    g. End Date: 20171231',
        '    h. State Code: 36 - New York',
        '    i. County Code: 081 - Queens',
        '    j. Site: 0124 - QUEENS COLLEGE 2',
        '    k. Duration: H - 5 MINUTE',
        ' 4. Then hit "SUBMIT"',
        '',
        'You will receive an email when the data is ready for download. Follow instructions to download.',
        'Download all the data files and place them in the following directory:',
    )
    for text in instruction_lines:
        print(text)

    # Printed separately so that non-string destinations are formatted by print() itself.
    print(data_destination)


def transform_pollutant_obs(data_path='./data'):
    """
    Function transforms pollutant data to standard format. 
    INPUT: 
    data_path = path to directory in which raw pollutant data is stored (files whose names 
                start with 'AQDM'). Also path to which pollutant data is saved after 
                transformation. 
    OUTPUT: 
    path/file_name to which transformed data is saved (is a .csv file)
    RAISES: 
    FileNotFoundError if data_path contains no 'AQDM*' files. 
    KeyError if a file reports a pollutant that is not in the long-name -> short-name map. 
    """
    # Locate the raw files first, so that a missing download fails immediately with a clear
    # message. Sorted so that the column order of the output does not depend on the
    # arbitrary order in which glob lists the directory.
    aq_files = sorted(glob.glob(os.path.join(data_path,'AQDM*')))
    if not aq_files:
        raise FileNotFoundError(
            'No AQDM* pollutant files found in ' + repr(data_path) + '. Download them first.')

    # Initialize DataFrame with time vector... every hour from st_time to ed_time. 
    st_time = datetime.datetime(year=2002,month=1,day=1,hour=0,minute=0,second=0) 
    ed_time = datetime.datetime(year=2017,month=12,day=31,hour=23,minute=59,second=59) 
    delta_time = datetime.timedelta(hours=1)
    all_times = np.arange(st_time,ed_time,delta_time)
    poll_data = pd.DataFrame({
        'time_utc': all_times,
    })

    # Dictionary to map pollutant long names to short names. 
    gas_name_dict = {
        'Ozone': 'O3',
        'Carbon monoxide': 'CO',
        'Nitrogen dioxide (NO2)': 'NO2', 
        'PM2.5 - Local Conditions': 'PM2.5',
        'Sulfur dioxide': 'SO2'
    }

    # Collect important gas data from each gas data file, place in poll_data dataframe. 
    for file_name in aq_files: 

        # The final row of every file is discarded (presumably a non-data trailer line
        # -- TODO confirm against a raw download). 
        gas_data = pd.read_csv(file_name, dtype=str)[:-1]

        # NOTE(review): the first data row is used here; this assumes the parameter
        # description is the same on every row of a file -- confirm. 
        gas_name = gas_data['AQS Parameter Desc'].iloc[0]
        short_name = gas_name_dict[gas_name]

        # Extract useful gas data. 
        single_gas = pd.DataFrame({})
        single_gas['time_utc'] = [datetime.datetime.strptime(date+'_'+hr,'%Y-%m-%d_%H:%M') 
                                  for date,hr in zip(gas_data['Date GMT'],gas_data['24 Hour GMT'])]
        single_gas[short_name+'_val'] = gas_data['Sample Measurement'].apply(float)
        single_gas[short_name+'_limit'] = gas_data['Detection Limit'].apply(float)
        single_gas[short_name+'_unit'] = gas_data['Units of Measure']
        single_gas[short_name+'_instr'] = gas_data['Method Description'] 

        # Add useful data to larger structure via a left join on the measurement time. 
        poll_data = poll_data.join(single_gas.set_index('time_utc'),on='time_utc',how='left')
        
    # Collect basic information on the air quality measurement station. It is read from the
    # first data row of the last file processed. 
    station_info = pd.DataFrame({
        'lat': gas_data['Latitude'].iloc[0],
        'lon': gas_data['Longitude'].iloc[0],
        'state_code': gas_data['State Code'].iloc[0],
        'county_code': gas_data['County Code'].iloc[0],
        'site_number': gas_data['Site Num'].iloc[0]
    },index=[0])

    # Save the DataFrame as a CSV. 
    poll_csv_path = os.path.join(data_path,'poll_data_200201010500_201811011833.csv')
    poll_data.to_csv(poll_csv_path,na_rep='NaN',index=False)
    
    # Also save basic station info to CSV. 
    station_info.to_csv(os.path.join(data_path,'pollutant_station_info.csv'),na_rep='NaN',index=False)
    
    return poll_csv_path

    

In [4]:
# "Download" step: the data cannot be fetched automatically, so this only prints the
# manual download instructions. 
retrieve_pollutant_obs(data_path)

# Transform into consistent format. Uses the shared data_path setting rather than a
# repeated literal, so the directory only has to be changed in one place. 
poll_data_path = transform_pollutant_obs(data_path=data_path)


I cannot download through URL API requests at the moment (API is down). I will have to do it manually.

Instructions for manual download:
 1. Navigate to: https://aqs.epa.gov/api
 2. Set up an account. Enter your email and you will be emailed a password.
 3. Enter the following information:
    a. Username/Password
    b. Query Type: rawDataNotify
    c. Output Format: DMCSV
    d. Parameter Class: AQI Pollutants
    e. Parameter Code: see above--each value in the pollutant_code list above
    f. Begin Date: 20020101
    g. End Date: 20171231
    h. State Code: 36 - New York
    i. County Code: 081 - Queens
    j. Site: 0124 - QUEENS COLLEGE 2
    k. Duration: H - 5 MINUTE
 4. Then hit "SUBMIT"

You will receive an email when the data is ready for download. Follow instructions to download.
Download all the data files and place them in the following directory:
./data


# 2. Deal with traffic data

In [5]:
# The entire process in a single parent function: 

def retrieve_toll_data(toll_data_dir,web_addr='http://web.mta.info/developers/data/bandt/trafficdata.html'): 
    """
    Function used to download all traffic data, place it in a simple format. 
    Input: 
           toll_data_dir = directory where we want to save all the traffic data.
           web_addr = address of webpage where toll data is found. 
    Output: 
           save_path_file_name = path/fName for all the downloaded toll data. 
                                 The CSV has columns date, toll_id, cash-count, etc-count and 
                                 holds the un-archived (.xml) rows followed by the archived rows. 
    """
    # Only needed to resolve relative links on the index page. 
    from urllib.parse import urljoin
    
    # Functions: =====================================================================================
    # Used in this function. Built specifically for the extraction process.  
    
    # Download and extract zip files. 
    def download_extract_zip(retrieval_url,destination):
        r = requests.get(retrieval_url)
        r.raise_for_status()
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            z.extractall(destination)
    
    # xmltodict yields a single mapping when an element occurs once and a list when it occurs
    # several times; normalise both (and a missing element) to a list. 
    def as_list(node):
        if node is None:
            return []
        return node if isinstance(node, list) else [node]
    
    # Download unarchived toll data from a specific .xml file. 
    def get_xml_traffic_data(xml_file):
        """
        Input: 
                xml_file = web-address for traffic data xml file. 
        Output: 
                li_date_f,li_id_f,li_cc_f,li_ec_f
                Lists of the date, toll_id, cash-count, and etc-count values for each day/facility pair 
                in this file. Nothing outside this function is modified, so a file that fails half-way 
                contributes no rows at all. 
        """

        # Initialize lists to store instances for this file
        li_date_f,li_id_f,li_cc_f,li_ec_f = [],[],[],[]

        # Retrieve the data and convert into a dictionary. Any failure here is raised to the caller, 
        # which skips the file. 
        response = requests.get(xml_file)
        response.raise_for_status()
        xml_dict = xmltodict.parse(response.text)

        # Use the dictionary to pull out attribute values for each day/facility instance. 
        for trans_sum in as_list(xml_dict['message']['TransSummary']): 
            for facility in as_list(trans_sum.get('facility')):
                li_date_f.append(trans_sum['@Date'])
                li_id_f.append(facility['@id'])
                li_cc_f.append(facility['@cash-count'])
                li_ec_f.append(facility['@etc-count'])
        
        return li_date_f,li_id_f,li_cc_f,li_ec_f

    
    # Part 1: ========================================================================================
    # This section retrieves links to ALL the data I want to collect. 
    # Archived data is in a .zip file. 
    # Non-archived data stored in pages with suffixes, .xml

    # Collect the href of every anchor on the page. Whitespace is stripped because some of the 
    # addresses on the page carry a trailing space. 
    html = requests.get(web_addr).text
    soup = BeautifulSoup(html,'html5lib')
    all_hrefs = [(link.get('href') or '').strip() for link in soup.find_all('a')]

    # Links to the archived data end in ".zip"; links to un-archived data end in ".xml". 
    # urljoin leaves absolute addresses untouched and completes relative ones against web_addr. 
    arch_data_links = [urljoin(web_addr,href) for href in all_hrefs if href.lower().endswith('.zip')]
    unarch_data_links = [urljoin(web_addr,href) for href in all_hrefs if href.lower().endswith('.xml')]

    
    # Part 2: ========================================================================================
    # Download and extract the archived data from its .zip files. 

    for aa in arch_data_links: 
        download_extract_zip(aa,toll_data_dir)
    
    # Part 3: ========================================================================================
    # Place data from xml files in a dataframe. 

    # Errors that are known to occur and are deliberately skipped: 
    # ExpatError: If there's an error in the .xml file, xmltodict.parse(contents) fails. 
    # TypeError: If the .xml file has only <message> </message>, xml_dict['message'] is None. 
    # Files that trigger these are skipped on a best-effort basis, but a warning is issued so that 
    # skipped files are visible. 
    
    # Initialize lists to store ALL instances from ALL files. 
    list_inst_date,list_inst_id,list_inst_cc,list_inst_ec = [],[],[],[]

    for xml_file in unarch_data_links:
        
        try: 
            li_date_f,li_id_f,li_cc_f,li_ec_f = get_xml_traffic_data(xml_file)
        except Exception as err: 
            warnings.warn('Skipping %s: %s: %s' % (xml_file, type(err).__name__, err))
            continue
            
        list_inst_date.extend(li_date_f)
        list_inst_id.extend(li_id_f)
        list_inst_cc.extend(li_cc_f)
        list_inst_ec.extend(li_ec_f)

    # Place lists in dict, convert into dataframe. 
    unarch_toll_df = pd.DataFrame(data={
        'date': list_inst_date,
        'toll_id': list_inst_id,
        'cash-count': list_inst_cc,
        'etc-count': list_inst_ec,
    })
    
    
    # Part 4: ========================================================================================
    # Now combine the archived and unarchived data into a single dataframe. 

    # Retrieve the archived data from its storage place. Read as text so that the values are 
    # stored exactly as they appear in the file, like the values taken from the .xml files. 
    arch_data_pathFile = os.path.join(toll_data_dir,'TBTA_DAILY_PLAZA_TRAFFIC.csv')
    arch_toll_df = pd.read_csv(arch_data_pathFile,dtype=str)

    # Re-order attributes in archived dataframe so that it's consistent with the order of the columns in 
    # the unarchived dataframe. Also re-name each attribute for consistency. 
    arch_toll_df = arch_toll_df[['DATE','PLAZAID','CASH','ETC']]
    arch_toll_df = arch_toll_df.rename(columns={"DATE": "date", "PLAZAID": "toll_id","CASH": "cash-count", "ETC": "etc-count"})

    # Join by appending. The combined frame must be kept: concatenation returns a new object. 
    # NOTE(review): the archived 'DATE' strings are passed through unchanged -- confirm that they 
    # use the same date format as the '@Date' values from the .xml files. 
    all_toll_df = pd.concat([unarch_toll_df,arch_toll_df],ignore_index=True,sort=False)

    # Save all the toll data in a single CSV file. 
    save_path_file_name = os.path.join(toll_data_dir,'all_toll_data.csv')
    all_toll_df.to_csv(path_or_buf=save_path_file_name, sep=',')
    
    print('All traffic data saved to...',save_path_file_name)
    
    return save_path_file_name


def transform_toll_data(toll_data_path,std_time_list,out_dir=None):
    """
    Transforms the raw toll data to the standard format: one row per standard time, one 
    'cash_<toll_id>' and one 'etc_<toll_id>' column per toll. 
    INPUT: 
        toll_data_path: path/name of the raw toll CSV (columns date, toll_id, cash-count, 
                        etc-count; dates written as month/day/year). 
        std_time_list: times (UTC) that define the rows of the output. 
        out_dir: directory in which the transformed CSV is saved. Defaults to the module-level 
                 data_path. 
    OUTPUT: 
        path/file_name of the CSV file that holds the transformed toll data. 
    """
    if out_dir is None:
        out_dir = data_path

    raw_toll = pd.read_csv(toll_data_path,index_col=0,dtype=str)
    raw_toll['date'] = raw_toll['date'].apply(lambda x: datetime.datetime.strptime(x,'%m/%d/%Y'))

    # Find all unique dates and toll indices. Needed to restructure the toll data. 
    toll_dates = sorted(set(raw_toll['date']))
    toll_ids   = sorted(set(raw_toll['toll_id']))

    # Row position of each date, so that every raw row is placed with a single dict lookup 
    # instead of a scan over all dates. 
    row_of_date = {date: pos for pos,date in enumerate(toll_dates)}

    # Initialize dictionary to hold restructured data, with columns that will hold the # of cash 
    # and electronic tolls for each toll, for each day. 
    struct_toll = {'date':toll_dates}
    for tid in toll_ids: 
        struct_toll['cash_'+str(tid)] = [np.nan]*len(toll_dates)
        struct_toll['etc_'+str(tid)] = [np.nan]*len(toll_dates)

    # Now fill those initialized columns with the cash and etc-counts for each toll, for each day. 
    # If a date/toll pair occurs more than once, the last occurrence wins. 
    for date,tid,cash,etc in zip(raw_toll['date'],raw_toll['toll_id'],
                                 raw_toll['cash-count'],raw_toll['etc-count']): 
        struct_ind = row_of_date[date]
        struct_toll['cash_'+str(tid)][struct_ind] = cash
        struct_toll['etc_'+str(tid)][struct_ind] = etc

    # Convert to dataframe (already ordered by date). 
    struct_toll = pd.DataFrame(struct_toll)
    
    # Place the data in the standard format. Each UTC time is mapped to a calendar day using a 
    # fixed 5-hour offset (an approximation of local time), and that day's counts are attached. 
    toll_data = pd.DataFrame({
        'time_utc': list(std_time_list)
    })
    approx_loctime = toll_data['time_utc'] - datetime.timedelta(hours=5)
    toll_data['date'] = approx_loctime.dt.normalize()
    toll_data = toll_data.merge(struct_toll,how='left',on=['date'])
    toll_data = toll_data.drop(labels=['date'],axis=1)

    # Save toll_data to CSV
    toll_csv_path = os.path.join(out_dir,'toll_data_200201010500_201811011833.csv')
    toll_data.to_csv(toll_csv_path,na_rep='NaN',index=False)
    
    return toll_csv_path



In [6]:
# Step 1 -- download every toll file linked from the MTA page and merge them into one CSV. 
toll_web_addr = "http://web.mta.info/developers/data/bandt/trafficdata.html"
raw_toll_path = retrieve_toll_data(toll_data_dir=data_path, web_addr=toll_web_addr)

# Step 2 -- reshape the toll data onto the time axis of the transformed pollutant data. 
# The pollutant CSV is read as text, so its time column is converted back to Timestamps. 
poll_data = pd.read_csv(poll_data_path, dtype=str)
toll_data_path = transform_toll_data(
    raw_toll_path,
    poll_data['time_utc'].map(pd.Timestamp),
)


All traffic data saved to... ./data/all_toll_data.csv


# 3. Deal with weather data

In [7]:
# Function to retrieve observations. 
def retr_wxobs_synopticlabs(api_key,station_ids,data_path):
    """
    Function to retrieve weather observations from various observation sites. Uses the 
    MesoWest/SynopticLabs API to retrieve the observations. 
    
    INPUT: 
        api_key: SynopticLabs api_key. 
        station_ids: List of four-letter station ids; one for each station for which we want to retrieve
                     observations. 
        data_path: Path to directory in which we want to save the observations. 
        
    OUTPUT: 
        path_name_list: list of file path/name for all files in which weather data is stored 
                        (one .npy file per station, in the order of station_ids). 
    
    """
        
    # Generate token. The key is passed as a query parameter so that it is URL-encoded properly. 
    auth_response = requests.get('http://api.mesowest.net/v2/auth',params={'apikey': api_key})
    auth_response.raise_for_status()
    token_synopticlabs = json.loads(auth_response.text)['TOKEN']

    # Set some key parameters for retrieval: 
    # 1. Start time for observations.  
    # 2. End time for observations. 
    # 3. Variables to retrieve. 
    # 4. Shortened names for retrieved variables. 
    # 5. General info about the wx station. 
    
    st_time = '200201010500'
    ed_time = '201811011833'

    vbl_list = ['date_time','air_temp_set_1','dew_point_temperature_set_1d',
                'relative_humidity_set_1','wind_speed_set_1','wind_gust_set_1','wind_direction_set_1',
                'sea_level_pressure_set_1d','precip_accum_one_hour_set_1','weather_condition_set_1d',
                'visibility_set_1','ceiling_set_1','cloud_layer_1_code_set_1','cloud_layer_2_code_set_1',
                'cloud_layer_3_code_set_1']

    new_vbl_names = ['time','temp','dewpoint','RH','wind_speed','wind_gust','wind_dir',
                    'sl_pres','precip_1hr','wx_cond','visibility',
                    'cld_ceiling','cldlayer_1','cldlayer_2','cldlayer_3']

    station_attrs = ['STID','ELEVATION','NAME','LONGITUDE','LATITUDE']
    
    # List to hold file path/name for each station's observations. 
    path_name_list = []

    # One API client is enough for all stations. 
    m = Meso(token=token_synopticlabs)

    # Retrieve observations from each requested station. 
    for station_id in station_ids:

        # Retrieve the weather observations from API. 
        data_ts = m.timeseries(stid=station_id, start=st_time, end=ed_time)    
        station = data_ts['STATION'][0]

        # Place data in a simpler structure, which contains only desired attributes and variables. 
        obs_dict = {
            'station_info': {attr: station[attr] for attr in station_attrs},
            'wx_obs': pd.DataFrame({new_vbl: station['OBSERVATIONS'][vbl] 
                                    for vbl,new_vbl in zip(vbl_list,new_vbl_names)}),
            'units': data_ts['UNITS'],
            'qc_summary': data_ts['QC_SUMMARY'],
        }

        # Save the dictionary. np.save stores a dict as a pickled object array, so it has to be 
        # read back with np.load(..., allow_pickle=True).item(). 
        file_name = 'wxobs_'+station_id+'_'+st_time+'_'+ed_time+'.npy'
        np.save(os.path.join(data_path,file_name),obs_dict) 
        
        # Add file path/name to list. 
        path_name_list.append(os.path.join(data_path,file_name))

    return path_name_list



def transform_wx_data(wx_raw_path,station_names,std_time_list,out_dir=None):
    """
    Transforms weather data from each station to the standard format. 
    INPUT: 
        wx_raw_path: List of file name/paths for raw weather data (.npy files that hold a 
                     pickled dict with a 'wx_obs' DataFrame). 
        station_names: List of four-letter codes for each station, in the same order as wx_raw_path. 
        std_time_list: pandas Series of times (UTC) that define the rows of the output. 
        out_dir: directory in which the transformed CSVs are saved (and in which the fallback raw 
                 files are looked up). Defaults to the module-level data_path. 
    OUTPUT: 
        wx_data_paths: names/paths for csv files that hold transformed data.  
    """
    if out_dir is None:
        out_dir = data_path
    
    def find_closest(myList, myNumber):
        """
        Assumes myList is sorted. Returns closest value to myNumber, index of that value. 

        If two numbers are equally close, return the smallest number.
        """
        pos = bisect_left(myList, myNumber)
        if pos == 0:
            return myList[0],0
        if pos == len(myList):
            # myNumber lies beyond the last element; the closest element is the last one. 
            return myList[-1],pos-1
        before = myList[pos - 1]
        after = myList[pos]
        if after - myNumber < myNumber - before:
            return after,pos
        return before,pos-1
    
    def load_station(path):
        # The raw files hold a pickled dict, which NumPy only loads when pickles are explicitly 
        # allowed. NOTE: unpickling can execute arbitrary code -- only load files you created. 
        return np.load(path,allow_pickle=True).item()
    
    try: 
        all_stations = [load_station(path) for path in wx_raw_path]
    except Exception as err: 
        # Best-effort fallback to the three default station files, made visible with a warning. 
        warnings.warn('Could not load the files in wx_raw_path (%s: %s); falling back to the '
                      'default kjfk/klga/knyc files.' % (type(err).__name__, err))
        all_stations = [
            load_station(os.path.join(out_dir,'wxobs_'+stid+'_200201010500_201811011833.npy'))
            for stid in ('kjfk','klga','knyc')
        ]

    # Time is expressed as hours since the start of year 2000. Necessary for interpolation. 
    time_del = datetime.timedelta(hours=1)
    time_base = datetime.datetime(year=2000,month=1,day=1,hour=0,minute=0,second=0)
    h2000_utc = list(std_time_list.apply(lambda x: (x-time_base)/time_del))

    obs_to_interp = ['temp', 'dewpoint', 'RH', 'wind_speed', 'wind_gust', 'wind_dir',
                     'sl_pres', 'precip_1hr', 'visibility', 'cld_ceiling',
                     'cldlayer_1', 'cldlayer_2', 'cldlayer_3', 'wx_cond_encode']

    wx_data_paths = []
        
    for station, station_name in zip(all_stations,station_names):

        # Convert timestamp to datetime, sort. Work on a copy so the loaded dict is not modified. 
        wx_obs = station['wx_obs'].copy()
        wx_obs['time'] = wx_obs['time'].apply(lambda x: datetime.datetime.strptime(x,'%Y-%m-%dT%H:%M:%SZ'))
        wx_obs = wx_obs.sort_values('time')

        # Use nearest-neighbor interpolation to determine weather conditions at each time for which 
        # we have pollutant data. 

        # Prepare the data for interpolation...
        # 1. Observation times in hours since start of year 2000. 
        h2000_obs = list(wx_obs['time'].apply(lambda x: (x-time_base)/time_del))

        # 2. Encode wx_cond as integers so that it can be interpolated like the numeric columns. 
        #    Missing entries become the label 'N/A'; codes index the sorted unique labels. 
        wx_obs['wx_cond'] = wx_obs['wx_cond'].fillna('N/A')
        cond_codes,cond_labels = pd.factorize(wx_obs['wx_cond'],sort=True)
        cond_labels = np.asarray(cond_labels,dtype=object)
        wx_obs['wx_cond_encode'] = cond_codes

        # 3. Keep only the standard times that are within 30 mins of a wx_obs time. 
        h2000_interp = []
        for time in h2000_utc: 
            time_near,_ = find_closest(h2000_obs, time)
            if -0.5 < time - time_near <= 0.5:
                h2000_interp.append(time)

        # Convert h2000_interp from hours after 2000 to datetime. 
        time_utc_interped = [time_base + time_del*h2000 for h2000 in h2000_interp]

        # Now interpolate! Times outside the range of the observations get NaN. 
        wx_data_interped = pd.DataFrame({'time_utc':time_utc_interped})
        for obs_name in obs_to_interp: 
            f = interpolate.interp1d(h2000_obs,list(wx_obs[obs_name]),kind='nearest',bounds_error=False,fill_value=np.nan)
            wx_data_interped[obs_name] = f(np.asarray(h2000_interp))

        # Convert encoded wx_cond to wx_cond names. Rows without a code (NaN) stay NaN instead of 
        # failing on the conversion to int. 
        codes = wx_data_interped['wx_cond_encode']
        has_code = codes.notna()
        cond_names = pd.Series(np.nan,index=wx_data_interped.index,dtype=object)
        cond_names.loc[has_code] = cond_labels[codes[has_code].astype(int).to_numpy()]
        wx_data_interped['wx_cond'] = cond_names

        # Use left join to make format consistent with pollutant data. 
        wx_data = pd.DataFrame({'time_utc': list(std_time_list)})
        wx_data = wx_data.merge(wx_data_interped,how='left',on='time_utc',sort=True)

        # Save to csv
        wx_csv_path = os.path.join(out_dir,'wx_data_'+station_name+'_200201010500_201811011833.csv')
        wx_data.to_csv(wx_csv_path,na_rep='NaN',index=False)

        wx_data_paths.append(wx_csv_path)
        
    return wx_data_paths



In [8]:
# Download the raw weather observations for each station listed in wx_stations. 
# The SynopticLabs API key is kept out of the notebook in a local api_keys module. 
from api_keys import api_key_synopticlabs
wx_stations = ['kjfk', 'klga', 'knyc']
wx_raw_path = retr_wxobs_synopticlabs(
    api_key=api_key_synopticlabs,
    station_ids=wx_stations,
    data_path=data_path,
)

In [9]:
# Put each station's observations onto the time axis of the pollutant data. 
# poll_data was read as text, so its time column is converted back to Timestamps first. 
wx_data_paths = transform_wx_data(
    wx_raw_path,
    wx_stations,
    poll_data['time_utc'].map(pd.Timestamp),
)