In [1]:
# Import Dependencies
import pandas as pd
from datetime import datetime
from pytz import timezone, all_timezones
import json

In [2]:
# Load the GPS points (with reverse-geocoded location info) into a DataFrame
df = pd.read_csv('data/gps_with_location_info.csv')
df = df.drop(columns=['Unnamed: 0'])  # discard the index column stored in the CSV
df['time'] = pd.to_datetime(df['time'])  # parse 'time' from python objects into datetime64

df.head()

Unnamed: 0,track,time,alt,lat,long,city,state,country
0,0,2019-05-08 06:57:15+00:00,741.08,36.111288,-115.261516,Spring Valley,Nevada,US
1,0,2019-05-08 15:27:00+00:00,754.07,36.111416,-115.261345,Spring Valley,Nevada,US
2,0,2019-05-08 15:27:08+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US
3,0,2019-05-08 15:27:09+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US
4,0,2019-05-08 15:28:09+00:00,750.0,36.11133,-115.261409,Spring Valley,Nevada,US


In [3]:
# Clean incorrect MY data - relabel those rows as Singapore (SG)
# https://kanoki.org/2019/07/17/pandas-how-to-replace-values-based-on-conditions/
isMY = df['country'] == 'MY'  # computed once, before 'country' itself is overwritten below
df.loc[isMY, 'city'] = 'Singapore'
df.loc[isMY, 'state'] = 'NaN'  # NOTE(review): literal string 'NaN', not a missing value - confirm intended
df.loc[isMY, 'country'] = 'SG'

df.head()

Unnamed: 0,track,time,alt,lat,long,city,state,country
0,0,2019-05-08 06:57:15+00:00,741.08,36.111288,-115.261516,Spring Valley,Nevada,US
1,0,2019-05-08 15:27:00+00:00,754.07,36.111416,-115.261345,Spring Valley,Nevada,US
2,0,2019-05-08 15:27:08+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US
3,0,2019-05-08 15:27:09+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US
4,0,2019-05-08 15:28:09+00:00,750.0,36.11133,-115.261409,Spring Valley,Nevada,US


In [4]:
# Load country_timezones json and make DF
# A context manager is used so the file handle is closed as soon as the JSON is parsed.
with open('data/country_timezones.json', 'r') as tzFile:
    countryTZs = json.load(tzFile)  # import all timezone data
dfAllTimezones = pd.DataFrame(countryTZs['countries'])  # make it a dataframe

# Figure out which rows of dfAllTimezones are needed based on the GPS data
countries = df['country'].unique()  # unique country codes seen in the GPS data
# Keep only the rows whose 'code' is one of those country codes. An explicit isin()
# replaces query('@countries in code'), which only worked through pandas' fallback
# for a reversed `in` expression.
dfTZ = dfAllTimezones[dfAllTimezones['code'].isin(countries)]
dfTZ = dfTZ.reset_index(drop=True)  # renumber rows 0..n-1 after filtering

dfTZ.head()

Unnamed: 0,timezones,code,continent,name,capital
0,[Europe/Vienna],AT,Europe,Austria,Vienna
1,"[Australia/Lord_Howe, Australia/Hobart, Austra...",AU,Oceania,Australia,Canberra
2,[Europe/Brussels],BE,Europe,Belgium,Brussels
3,"[America/St_Johns, America/Halifax, America/Gl...",CA,North America,Canada,Ottawa
4,[Europe/Berlin],DE,Europe,Germany,Berlin


In [5]:
# Clean European timezones: reduce each listed country to a single timezone
def cleanEuropeTZ(df, code):
    """Clean the 'timezones' entry of every row whose 'code' equals `code`.

    Each matching row's timezone list is passed through findEuropeTZ and the
    result is written back into `df` itself (the frame is modified in place).

    Returns the same `df` object that was passed in.
    """
    matches = df['code'] == code  # boolean mask selecting the rows for this country code
    euroTZ = df.loc[matches, 'timezones'].apply(findEuropeTZ)  # cleaned list per matching row
    df.loc[euroTZ.index, 'timezones'] = euroTZ.tolist()  # write cleaned lists back at the same row labels
    return df

# Figure out which timezone is the correct one
def findEuropeTZ(x):
    """Pick the single European timezone out of a list of timezone names.

    Parameters
    ----------
    x : list of str
        Timezone names for one country.

    Returns
    -------
    list of str
        A one-element list holding the first name containing 'Europe' when
        `x` has more than one entry. Otherwise `x` itself, unchanged. That
        includes a multi-entry list with no 'Europe' name, which previously
        fell through and returned None.
    """
    # Only lists with more than one entry need reducing
    if len(x) > 1:
        for i in x:
            if 'Europe' in i:
                return [i]  # keep it wrapped in a list, like the other rows
    # Single/empty list, or no European zone found: leave the data as it was
    return x


# Run the European clean-up for each country code that needs it
for country in ('ES', 'PT'):
    dfTZ = cleanEuropeTZ(dfTZ, country)

dfTZ

Unnamed: 0,timezones,code,continent,name,capital
0,[Europe/Vienna],AT,Europe,Austria,Vienna
1,"[Australia/Lord_Howe, Australia/Hobart, Austra...",AU,Oceania,Australia,Canberra
2,[Europe/Brussels],BE,Europe,Belgium,Brussels
3,"[America/St_Johns, America/Halifax, America/Gl...",CA,North America,Canada,Ottawa
4,[Europe/Berlin],DE,Europe,Germany,Berlin
5,[Europe/Copenhagen],DK,Europe,Denmark,Copenhagen
6,[Europe/Paris],FR,Europe,France,Paris
7,[Europe/Rome],IT,Europe,Italy,Rome
8,[Europe/Vaduz],LI,Europe,Liechtenstein,Vaduz
9,"[Pacific/Auckland, Pacific/Chatham]",NZ,Oceania,New Zealand,Wellington


In [6]:
# Find countries with more than 1 timezone
dfMultiTZ = dfTZ[dfTZ['timezones'].apply(lambda x: len(x) > 1)]

# Find all the visited states per country.
# Selecting the 'state' column on the groupby and calling unique() gives one array of
# distinct states per country code, without routing through a generic apply/lambda.
statesByCountry = df.groupby('country')['state'].unique()
dfstatesByCountry = statesByCountry.reset_index()  # country index becomes a regular column
dfstatesByCountry.columns = ['code', 'states']  # rename so 'code' lines up with dfMultiTZ

# Join dfMultiTZ and dfstatesByCountry (pd.merge defaults to an inner join on 'code')
dfMultiTZ = pd.merge(dfMultiTZ, dfstatesByCountry, on='code')
dfMultiTZ

Unnamed: 0,timezones,code,continent,name,capital,states
0,"[Australia/Lord_Howe, Australia/Hobart, Austra...",AU,Oceania,Australia,Canberra,"[Victoria, Queensland, New South Wales]"
1,"[America/St_Johns, America/Halifax, America/Gl...",CA,North America,Canada,Ottawa,"[New Brunswick, Quebec, Ontario, Alberta, Brit..."
2,"[Pacific/Auckland, Pacific/Chatham]",NZ,Oceania,New Zealand,Wellington,"[Auckland, Waikato, Taranaki, Bay of Plenty, H..."
3,"[America/New_York, America/Detroit, America/Ke...",US,North America,United States,"Washington, D.C.","[Nevada, Arizona, Utah, Colorado, New Mexico, ..."


In [7]:
# # Manually enter timezones for countries with multiple time zones #
# # Used: https://www.worldtimezone.com/wtz020.php to determine time zones
# # function outputs dictionary of states and corresponding timezone based on user input
# def inputTZ(df, rowNo):
#     tzmap = {}    
#     print(df['name'][rowNo]) # so user knows what country they're dealing with
    
#     for state in df['states'][rowNo]: # for every state
#         tz = input(state + ': ') # ask user for timezone
#         tzmap[state] = tz # save to dict
#     return {df['code'][rowNo] : tzmap} # return with dict of country:{mappings}

# # Run program
# i = 0
# allTZs = []

# while i < len(dfMultiTZ):
#     allTZs.append(inputTZ(dfMultiTZ, i))
#     i += 1
# allTZs
# # Save resulting dict as separate json file after everything has printed

In [8]:
# Import timezones by state in dict
# A context manager is used so the file handle is closed once the JSON is parsed.
with open('data/state_timezones_simp.json', 'r') as stateTZFile:
    multiTZs = json.load(stateTZFile)

# Create timezones by country code dict (of singular instances)
dfSingTZ = dfTZ[['timezones', 'code']]
# Keep rows whose timezone list has fewer than 2 entries. .copy() makes this an
# independent frame so the column assignment below does not write into a slice of dfTZ.
dfSingTZ = dfSingTZ[dfSingTZ['timezones'].apply(lambda x: len(x) < 2)].copy()
dfSingTZ['timezones'] = dfSingTZ['timezones'].apply(lambda x: x[0])  # unwrap the timezone from its list
singleTZs = dfSingTZ.set_index('code').to_dict()  # {'timezones': {code: timezone, ...}}

# Combine dicts for mapping timezones; on a duplicate key the single-timezone entry wins
tzMap = {**multiTZs, **singleTZs['timezones']}

# Map timezones to original df
def mapCountryTZ(col):
    """Look up the timezone for one record using the module-level tzMap.

    `col` is indexed by 'country' and, only when the country's tzMap entry is
    a dict, also by 'state'.

    Returns the tzMap entry for the country, or the per-state entry nested
    inside it when that entry is a dict.
    """
    entry = tzMap[col['country']]
    if type(entry) is not dict:
        return entry  # country-level value
    return entry[col['state']]  # per-state value

    
# Run mapCountryTZ once per row (axis=1) and store the results in a new 'timezone' column
df['timezone'] = df.apply(mapCountryTZ, axis=1)
df.head()

Unnamed: 0,track,time,alt,lat,long,city,state,country,timezone
0,0,2019-05-08 06:57:15+00:00,741.08,36.111288,-115.261516,Spring Valley,Nevada,US,America/Los_Angeles
1,0,2019-05-08 15:27:00+00:00,754.07,36.111416,-115.261345,Spring Valley,Nevada,US,America/Los_Angeles
2,0,2019-05-08 15:27:08+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US,America/Los_Angeles
3,0,2019-05-08 15:27:09+00:00,753.0,36.111426,-115.261353,Spring Valley,Nevada,US,America/Los_Angeles
4,0,2019-05-08 15:28:09+00:00,750.0,36.11133,-115.261409,Spring Valley,Nevada,US,America/Los_Angeles


In [None]:
# Scratch: show the 'states' entry stored at row label 3 of dfMultiTZ
dfMultiTZ['states'][3]

In [None]:
# Group states by country code
# https://stackoverflow.com/questions/48979604/pandas-for-each-unique-value-in-one-column-get-unique-values-in-another-column


# NOTE(review): dfMultiTZ comes out of pd.merge(..., on='code') with a fresh integer index,
# while statesByCountry is indexed by country code, so this index-on-index merge does not
# look like it can line rows up - confirm whether this scratch cell is still needed.
dfMultiTZ.merge(statesByCountry, left_index=True, right_index=True)

# for code in statesByCountry:
#     print(code.index())
#     if statesByCountry in dfMultiTZ['code']:
#         print(statesByCountry)

In [None]:
# Scratch: show the 'states' entry stored at row label 2 of dfMultiTZ
dfMultiTZ['states'][2]

In [None]:
# Scratch: print the 'timezones' value of every row in dfAllTimezones, one per line
for tt in dfAllTimezones['timezones']:
    print(tt)

In [None]:
# type(df['time'][0])
# Scratch: call to_pydatetime() on the first 'time' value to view it as a plain datetime
df['time'][0].to_pydatetime()

In [None]:
# https://www.saltycrane.com/blog/2009/05/converting-time-zones-datetime-objects-python/
# Scratch: print every entry of pytz's all_timezones, one per line
for zone in all_timezones:
        print(zone)
# Disabled filter that would have limited the printout to zones containing 'Australia':
#     if 'Australia' in zone:
#         print (zone)

In [None]:
# Scratch: list rows whose country code is still 'MY'
# (the cleaning cell rewrites 'MY' to 'SG', so after it has run this should select nothing)
df.loc[df['country'] == 'MY']

In [None]:
# df['time'] = pd.to_datetime(df['time'])
# df['time']

import datetime as dt
# readable = datetime.datetime.fromtimestamp().isoformat()
# print(readable)

# Rebuild a datetime from the date part and the time part of the first timestamp
firstStamp = df['time'][0]
startdate = dt.datetime.combine(firstStamp.date(), firstStamp.time())

# Print the tzinfo attached to each timestamp in the 'time' column
for t in df['time']:
    print(t.tzinfo)

In [None]:
# Create new DF of values that need to be tracked.
# .copy() gives an independent frame, so later changes to dfValues do not touch df.
dfValues = df[['track','time','alt','lat','long']].copy()
dfValues

In [None]:
# Find the difference between each row and the row before it (DataFrame.diff with its defaults)
dfDiff = dfValues.diff()
dfDiff.head()

In [None]:
# NOTE(review): overwrites dfValues with its own row-to-row differences, so re-running
# this cell differences the already-differenced data again - not idempotent.
dfValues = dfValues.diff()

In [None]:
# Create new DF with only the values that need to be differenced ('time' is left out here),
# then show the column dtypes
dfValues = df[['track','alt','lat','long']].copy()
dfValues.dtypes

In [None]:
# Scratch: check the Python type of the first value in the 'time' column
type(df['time'][0])

In [None]:
# NOTE(review): `old` is not defined in any visible cell - this looks like a pasted
# column-subset-and-copy example rather than part of the analysis; confirm before running.
old[['A', 'C', 'D']].copy()

In [None]:
Etc/GMT
Etc/GMT+0
Etc/GMT+1
Etc/GMT+10
Etc/GMT+11
Etc/GMT+12
Etc/GMT+2
Etc/GMT+3
Etc/GMT+4
Etc/GMT+5
Etc/GMT+6
Etc/GMT+7
Etc/GMT+8
Etc/GMT+9
Etc/GMT-0
Etc/GMT-1
Etc/GMT-10
Etc/GMT-11
Etc/GMT-12
Etc/GMT-13
Etc/GMT-14
Etc/GMT-2
Etc/GMT-3
Etc/GMT-4
Etc/GMT-5
Etc/GMT-6
Etc/GMT-7
Etc/GMT-8
Etc/GMT-9
Etc/GMT0
Etc/Greenwich
Etc/UCT
Etc/UTC
Etc/Universal
Etc/Zulu

In [None]:
# Merge Address DF with original DF, pairing rows by index on both sides
# NOTE(review): dfAddress is not defined in any visible cell - confirm where it comes from.
dfFinal = df.merge(dfAddress, left_index=True, right_index=True)


In [None]:

# Row-to-row differences of dfTimeSorted
# NOTE(review): dfTimeSorted is not defined in any visible cell - confirm where it comes from.
dfTimeSortedDiff = dfTimeSorted.diff()
dfTimeSortedDiff.head()

In [None]:
# Merge DFs and rename columns
# Rows of dfTimeSorted are paired with their diff rows by index.
dff = dfTimeSorted.merge(dfTimeSortedDiff, left_index=True, right_index=True)
# Ten names are assigned positionally; presumably the first five are the original columns
# and the last five their diff counterparts - verify against dfTimeSorted's column order.
dff.columns = ['TrackNo','Time', 'Latitude', 'Longitude','Elevation','NewTrack','TimeDiff','LatitudeDiff', 'LongitudeDiff', 'ElevationDiff']
# Missing 'NewTrack' values become 1 before the column is cast to int
dff['NewTrack'] = dff['NewTrack'].fillna(1).astype(int)
dff.head()

In [None]:
# Rows where both the latitude change and the longitude change are strictly within +/-0.0009
dff.loc[(dff['LatitudeDiff'].abs() < 0.0009) & (dff['LongitudeDiff'].abs() < 0.0009)]

In [None]:
# NOTE(review): 'NewTrack' is cast to int where dff is built, but the mapping key here is
# the string '0', so no value can match it; Series.map turns unmatched values into NaN,
# which would blank the whole column. The replacement is also the string 'NaN' rather than
# a missing value. Confirm the intent (e.g. replacing only zeros) before running this cell.
dff['NewTrack'] = dff['NewTrack'].map({'0': 'NaN'})

In [None]:
# Search for the coordinates and unpack resulting address dictionary:
# take the first search result and round-trip it through JSON (dumps then loads).
# NOTE(review): neither `rg` nor `coordAU` is defined or imported in any visible cell - confirm.
addy = json.loads(json.dumps(rg.search(coordAU)[0]))

In [None]:
# take min and max time of each track and see if it spans more than 1 day

# break down walking vs. transportation

# smaller clusters of walking stops or transportation stops