Skip to content

Commit

Permalink
Add parsing of zone1970.tab table data.
Browse files Browse the repository at this point in the history
  • Loading branch information
lau committed Mar 30, 2015
1 parent be7358c commit 69081f3
Show file tree
Hide file tree
Showing 7 changed files with 495 additions and 5 deletions.
2 changes: 1 addition & 1 deletion dl_latest_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ mkdir -p source_data/ \
&& wget 'https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz' \
&& tar -zxvf tzdata-latest.tar.gz \
&& rm tzdata-latest.tar.gz \
&& rm factory leap-seconds.* leapseconds leapseconds.* Makefile iso3166.tab README systemv yearistype.sh zone.tab zone1970.tab backzone checktab.awk checklinks.awk CONTRIBUTING Theory zoneinfo2tdf.pl \
&& rm factory leap-seconds.* leapseconds leapseconds.* Makefile iso3166.tab README systemv yearistype.sh zone.tab backzone checktab.awk checklinks.awk CONTRIBUTING Theory zoneinfo2tdf.pl \
&& grep -o 'Release [0-9]\{4\}.*' NEWS | head -1 > RELEASE_LINE_FROM_NEWS \
&& rm NEWS
51 changes: 51 additions & 0 deletions lib/tzdata/table_data.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
defmodule Tzdata.TableData do
  @moduledoc """
  Lookup functions generated at compile time from the zone1970.tab table.

  The table is read once, through `Tzdata.TableParser.read_file/0`, while this
  module is being compiled. Every function below returns data that was embedded
  into the module at that point; nothing is read from disk at runtime.

  A table entry is the map produced by `Tzdata.TableParser` for one line of the
  file, with the string keys `"country_codes"` (a list of strings),
  `"latlong"`, `"timezone"` and `"comments"`.
  """

  file_read = Tzdata.TableParser.read_file() |> Enum.to_list()

  timezones = Enum.map(file_read, &(&1["timezone"]))

  @doc """
  Returns a list of all timezones found in the zone1970.tab file,
  in the order they appear in the file.
  """
  def timezones do
    unquote(Macro.escape(timezones))
  end

  country_codes =
    file_read
    |> Enum.flat_map(&(&1["country_codes"]))
    |> Enum.uniq()
    |> Enum.sort()

  @doc """
  Returns a sorted list of all unique country codes found in the
  zone1970.tab file.
  """
  def country_codes do
    unquote(Macro.escape(country_codes))
  end

  @doc """
  Returns the list of table entries that mention `country_code`, in file order.

  `country_code` is a string as listed by `country_codes/0`. Any other
  argument returns `:country_code_not_found`.
  """
  def for_country_code(country_code)

  # One clause per known country code. The entries are selected by comparing
  # the code strings directly, so no atoms are created from the file contents.
  for country_code <- country_codes do
    entries =
      for entry <- file_read,
          code <- entry["country_codes"],
          code == country_code,
          do: entry

    def for_country_code(unquote(country_code)) do
      unquote(Macro.escape(entries))
    end
  end

  def for_country_code(_), do: :country_code_not_found

  @doc """
  Returns the table entry for `timezone`.

  `timezone` is a string as listed by `timezones/0`. Any other argument
  returns `:timezone_not_found`.
  """
  def for_timezone(timezone)

  # One clause per table entry, in file order. Should a timezone ever occur
  # twice, the first clause wins, i.e. the first entry in the file is returned.
  for entry <- file_read do
    def for_timezone(unquote(entry["timezone"])) do
      unquote(Macro.escape(entry))
    end
  end

  def for_timezone(_), do: :timezone_not_found
end
38 changes: 38 additions & 0 deletions lib/tzdata/table_parser.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
defmodule Tzdata.TableParser do
  @moduledoc """
  Parsing of the table file zone1970.tab

  Each data line of the file holds whitespace-separated columns: a
  comma-separated list of country codes, coordinates, a timezone name and an
  optional free-text comment. Lines starting with `#` are comments.
  """
  @file_name "zone1970.tab"

  @doc """
  Opens `file_name` inside the directory `dir_prepend` as a line stream and
  parses it with `process_file/1`.

  Returns a lazy stream of entry maps. `File.stream!/1` raises if the file
  cannot be opened.
  """
  def read_file(dir_prepend \\ "source_data", file_name \\ @file_name) do
    File.stream!("#{dir_prepend}/#{file_name}")
    |> process_file()
  end

  @doc """
  Parses an enumerable of lines in zone1970.tab format.

  Comment lines and blank (empty or whitespace-only) lines are dropped, and
  trailing `#` comments are removed from the remaining lines. Returns a lazy
  stream with one map per data line, holding the string keys
  `"country_codes"` (a list of strings), `"latlong"`, `"timezone"` and
  `"comments"` (`""` when the line has no comments column).

  Raises `ArgumentError` when the stream is consumed and a data line has
  fewer than three columns.
  """
  def process_file(file_stream) do
    file_stream
    |> filter_comment_lines()
    |> filter_empty_lines()
    # Strip comments at line end. Like this comment.
    |> Stream.map(&strip_comment/1)
    |> Stream.map(&process_line/1)
  end

  # The whitespace after the timezone column is optional so that a final line
  # without a trailing newline (and without a comments column) still matches.
  defp line_regex do
    ~r/(?<country_codes>[^\s]+)[\s]+(?<latlong>[^\s]+)[\s]+(?<timezone>[^\s]+)(?:[\s]+(?<comments>[^\n]+)?)?/
  end

  # Turns one data line into an entry map; the country code column is split
  # into a list of codes.
  defp process_line(line) do
    case Regex.named_captures(line_regex(), line) do
      nil ->
        raise ArgumentError, message: "could not parse table line: #{inspect(line)}"

      captures ->
        %{captures | "country_codes" => split_country_codes(captures["country_codes"])}
    end
  end

  defp split_country_codes(string) do
    String.split(string, ",")
  end

  # Removes a trailing comment together with the whitespace in front of it.
  # `.*` (rather than `.+`) also removes a bare `#` with nothing after it.
  defp strip_comment(line), do: Regex.replace(~r/[\s]*#.*/, line, "")

  defp filter_comment_lines(input) do
    Stream.reject(input, fn line -> Regex.match?(~r/^[\s]*#/, line) end)
  end

  # Drops lines that contain nothing but whitespace, including "\r\n" endings.
  defp filter_empty_lines(input) do
    Stream.reject(input, fn line -> Regex.match?(~r/^\s*$/, line) end)
  end
end
Loading

0 comments on commit 69081f3

Please sign in to comment.