Add parsing of zone1970.tab table data.

lau · Mar 30, 2015 · 69081f3 · 69081f3
1 parent be7358c
commit 69081f3
Show file tree

Hide file tree

Showing 7 changed files with 495 additions and 5 deletions.
diff --git a/dl_latest_data.sh b/dl_latest_data.sh
@@ -11,6 +11,6 @@ mkdir -p source_data/       \
   && wget 'https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz' \
   && tar -zxvf tzdata-latest.tar.gz                    \
   && rm tzdata-latest.tar.gz                           \
-  && rm factory leap-seconds.* leapseconds leapseconds.* Makefile iso3166.tab README systemv yearistype.sh zone.tab zone1970.tab backzone checktab.awk checklinks.awk CONTRIBUTING Theory zoneinfo2tdf.pl \
+  && rm factory leap-seconds.* leapseconds leapseconds.* Makefile iso3166.tab README systemv yearistype.sh zone.tab backzone checktab.awk checklinks.awk CONTRIBUTING Theory zoneinfo2tdf.pl \
   && grep -o 'Release [0-9]\{4\}.*' NEWS | head -1 > RELEASE_LINE_FROM_NEWS \
   && rm NEWS
diff --git a/lib/tzdata/table_data.ex b/lib/tzdata/table_data.ex
@@ -0,0 +1,51 @@
+defmodule Tzdata.TableData do
+  @moduledoc """
+  Lookup functions for the entries of the zone1970.tab file.
+
+  The file is parsed with `Tzdata.TableParser.read_file/0` while this module is
+  being compiled, and every function below returns data that was embedded into
+  the module at that point.
+  """
+
+  # All functions are generated from the table file at compile time, so the
+  # module has to be recompiled when that file changes. The path mirrors the
+  # default arguments of `Tzdata.TableParser.read_file/2`.
+  @external_resource "source_data/zone1970.tab"
+
+  file_read = Tzdata.TableParser.read_file |> Enum.to_list
+
+  timezones = Enum.map(file_read, &(&1["timezone"]))
+
+  @doc """
+  Returns a list of all timezones found in the zone1970.tab file,
+  in the order in which their entries were parsed.
+  """
+  def timezones do
+    unquote(Macro.escape(timezones))
+  end
+
+  country_codes = file_read
+      |> Enum.flat_map(&(&1["country_codes"]))
+      |> Enum.uniq
+      |> Enum.sort
+
+  @doc """
+  Returns a sorted list of all unique country_codes found in the zone1970.tab file
+  """
+  def country_codes do
+    unquote(Macro.escape(country_codes))
+  end
+
+  @doc """
+  Returns the list of table entries that mention the given country code.
+
+  `country_code` is a string as found in the table file. Each entry is a map
+  as produced by `Tzdata.TableParser`. Returns `:country_code_not_found` for
+  any argument that is not a country code present in the file.
+  """
+  def for_country_code(country_code)
+
+  # One clause per known country code. The entries are selected by comparing
+  # the string codes directly, so no atoms have to be created from file data.
+  Enum.each country_codes, fn (country_code) ->
+    entries =
+      for entry <- file_read,
+          code <- entry["country_codes"],
+          code == country_code,
+          do: entry
+
+    def for_country_code(unquote(country_code)) do
+      unquote(Macro.escape(entries))
+    end
+  end
+  def for_country_code(_), do: :country_code_not_found
+
+  @doc """
+  Returns the table entry for the given timezone name.
+
+  `timezone` is a string as found in the table file. The result is the first
+  entry with that timezone, as a map produced by `Tzdata.TableParser`.
+  Returns `:timezone_not_found` for any argument that is not a timezone
+  present in the file.
+  """
+  def for_timezone(timezone)
+
+  # One clause per parsed entry; clauses are tried in file order, so the first
+  # entry wins should a timezone ever be listed more than once.
+  Enum.each file_read, fn (entry) ->
+    def for_timezone(unquote(entry["timezone"])) do
+      unquote(Macro.escape(entry))
+    end
+  end
+  def for_timezone(_), do: :timezone_not_found
+end
diff --git a/lib/tzdata/table_parser.ex b/lib/tzdata/table_parser.ex
@@ -0,0 +1,38 @@
+defmodule Tzdata.TableParser do
+  @moduledoc """
+  Parsing of the table file zone1970.tab
+
+  Every data line of the table becomes a map with the string keys
+  `"country_codes"` (a list of strings), `"latlong"`, `"timezone"` and
+  `"comments"` (an empty string when the line has no comments column).
+  """
+  @file_name "zone1970.tab"
+
+  # Columns are separated by whitespace. The comments column, together with
+  # the whitespace in front of it, is optional so that a last line without a
+  # trailing newline still matches.
+  @line_regex ~r/(?<country_codes>[^\s]+)[\s]+(?<latlong>[^\s]+)[\s]+(?<timezone>[^\s]+)(?:[\s]+(?<comments>[^\n]+))?/
+
+  @doc """
+  Streams the file `file_name` located in the directory `dir_prepend` line by
+  line and parses it with `process_file/1`.
+
+  Returns the stream of entry maps produced by `process_file/1`.
+  """
+  def read_file(dir_prepend \\ "source_data", file_name \\ @file_name) do
+    "#{dir_prepend}/#{file_name}"
+    |> File.stream!
+    |> process_file
+  end
+
+  @doc """
+  Parses an enumerable of lines in the zone1970.tab format.
+
+  Comment lines and blank lines are dropped, comments at the end of a line are
+  removed, and every remaining line is converted to an entry map. The result
+  is a lazy stream.
+  """
+  def process_file(file_stream) do
+    file_stream
+    |> filter_comment_lines
+    |> filter_empty_lines
+    |> Stream.map(&strip_comment/1) # Strip comments at line end. Like this comment.
+    |> Stream.map(&process_line/1)
+  end
+
+  # Splits one data line into its named columns. The map update raises when
+  # the line does not match @line_regex, so malformed lines are not skipped
+  # silently.
+  defp process_line(line) do
+    map = Regex.named_captures(@line_regex, line)
+    %{map | "country_codes" => split_country_codes(map["country_codes"])}
+  end
+
+  # The first column holds one or more country codes separated by commas.
+  defp split_country_codes(string) do
+    String.split(string, ",")
+  end
+
+  # Removes a "#" comment (and the whitespace before it) up to the end of the
+  # line. `.*` rather than `.+` so that a bare trailing "#" is removed as well.
+  defp strip_comment(line), do: Regex.replace(~r/[\s]*#.*/, line, "")
+
+  # Drops lines whose first non-whitespace character is "#".
+  defp filter_comment_lines(input) do
+    Stream.reject(input, &Regex.match?(~r/^[\s]*#/, &1))
+  end
+
+  # Drops lines that hold nothing but whitespace. This covers "\n", "\r\n" and
+  # lines made up of spaces or tabs, none of which can be parsed as an entry.
+  defp filter_empty_lines(input) do
+    Stream.reject(input, &Regex.match?(~r/^\s*$/, &1))
+  end
+end