diff --git a/agpl-containers-string_vectors.ads b/agpl-containers-string_vectors.ads new file mode 100644 index 0000000..9eff3ff --- /dev/null +++ b/agpl-containers-string_vectors.ads @@ -0,0 +1,33 @@ +------------------------------------------------------------------------------ +-- AGPL -- +-- -- +-- Copyright (C) 2003 -- +-- A. Mosteo. -- +-- -- +-- Authors: A. Mosteo. (public@mosteo.com) -- +-- -- +-- If you have any questions in regard to this software, please address -- +-- them to the above email. -- +-- -- +-- This program is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This program is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +------------------------------------------------------------------------------ + +with Ada.Containers.Indefinite_Vectors; + +package Agpl.Containers.String_Vectors is new + Ada.Containers.Indefinite_Vectors (Positive, String); + +pragma Preelaborate (Agpl.Containers.String_Vectors); + diff --git a/agpl-filesystem.adb b/agpl-filesystem.adb index 31280fd..2934a73 100644 --- a/agpl-filesystem.adb +++ b/agpl-filesystem.adb @@ -34,6 +34,8 @@ with Agpl.Strings.Fields; +with Ada.Streams.Stream_Io; + package body Agpl.Filesystem is ------------------ @@ -61,4 +63,37 @@ package body Agpl.Filesystem is return Strings.Fields.String_Tail_Reverse (This, '.') & '.' & New_Ext; end Replace_Extension; + --------------- + -- Read_File -- + --------------- + + function Read_File (Name : String) return Ustring is + Result : Ustring; + use Ada.Streams.Stream_Io; + F : File_Type; + begin + Open (F, In_File, Name); + declare + Length : constant Natural := Natural (Size (F)); + Remain : Natural := Length; + Stream : constant Stream_Access := Ada.Streams.Stream_Io.Stream (F); + begin + while Remain > 0 loop + declare + Chunk : String (1 .. Natural'Min (Remain, 1000)); + begin + String'Read (Stream, Chunk); + Asu.Append (Result, Chunk); + Remain := Remain - Chunk'Length; + end; + end loop; + end; + Close (F); + return Result; + exception + when others => + Close (F); + raise; + end Read_File; + end Agpl.Filesystem; diff --git a/agpl-filesystem.ads b/agpl-filesystem.ads index 00158b6..3d371e9 100644 --- a/agpl-filesystem.ads +++ b/agpl-filesystem.ads @@ -32,9 +32,11 @@ ------------------------------------------------------------------------------ -- $Id: agpl.ads,v 1.4 2004/01/21 21:05:25 Jano Exp $ +with Agpl.Types.Ustrings; use Agpl.Types.Ustrings; + package Agpl.Filesystem is - pragma Preelaborate; +-- pragma Preelaborate; -- Returns the same string with a Folder_Separator added if it is missing. function Ensure_Slash (This : in String; Separator : in Character := '/') @@ -45,4 +47,7 @@ package Agpl.Filesystem is return String; -- Replace the extension by a new one + function Read_File (Name : String) return Ustring; + -- Read a full file as a string! + end Agpl.Filesystem; diff --git a/agpl-strings-utf8.adb b/agpl-strings-utf8.adb new file mode 100644 index 0000000..8584751 --- /dev/null +++ b/agpl-strings-utf8.adb @@ -0,0 +1,46 @@ +with Agpl.Types.Ustrings; use Agpl.Types.Ustrings; + +package body Agpl.Strings.Utf8 is + + ------------------- + -- Extract_Words -- + ------------------- + + function Extract_Words (Str : String) return Containers.String_Vectors.Vector + is + Words : Containers.String_Vectors.Vector; + Str_Idx : Natural := Str'First; + Word : Ustring; + use Asu; + begin + while Str_Idx <= Str'Last loop + declare + Char : Unicode.Unicode_Char; + begin + Unicode.Ces.Utf8.Read (Str, Str_Idx, Char); + + if Unicode.Is_Letter (Char) or else Unicode.Is_Digit (Char) then + declare + Code : String (1 .. 20); + Last : Natural := Code'First; + begin + Unicode.Ces.Utf8.Encode (Char, Code, Last); + Append (Word, Code (Code'First + 1 .. Last)); + end; + end if; + + if Str_Idx > Str'Last or else + not (Unicode.Is_Letter (Char) or else Unicode.Is_Digit (Char)) + then + if Word /= Null_Ustring then + Words.Append (+Word); + Word := Null_Ustring; + end if; + end if; + end; + end loop; + + return Words; + end Extract_Words; + +end Agpl.Strings.Utf8; diff --git a/agpl-strings-utf8.ads b/agpl-strings-utf8.ads new file mode 100644 index 0000000..e1ae575 --- /dev/null +++ b/agpl-strings-utf8.ads @@ -0,0 +1,46 @@ +------------------------------------------------------------------------------ +-- ADAGIO - ADALID - AENEA. -- +-- -- +-- Copyright (C) 2003 -- +-- A. Mosteo. -- +-- -- +-- Authors: A. Mosteo. (adagio@mosteo.com) -- +-- -- +-- If you have any questions in regard to this software, please address -- +-- them to the above email. -- +-- -- +-- This program is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This program is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +-- -- +-- You are not allowed to use any part of this code to develop a program -- +-- whose output would be used to harass or prosecute other users of the -- +-- networks Adagio connects with. All data collected with Adagio or a tool -- +-- containing Adagio code about other network users must remain -- +-- confidential and cannot be made public by any mean, nor be used to -- +-- harass or legally prosecute these users. -- +------------------------------------------------------------------------------ +-- $Id: agpl-strings.ads,v 1.3 2004/02/03 22:52:09 Jano Exp $ + +with Agpl.Containers.String_Vectors; + +with Unicode.Ces.Utf8; + +package Agpl.Strings.Utf8 is + + subtype Utf8_String is Unicode.Ces.Utf8.Utf8_String; + + function Extract_Words (Str : String) return Containers.String_Vectors.Vector; + -- Will split the string using non-letters and non-numbers as delimiters + +end Agpl.Strings.Utf8; diff --git a/agpl-url.adb b/agpl-url.adb new file mode 100644 index 0000000..6d15313 --- /dev/null +++ b/agpl-url.adb @@ -0,0 +1,159 @@ +------------------------------------------------------------------------------ +-- Ada Web Server -- +-- -- +-- Copyright (C) 2000-2004 -- +-- ACT-Europe -- +-- -- +-- This library is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This library is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +-- -- +-- As a special exception, if other files instantiate generics from this -- +-- unit, or you link this unit with other files to produce an executable, -- +-- this unit does not by itself cause the resulting executable to be -- +-- covered by the GNU General Public License. This exception does not -- +-- however invalidate any other reasons why the executable file might be -- +-- covered by the GNU Public License. -- +------------------------------------------------------------------------------ + +-- $Id: aws-url.adb,v 1.37 2004/11/10 13:53:24 obry Exp $ + +with Agpl.Conversions; + +with Ada.Characters.Handling; + +package body Agpl.URL is + + subtype Escape_Code is String (1 .. 2); + + Not_Escaped : constant Escape_Code := " "; + + function Code (C : in Character) return Escape_Code; + pragma Inline (Code); + -- Returns hexadecimal code for character C + + subtype ASCII_7 is Character range Character'First .. Character'Val (127); + type ASCII_7_Set is array (ASCII_7) of Escape_Code; + + function Build_Hex_Escape return ASCII_7_Set; + -- Returns the table with pre-computed encoding for 7bits characters + + ---------------------- + -- Build_Hex_Escape -- + ---------------------- + + function Build_Hex_Escape return ASCII_7_Set is + Result : ASCII_7_Set; + begin + for C in Character'Val (0) .. Character'Val (127) loop + if Strings.Maps.Is_In (C, Default_Encoding_Set) then + Result (C) := Code (C); + else + Result (C) := Not_Escaped; + end if; + end loop; + return Result; + end Build_Hex_Escape; + + ---------- + -- Code -- + ---------- + + function Code (C : in Character) return Escape_Code is + begin + return Conversions.To_Hex (Character'Pos (C)); + end Code; + + Hex_Escape : constant ASCII_7_Set := Build_Hex_Escape; + -- Limit Hex_Escape to 7bits ASCII characters only. Other ISO-8859-1 are + -- handled separately in Encode function. Space character is not processed + -- specifically, contrary to what is done in AWS.URL. + + ------------ + -- Decode -- + ------------ + + function Decode (Str : in String) return String is + Res : String (1 .. Str'Length); + K : Natural := 0; + I : Positive := Str'First; + begin + if Str = "" then + return ""; + end if; + + loop + K := K + 1; + + if Str (I) = '%' + and then I + 2 <= Str'Last + and then Characters.Handling.Is_Hexadecimal_Digit (Str (I + 1)) + and then Characters.Handling.Is_Hexadecimal_Digit (Str (I + 2)) + then + Res (K) := Conversions.From_Hex (Str (I + 1 .. I + 2)); + I := I + 2; + + elsif Str (I) = '+' then + Res (K) := ' '; + + else + Res (K) := Str (I); + end if; + + I := I + 1; + exit when I > Str'Last; + end loop; + + return Res (1 .. K); + end Decode; + + ------------ + -- Encode -- + ------------ + + function Encode + (Str : in String; + Encoding_Set : in Strings.Maps.Character_Set := Default_Encoding_Set) + return String + is + C_128 : constant Character := Character'Val (128); + Res : String (1 .. Str'Length * 3); + K : Natural := 0; + begin + for I in Str'Range loop + if Strings.Maps.Is_In (Str (I), Encoding_Set) then + -- This character must be encoded + + K := K + 1; + Res (K) := '%'; + K := K + 1; + + if Str (I) < C_128 then + -- We keep a table for characters lower than 128 for efficiency + Res (K .. K + 1) := Hex_Escape (Str (I)); + else + Res (K .. K + 1) := Code (Str (I)); + end if; + + K := K + 1; + + else + K := K + 1; + Res (K) := Str (I); + end if; + end loop; + + return Res (1 .. K); + end Encode; + +end Agpl.URL; diff --git a/agpl-url.ads b/agpl-url.ads new file mode 100644 index 0000000..4617c29 --- /dev/null +++ b/agpl-url.ads @@ -0,0 +1,84 @@ +------------------------------------------------------------------------------ +-- Ada Web Server -- +-- -- +-- Copyright (C) 2000-2004 -- +-- ACT-Europe -- +-- -- +-- This library is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This library is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +-- -- +-- As a special exception, if other files instantiate generics from this -- +-- unit, or you link this unit with other files to produce an executable, -- +-- this unit does not by itself cause the resulting executable to be -- +-- covered by the GNU General Public License. This exception does not -- +-- however invalidate any other reasons why the executable file might be -- +-- covered by the GNU Public License. -- +------------------------------------------------------------------------------ + +-- $Id: aws-url.ads,v 1.18 2004/11/10 13:53:24 obry Exp $ + +with Ada.Strings.Maps; + +package Agpl.URL is + + use Ada; + + -- The general URL form as described in RFC2616 is: + -- + -- http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + -- + -- Note also that there are different RFC describing URL like the 2616 and + -- 1738 but they use different terminologies. Here we try to follow the + -- names used in RFC2616 but we have implemented some extensions at the + -- end of this package. For example the way Path and File are separated or + -- the handling of user/password which is explicitly not allowed in the + -- RFC but are used and supported in many browsers. Here are the extended + -- URL supported: + -- + -- http://username:password@www.here.com:80/dir1/dir2/xyz.html?p=8&x=doh + -- | | | | | | + -- protocol host port path file parameters + -- + -- <-- pathname --> + + -- + -- URL Encoding and Decoding + -- + + Default_Encoding_Set : constant Strings.Maps.Character_Set; + + function Encode + (Str : in String; + Encoding_Set : in Strings.Maps.Character_Set := Default_Encoding_Set) + return String; + -- Encode Str into a URL-safe form. Many characters are forbiden into an + -- URL and needs to be encoded. A character is encoded by %XY where XY is + -- the character's ASCII hexadecimal code. For example a space is encoded + -- as %20. + + function Decode (Str : in String) return String; + -- This is the oposite of Encode above + +private + + use type Ada.Strings.Maps.Character_Set; + + Default_Encoding_Set : constant Strings.Maps.Character_Set + := Strings.Maps.To_Set + (Span => (Low => Character'Val (128), + High => Character'Val (Character'Pos (Character'Last)))) + or + Strings.Maps.To_Set (";/?:@&=+$,<>#%""{}|\^[]` "); + +end Agpl.URL; diff --git a/agpl-ustrings.ads b/agpl-ustrings.ads new file mode 100644 index 0000000..60f9385 --- /dev/null +++ b/agpl-ustrings.ads @@ -0,0 +1,29 @@ +------------------------------------------------------------------------------ +-- AGPL -- +-- -- +-- Copyright (C) 2003 -- +-- A. Mosteo. -- +-- -- +-- Authors: A. Mosteo. (public@mosteo.com) -- +-- -- +-- If you have any questions in regard to this software, please address -- +-- them to the above email. -- +-- -- +-- This program is free software; you can redistribute it and/or modify -- +-- it under the terms of the GNU General Public License as published by -- +-- the Free Software Foundation; either version 2 of the License, or (at -- +-- your option) any later version. -- +-- -- +-- This program is distributed in the hope that it will be useful, but -- +-- WITHOUT ANY WARRANTY; without even the implied warranty of -- +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- +-- General Public License for more details. -- +-- -- +-- You should have received a copy of the GNU General Public License -- +-- along with this library; if not, write to the Free Software Foundation, -- +-- Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -- +------------------------------------------------------------------------------ + +with Agpl.Types.Ustrings; + +package Agpl.Ustrings renames Agpl.Types.Ustrings; diff --git a/agpl-xml.adb b/agpl-xml.adb index 9f7edc3..15ca185 100644 --- a/agpl-xml.adb +++ b/agpl-xml.adb @@ -45,7 +45,6 @@ with DOM.Readers; with Input_Sources.File; with Input_Sources.Strings; with Sax.Readers; -with Unicode.CES.Basic_8bit; use Unicode.CES; with Ada.Exceptions; use Ada.Exceptions; with Text_IO; @@ -78,7 +77,10 @@ package body Agpl.Xml is -- From_string -- ------------------------------------------------------------------------ -- Parses and XML string (Latin1 accepted) - function From_String (Data : in String) return Document is + function From_String + (Data : in String; + Encoding : in Unicode.Ces.Encoding_Scheme := Unicode.Ces.Utf8.Utf8_Encoding) + return Document is Tree : DOM.Readers.Tree_Reader; String_handle : Input_Sources.Strings.String_Input; N : Node; @@ -91,7 +93,7 @@ package body Agpl.Xml is Input_Sources.Strings.Open (Data'Unrestricted_Access, - Basic_8bit.Basic_8bit_Encoding, + Encoding, String_handle); DOM.Readers.Parse (Tree, String_handle); Input_Sources.Strings.Close (String_handle); @@ -104,7 +106,10 @@ package body Agpl.Xml is -- Parse -- ------------------------------------------------------------------------ -- Read a XML file and stores it in memory; - function Parse (File : String) return Document is + function Parse + (File : String) + return Document + is Tree : DOM.Readers.Tree_Reader; File_handle : Input_Sources.File.File_Input; N : Node; @@ -116,9 +121,9 @@ package body Agpl.Xml is True); Input_Sources.File.Open (File, File_handle); - Input_Sources.File.Set_Encoding - (File_handle, - Basic_8bit.Basic_8bit_Encoding); +-- Input_Sources.File.Set_Encoding +-- (File_handle, +-- Encoding); DOM.Readers.Parse (Tree, File_handle); Input_Sources.File.Close (File_handle); N := DCD.Get_Element (DOM.Readers.Get_Tree (Tree)); @@ -315,20 +320,34 @@ package body Agpl.Xml is return DCD.Create_Element (DCN.Owner_Document (Parent), Name); end Create_Child; + ------------------- + -- Set_Attribute -- + ------------------- + + procedure Set_Attribute (Item : Node; + Attr : String; + Val : String) + is + begin + Dce.Set_Attribute (Item, Attr, Val); + end Set_Attribute; + ------------------------------------------------------------------------ -- Delete -- ------------------------------------------------------------------------ procedure Delete (Item : in out Node) is Dummy : Node; begin - -- If it's the root element, we free everything: - if DCD.Get_Element (DCN.Owner_Document (Item)) = Item then - Dummy := DCN.Owner_Document (Item); - else - Dummy := DCN.Remove_Child (DCN.Parent_Node (Item), Item); + if Item /= null then + -- If it's the root element, we free everything: + if Dcd.Get_Element (Dcn.Owner_Document (Item)) = Item then + Dummy := Dcn.Owner_Document (Item); + else + Dummy := Dcn.Remove_Child (Dcn.Parent_Node (Item), Item); + end if; + Dcn.Free (Dummy, Deep => True); + Item := null; end if; - DCN.Free (Dummy, Deep => True); - Item := null; end Delete; ------------------------------------------------------------------------ diff --git a/agpl-xml.ads b/agpl-xml.ads index 908632b..8960da8 100644 --- a/agpl-xml.ads +++ b/agpl-xml.ads @@ -36,6 +36,8 @@ with DOM.Core; with Dom.Core.Nodes; +with Unicode.Ces; +with Unicode.Ces.Utf8; with Ada.Containers.Vectors; @@ -62,13 +64,16 @@ package Agpl.Xml is -- From_String -- ------------------------------------------------------------------------ -- Parses and XML string (Latin1 accepted) - function From_string (Data : in String) return Document; + function From_String + (Data : in String; + Encoding : in Unicode.Ces.Encoding_Scheme := Unicode.Ces.Utf8.Utf8_Encoding) + return Document; ------------------------------------------------------------------------ -- Parse -- ------------------------------------------------------------------------ -- Read a XML file and stores it in memory; - function Parse (File : String) return Document; + function Parse (File : String) return Document; ------------------------------------------------------------------------ -- Get -- @@ -105,6 +110,10 @@ package Agpl.Xml is Default : Number) return Number; + procedure Set_Attribute (Item : Node; + Attr : String; + Val : String); + -- Add a child by giving its name function Add (Parent : Node; Name : String) return Node; diff --git a/dom-core-nodes-output.adb b/dom-core-nodes-output.adb index d7ecd98..10afda6 100644 --- a/dom-core-nodes-output.adb +++ b/dom-core-nodes-output.adb @@ -39,13 +39,17 @@ ------------------------------------------------------------------------------ -- $Id: dom-core-nodes-output.adb,v 1.3 2004/01/21 21:05:43 Jano Exp $ +with Agpl.Types.Ustrings; with Agpl.Xml; with Sax.Encodings; use Sax.Encodings; +with Ada.Strings.Unbounded.Text_Io; +with Ada.Text_Io; + package body Dom.Core.Nodes.Output is - NL : constant String := (Character'Val (13), Character'Val (10)); + NL : constant String := (1 => Character'Val (10)); Tab : constant Natural := 3; procedure Print_Whites (U : in out ASU.Unbounded_String; Whites : in Natural) is @@ -207,6 +211,43 @@ package body Dom.Core.Nodes.Output is when others => Append (U, Node_Value (N)); end case; + + -- Lame hack: + declare + use Asu; + begin + if Slice (U, 1, 2) /= "" & Nl & U; + end if; + end; + end Print; + + procedure Print + (N : Node; + File : String; + Print_Comments : Boolean := False; + Print_XML_PI : Boolean := False; + With_URI : Boolean := False; + Indent : Natural := 0) + is + use Agpl.Types.Ustrings; + Text : Ustring; + begin + Print (N, Text, Print_Comments, Print_Xml_Pi, With_Uri, Indent); + + declare + use Ada.Strings.Unbounded.Text_Io; + use Ada.Text_Io; + F : File_Type; + begin + Create (F, Out_File, File); + Put_Line (F, Text); + Close (F); + exception + when others => + Close (F); + raise; + end; end Print; procedure Print diff --git a/dom-core-nodes-output.ads b/dom-core-nodes-output.ads index cea7a7b..66ad88c 100644 --- a/dom-core-nodes-output.ads +++ b/dom-core-nodes-output.ads @@ -61,6 +61,15 @@ package Dom.Core.Nodes.Output is -- By default, names are of the form ns_prefix:local_name. However, if -- with_URI is True, names will be ns_URI:local_name instead + procedure Print + (N : Node; + File : String; + Print_Comments : Boolean := False; + Print_XML_PI : Boolean := False; + With_URI : Boolean := False; + Indent : Natural := 0); + -- As previous but to file + procedure Print (List : Dom.Core.Node_List; U : in out ASU.Unbounded_string;