pandas-dev · wesm · Apr 2, 2012 · Mar 26, 2012 · Mar 26, 2012 · Mar 26, 2012
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -667,10 +667,10 @@ def _get_handle(path, mode, encoding=None):
 if py3compat.PY3:  # pragma: no cover
     def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
         # ignore encoding
-        return csv.reader(f, dialect=csv.excel, **kwds)
+        return csv.reader(f, dialect=dialect, **kwds)
 
     def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
-        return csv.writer(f, dialect=csv.excel, **kwds)
+        return csv.writer(f, dialect=dialect, **kwds)
 else:
     class UnicodeReader:
         """

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -4,6 +4,7 @@
 from StringIO import StringIO
 import re
 from itertools import izip
+from urlparse import urlparse
 
 import numpy as np
 
@@ -12,6 +13,7 @@
 import datetime
 import pandas.core.common as com
 import pandas._tseries as lib
+from pandas.util import py3compat
 
 from pandas.util.decorators import Appender
 
@@ -20,7 +22,10 @@
 
 Parameters
 ----------
-filepath_or_buffer : string or file handle / StringIO
+filepath_or_buffer : string or file handle / StringIO. The string could be
+    a URL. Valid URL schemes include http://, https://, ftp://, and file://.
+    For file:// URLs, a host is expected. For instance, a local file could be
+    file://localhost/path/to/table.csv
 %s
 header : int, default 0
     Row to use for the column labels of the parsed DataFrame
@@ -80,7 +85,6 @@
 %s
 """ % (_parser_params % _table_sep)
 
-
 _fwf_widths = """\
 colspecs : a list of pairs (tuples), giving the extents
     of the fixed-width fields of each line as half-open internals
@@ -99,8 +103,31 @@
 """ % (_parser_params % _fwf_widths)
 
 
+def _is_url(url):
+    """
+    Return True when ``url`` has an http, https, ftp, or file scheme.
+
+    Only the scheme reported by ``urlparse`` is inspected; nothing else
+    about the string is validated.
+    """
+    scheme = urlparse(url).scheme
+    return scheme in ('http', 'file', 'ftp', 'https')
+
 def _read(cls, filepath_or_buffer, kwds):
     "Generic reader of line files."
+
+    # URL input: open the remote resource so the code below sees an object
+    # with a ``read`` method instead of a path string.
+    if isinstance(filepath_or_buffer, str) and _is_url(filepath_or_buffer):
+        from urllib2 import urlopen
+        filepath_or_buffer = urlopen(filepath_or_buffer)
+        if py3compat.PY3:
+            from io import TextIOWrapper
+            # The payload is decoded up front and re-wrapped in a StringIO.
+            # The codec must come from the caller's keyword arguments:
+            # ``encoding`` was previously read here without ever being bound,
+            # which raised as soon as this branch executed.
+            # NOTE(review): assumes ``kwds`` is the dict of reader keyword
+            # arguments -- confirm against the callers of _read.
+            encoding = kwds.get('encoding', None)
+            if encoding:
+                # An explicit codec was requested: let decode errors surface.
+                errors = 'strict'
+            else:
+                # No codec given: fall back to UTF-8 and replace bad bytes.
+                errors = 'replace'
+                encoding = 'utf-8'
+            filepath_or_buffer = StringIO(filepath_or_buffer.read().decode(encoding, errors))
+
     if hasattr(filepath_or_buffer, 'read'):
         f = filepath_or_buffer
     else:

diff --git a/pandas/io/tests/salary.table b/pandas/io/tests/salary.table
@@ -0,0 +1,47 @@
+S	X	E	M
+13876 	1 	1 	1 
+11608 	1 	3 	0 
+18701 	1 	3 	1 
+11283 	1 	2 	0 
+11767 	1 	3 	0 
+20872 	2 	2 	1 
+11772 	2 	2 	0 
+10535 	2 	1 	0 
+12195 	2 	3 	0 
+12313 	3 	2 	0 
+14975 	3 	1 	1 
+21371 	3 	2 	1 
+19800 	3 	3 	1 
+11417 	4 	1 	0 
+20263 	4 	3 	1 
+13231 	4 	3 	0 
+12884 	4 	2 	0 
+13245 	5 	2 	0 
+13677 	5 	3 	0 
+15965 	5 	1 	1 
+12336 	6 	1 	0 
+21352 	6 	3 	1 
+13839 	6 	2 	0 
+22884 	6 	2 	1 
+16978 	7 	1 	1 
+14803 	8 	2 	0 
+17404 	8 	1 	1 
+22184 	8 	3 	1 
+13548 	8 	1 	0 
+14467 	10 	1 	0 
+15942 	10 	2 	0 
+23174 	10 	3 	1 
+23780 	10 	2 	1 
+25410 	11 	2 	1 
+14861 	11 	1 	0 
+16882 	12 	2 	0 
+24170 	12 	3 	1 
+15990 	13 	1 	0 
+26330 	13 	2 	1 
+17949 	14 	2 	0 
+25685 	15 	3 	1 
+27837 	16 	2 	1 
+18838 	16 	2 	0 
+17483 	16 	1 	0 
+19207 	17 	2 	0 
+19346 	20 	1 	0 
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -764,6 +764,18 @@ def test_fwf(self):
         df = read_fwf(StringIO(data3), colspecs=colspecs, delimiter='~', header=None)
         assert_frame_equal(df, expected)
 
+    def test_url(self):
+        # HTTP(S): parse the table straight from its hosted location
+        remote = read_table('https://raw.github.com/jseabold/pandas/read-table-url/pandas/io/tests/salary.table')
+        # Reference frame: salary.table under the directory curpath() returns
+        local_path = os.path.join(curpath(), 'salary.table')
+        expected = read_table(local_path)
+        assert_frame_equal(remote, expected)
+        # FILE: the same local file, addressed through a file:// URL
+        via_file_url = read_table('file://localhost/' + local_path)
+        assert_frame_equal(via_file_url, expected)
+        #TODO: ftp testing
 
 class TestParseSQL(unittest.TestCase):
 

diff --git a/setup.py b/setup.py
@@ -393,7 +393,8 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
                 ],
       package_data={'pandas.io' : ['tests/*.h5',
                                    'tests/*.csv',
-                                   'tests/*.xls'],
+                                   'tests/*.xls',
+                                   'tests/*.table'],
                     'pandas.tests' : ['data/*.pickle',
                                       'data/*.csv']
                    },
-Original file line number
+Diff line change
@@ -0,0 +1,47 @@
+    S	X	E	M
+	1 	1 	1
+	1 	3 	0
+	1 	3 	1
+	1 	2 	0
+	1 	3 	0
+	2 	2 	1
+	2 	2 	0
+	2 	1 	0
+	2 	3 	0
+	3 	2 	0
+	3 	1 	1
+	3 	2 	1
+	3 	3 	1
+	4 	1 	0
+	4 	3 	1
+	4 	3 	0
+	4 	2 	0
+	5 	2 	0
+	5 	3 	0
+	5 	1 	1
+	6 	1 	0
+	6 	3 	1
+	6 	2 	0
+	6 	2 	1
+	7 	1 	1
+	8 	2 	0
+	8 	1 	1
+	8 	3 	1
+	8 	1 	0
+	10 	1 	0
+	10 	2 	0
+	10 	3 	1
+	10 	2 	1
+	11 	2 	1
+	11 	1 	0
+	12 	2 	0
+	12 	3 	1
+	13 	1 	0
+	13 	2 	1
+	14 	2 	0
+	15 	3 	1
+	16 	2 	1
+	16 	2 	0
+	16 	1 	0
+	17 	2 	0
+	20 	1 	0