From 3533134f17ea3da14bb36e03d614c86739bcb3eb Mon Sep 17 00:00:00 2001
From: Chris Charlton <c.charlton@bristol.ac.uk>
Date: Thu, 28 Mar 2024 13:30:16 +0000
Subject: [PATCH 1/5] ENH: Add support for reading 110-format Stata dta files

---
 pandas/io/stata.py                              |   6 +++---
 pandas/tests/io/data/stata/stata-compat-110.dta | Bin 0 -> 1514 bytes
 pandas/tests/io/test_stata.py                   |   2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)
 create mode 100644 pandas/tests/io/data/stata/stata-compat-110.dta

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 47d879c022ee6..37494ff42d650 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -91,7 +91,7 @@
 
 _version_error = (
     "Version of given Stata file is {version}. pandas supports importing "
-    "versions 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), "
+    "versions 105, 108, 110 (Stata 7), 111 (Stata 7SE), 113 (Stata 8/9), "
     "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16),"
     "and 119 (Stata 15/16, over 32,767 variables)."
 )
@@ -1393,7 +1393,7 @@ def _get_seek_variable_labels(self) -> int:
 
     def _read_old_header(self, first_char: bytes) -> None:
         self._format_version = int(first_char[0])
-        if self._format_version not in [104, 105, 108, 111, 113, 114, 115]:
+        if self._format_version not in [104, 105, 108, 110, 111, 113, 114, 115]:
             raise ValueError(_version_error.format(version=self._format_version))
         self._set_encoding()
         self._byteorder = ">" if self._read_int8() == 0x1 else "<"
@@ -1408,7 +1408,7 @@ def _read_old_header(self, first_char: bytes) -> None:
         self._time_stamp = self._get_time_stamp()
 
         # descriptors
-        if self._format_version > 108:
+        if self._format_version > 110:
             typlist = [int(c) for c in self._path_or_buf.read(self._nvar)]
         else:
             buf = self._path_or_buf.read(self._nvar)
diff --git a/pandas/tests/io/data/stata/stata-compat-110.dta b/pandas/tests/io/data/stata/stata-compat-110.dta
new file mode 100644
index 0000000000000000000000000000000000000000..68e591aba829a31bdce0a3bcfae2f5b5a300801e
GIT binary patch
literal 1514
zcmc~}Vr1Z8U}m5TNJ`4gNlVG;%*;zkt-xnvrUgD(pj@V*8GaR+#zy!Rq~VulU`QdT
zpoD;mVnYK0Dlnu~E%Xf1p`tk1hDLBjs+MpjgDSFkic5TMd?eT;RYQYf42wnuMnhmU
z1O`V4F#iAl`~Uy?|7Xvf**j|{2<)^MvSeULOiE5kO-s+n%wmE%^z0d*eGnBV?S)ZP
XFf%f;FfueS0A=?XgTZBc5fl{wc`rPh

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index d7fb3c0049965..36b47d629a856 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2002,7 +2002,7 @@ def test_read_write_ea_dtypes(self, dtype_backend, temp_file, tmp_path):
         tm.assert_frame_equal(written_and_read_again.set_index("index"), expected)
 
 
-@pytest.mark.parametrize("version", [105, 108, 111, 113, 114])
+@pytest.mark.parametrize("version", [105, 108, 110, 111, 113, 114])
 def test_backward_compat(version, datapath):
     data_base = datapath("io", "data", "stata")
     ref = os.path.join(data_base, "stata-compat-118.dta")

From 48f98f0a7eaa423b0ea0741793032d976b977af3 Mon Sep 17 00:00:00 2001
From: Chris Charlton <c.charlton@bristol.ac.uk>
Date: Wed, 8 May 2024 21:28:38 +0100
Subject: [PATCH 2/5] Add whatsnew note to v3.0.0.rst

---
 doc/source/whatsnew/v3.0.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e4dad8800d78f..eb3e2b875eaba 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -44,6 +44,8 @@ Other enhancements
 - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handle the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
+- :func:`read_stata` now supports reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:

From 605924b932d761b0ab83e5105adfd921d91c1985 Mon Sep 17 00:00:00 2001
From: Chris Charlton <c.charlton@bristol.ac.uk>
Date: Tue, 9 Apr 2024 18:23:29 +0100
Subject: [PATCH 3/5] Add a test data file containing value labels

---
 pandas/tests/io/data/stata/stata4_110.dta | Bin 0 -> 1528 bytes
 pandas/tests/io/test_stata.py             |  14 ++++++--------
 2 files changed, 6 insertions(+), 8 deletions(-)
 create mode 100644 pandas/tests/io/data/stata/stata4_110.dta

diff --git a/pandas/tests/io/data/stata/stata4_110.dta b/pandas/tests/io/data/stata/stata4_110.dta
new file mode 100644
index 0000000000000000000000000000000000000000..3ea01040448b09f1efa0bf182ba6b814a105bdb1
GIT binary patch
literal 1528
zcmd5+OHRWu5S<pvZ-FY<uwY>!RboTcR9(<+un)i)q6D1Al51CS+QN!%xdlh*QF;@Y
zNdryQMnM83EWMFOd4BU|<k-|}(0~*8gT*{{A0({X@p`P&AM|<<Mp{MDMC)f?2cgvR
zaaY+5LU#v4nFl)>YnjRtp<LXbsWkRS$~k3*t|$~~8>A}_B_;~&)@{2p%=1>iu=l!O
zNg0QF@pY&Ra+}*q!n)ZKoxH!2uUM_JgkagoSw1#44H6ZETDCW>em@h&rm;+ZPgZPs
zV>2uunX@Cg`sP@Gc|MFv=eq>)vV5M$5$K6@l``v*tOc{Xg%^&EtupD)hFJq(0s|1{
zcXfcKzyJi`dl&=-0_XbxOA0`k^9I0z0=&NfyzzrPWa)w*5(Pt+F8L7&V8~Js$GGDZ
z=xkHq^Dw`oH-b_oUU64>sgOe_JOnNBMEa+QbHoMW5^;sNM%*B%N$QN+AwHQ$+dwwy
zEEytAYb2yI3u&B436Y&75UHsIr=GycsWc$fFiycJB4RqgLzVv#a;WGTf>zOR(b%>D
Psz4*D{1K7!Z$|O~T9whd

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 36b47d629a856..c31501a631d99 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -225,11 +225,9 @@ def test_read_dta3(self, file, datapath):
 
         tm.assert_frame_equal(parsed, expected)
 
-    @pytest.mark.parametrize(
-        "file", ["stata4_111", "stata4_113", "stata4_114", "stata4_115", "stata4_117"]
-    )
-    def test_read_dta4(self, file, datapath):
-        file = datapath("io", "data", "stata", f"{file}.dta")
+    @pytest.mark.parametrize("version", [110, 111, 113, 114, 115, 117])
+    def test_read_dta4(self, version, datapath):
+        file = datapath("io", "data", "stata", f"stata4_{version}.dta")
         parsed = self.read_dta(file)
 
         expected = DataFrame.from_records(
@@ -271,11 +269,11 @@ def test_read_dta4(self, file, datapath):
         # stata doesn't save .category metadata
         tm.assert_frame_equal(parsed, expected)
 
-    @pytest.mark.parametrize("file", ["stata4_105", "stata4_108"])
-    def test_readold_dta4(self, file, datapath):
+    @pytest.mark.parametrize("version", [105, 108])
+    def test_readold_dta4(self, version, datapath):
         # This test is the same as test_read_dta4 above except that the columns
         # had to be renamed to match the restrictions in older file format
-        file = datapath("io", "data", "stata", f"{file}.dta")
+        file = datapath("io", "data", "stata", f"stata4_{version}.dta")
         parsed = self.read_dta(file)
 
         expected = DataFrame.from_records(

From 524c28b2ae93ca7ed6c037d1843bfa853385d585 Mon Sep 17 00:00:00 2001
From: Chris Charlton <c.charlton@bristol.ac.uk>
Date: Tue, 9 Apr 2024 22:00:12 +0100
Subject: [PATCH 4/5] Compare version number inclusively when determining
 whether to use old or new typlist version

---
 pandas/io/stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 37494ff42d650..b87ec94b85bb0 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -1408,7 +1408,7 @@ def _read_old_header(self, first_char: bytes) -> None:
         self._time_stamp = self._get_time_stamp()
 
         # descriptors
-        if self._format_version > 110:
+        if self._format_version >= 111:
             typlist = [int(c) for c in self._path_or_buf.read(self._nvar)]
         else:
             buf = self._path_or_buf.read(self._nvar)

From ee3bae80cef9d1e8ec429cd68053e9b9315a4ed9 Mon Sep 17 00:00:00 2001
From: Chris Charlton <c.charlton@bristol.ac.uk>
Date: Tue, 9 Apr 2024 23:02:32 +0100
Subject: [PATCH 5/5] Add a big-endian version of the test data set

---
 .../tests/io/data/stata/stata-compat-be-110.dta  | Bin 0 -> 1514 bytes
 pandas/tests/io/test_stata.py                    |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 pandas/tests/io/data/stata/stata-compat-be-110.dta

diff --git a/pandas/tests/io/data/stata/stata-compat-be-110.dta b/pandas/tests/io/data/stata/stata-compat-be-110.dta
new file mode 100644
index 0000000000000000000000000000000000000000..0936be478028c463201c542bba7dc27f0cb89cc5
GIT binary patch
literal 1514
zcmc~}WMp9AU|?Wi24Y%+q@>K8w3M9A%)FG;3VfDjTHuof%4Hgw;a8DqY=mDy8h&X8
zh7^JdN(iVZHZ&lh0z+EWLeC%_DvFbBXarZJY6)jDs3MD}xWwniM}kdKH8d#3uxM0Z
zGz3ONU~q&0<Nx3P|JVQjzxT|Uv-Ue@g1{^W21}vDq~w&;wDgS3EGDQ!_xS)-oP?_o
cMyOzBWMO1zU|`$_)N6m)7z}`Bh#*t|0C99Yng9R*

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index c31501a631d99..2f981953a6237 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2010,7 +2010,7 @@ def test_backward_compat(version, datapath):
     tm.assert_frame_equal(old_dta, expected, check_dtype=False)
 
 
-@pytest.mark.parametrize("version", [105, 108, 111, 113, 114, 118])
+@pytest.mark.parametrize("version", [105, 108, 110, 111, 113, 114, 118])
 def test_bigendian(version, datapath):
     ref = datapath("io", "data", "stata", f"stata-compat-{version}.dta")
     big = datapath("io", "data", "stata", f"stata-compat-be-{version}.dta")