Skip to content
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,12 @@ As contributors and maintainers to this project, you are expected to abide by pa
<hr>

[Go to Top](#table-of-contents)
Neva Aydın

Heba Walid Awad

Zeynep Genel

Gül Akkoca

Berat Nevcanoğlu
39 changes: 37 additions & 2 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,21 @@ def write(
from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
if index is not None:
from_pandas_kwargs["preserve_index"] = index

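# Added section: store each StringDtype column's storage in the table's schema metadata.
# NOTE(review): the check below reads `df.dtype` while the comprehension uses `df.dtypes`,
# and the dict is bound as `string_dtype` but iterated as `string_dtypes` - confirm and
# reconcile both names.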
table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
if any(isinstance(dtype,pd.StringDtype) for dtype in df.dtype):
string_dtype={
col:str(dtype.storage)
for col,dtype in df.dtypes.items()
if isinstance(dtype,pd.StringDtype)
}
metadata = table.schema.metadata or{}
for col,storage in string_dtypes.items():
key=f"pandas_string_dtype_{col}".encode()
val= storage.encode()
metadata[key]= val
table= table.replace_schema_metadata(metadata)


if df.attrs:
df_metadata = {"PANDAS_ATTRS": json.dumps(df.attrs)}
Expand Down Expand Up @@ -254,13 +267,35 @@ def read(
mode="rb",
)
try:
pa_table = self.api.parquet.read_table(
pa_table = self.api.parquet.read_table(
path_or_handle,
columns=columns,
filesystem=filesystem,
filters=filters,
**kwargs,
)

# Added section: read the pandas_string_dtype_* schema metadata entries.
string_dtypes = {}
metadata = pa_table.schema.metadata
if metadata:
for key, value in metadata.items():
if key.startswith(b"pandas_string_dtype_"):
col_name = key.replace(b"pandas_string_dtype_", b"").decode()
string_dtypes[col_name] = value.decode()

# Added section: the types_mapper function.
def types_mapper(pa_type):
    """Map an Arrow type to the StringDtype recorded for a column of that type.

    Scans the fields of ``pa_table.schema`` in order and picks the first one
    whose type equals ``pa_type`` and whose name is a key of
    ``string_dtypes``; the stored value is used as the ``storage`` argument
    of ``pd.StringDtype``. Returns ``None`` when no such field exists.

    NOTE(review): the lookup is keyed on the Arrow type, not on a column, so
    the result is always the storage of the first recorded column sharing
    that type - confirm this is acceptable for frames whose same-typed
    columns were written with different (or no) StringDtype.
    """
    recorded_storages = (
        string_dtypes[column.name]
        for column in pa_table.schema
        if column.type == pa_type and column.name in string_dtypes
    )
    storage = next(recorded_storages, None)
    if storage is None:
        # No recorded column has this Arrow type: fall back to default mapper.
        return None
    return pd.StringDtype(storage=storage)

if to_pandas_kwargs is None:
to_pandas_kwargs = {}
to_pandas_kwargs["types_mapper"] = types_mapper
with catch_warnings():
filterwarnings(
"ignore",
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,26 @@ def test_basic_subset_columns(self, pa, df_full):
expected=df[["string", "int"]],
read_kwargs={"columns": ["string", "int"]},
)
# Newly added test.
@pytest.mark.parametrize("string_storage", ["pyarrow", "python"])
def test_parquet_stringdtype_roundtrip(self, tmp_path, pa, string_storage):
    """Check that a StringDtype column keeps its storage across a parquet round trip.

    Parameters
    ----------
    tmp_path : pathlib.Path
        Per-test temporary directory provided by pytest.
    pa
        Fixture requested by the original test; not used in the body.
    string_storage : str
        Storage backend under test, supplied by ``parametrize``. It must be
        listed in the signature, otherwise pytest rejects the test at
        collection time and the body would raise ``NameError``.
    """
    import pandas as pd

    expected_dtype = pd.StringDtype(storage=string_storage)
    df = pd.DataFrame({"a": pd.Series(["x", "y", "z"], dtype=expected_dtype)})

    file_path = tmp_path / "stringdtype.parquet"
    df.to_parquet(file_path, engine="pyarrow")
    result = pd.read_parquet(file_path, engine="pyarrow")

    # Compare the dtype first so a storage mismatch is reported explicitly.
    assert result["a"].dtype == expected_dtype, (
        f"Dtype mismatch: got {result['a'].dtype}, expected {expected_dtype}"
    )
    pd.testing.assert_frame_equal(result, df)


def test_to_bytes_without_path_or_buf_provided(self, pa, df_full):
# GH 37105
Expand Down
Loading