From cd5a4c0706176833fd536ee5ef39587b79707e3a Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Fri, 15 Apr 2022 10:01:19 +0100 Subject: [PATCH] Migrate to latest parquet2 (#923) --- Cargo.toml | 4 +- arrow-parquet-integration-testing/src/main.rs | 3 +- benches/write_parquet.rs | 3 +- examples/parquet_write.rs | 3 +- examples/parquet_write_parallel/src/main.rs | 3 +- src/array/fixed_size_binary/mod.rs | 30 +- src/doc/lib.md | 3 +- src/error.rs | 6 + src/io/parquet/mod.rs | 10 +- .../parquet/read/deserialize/binary/basic.rs | 199 ++++++++-- .../parquet/read/deserialize/binary/nested.rs | 64 ++- .../parquet/read/deserialize/binary/utils.rs | 61 +++ .../parquet/read/deserialize/boolean/basic.rs | 100 +++-- .../read/deserialize/boolean/nested.rs | 36 +- src/io/parquet/read/deserialize/dictionary.rs | 134 ++++--- .../deserialize/fixed_size_binary/basic.rs | 128 ++++-- src/io/parquet/read/deserialize/mod.rs | 7 +- .../parquet/read/deserialize/nested_utils.rs | 72 ++-- .../read/deserialize/primitive/basic.rs | 83 +++- .../read/deserialize/primitive/nested.rs | 61 ++- src/io/parquet/read/deserialize/simple.rs | 84 ++-- src/io/parquet/read/deserialize/utils.rs | 366 ++++++++++++------ src/io/parquet/read/indexes/binary.rs | 43 ++ src/io/parquet/read/indexes/boolean.rs | 21 + .../parquet/read/indexes/fixed_len_binary.rs | 58 +++ src/io/parquet/read/indexes/mod.rs | 141 +++++++ src/io/parquet/read/indexes/primitive.rs | 204 ++++++++++ src/io/parquet/read/mod.rs | 30 +- src/io/parquet/read/row_group.rs | 12 +- src/io/parquet/read/schema/convert.rs | 205 +++++----- src/io/parquet/read/schema/mod.rs | 4 - src/io/parquet/read/statistics/primitive.rs | 52 ++- src/io/parquet/write/binary/basic.rs | 20 +- src/io/parquet/write/binary/mod.rs | 1 + src/io/parquet/write/binary/nested.rs | 16 +- src/io/parquet/write/boolean/basic.rs | 13 +- src/io/parquet/write/boolean/nested.rs | 13 +- src/io/parquet/write/dictionary.rs | 97 +++-- src/io/parquet/write/file.rs | 36 +- src/io/parquet/write/fixed_len_bytes.rs | 32 +- src/io/parquet/write/mod.rs | 65 ++-- src/io/parquet/write/primitive/basic.rs | 18 +- src/io/parquet/write/primitive/mod.rs | 1 + src/io/parquet/write/primitive/nested.rs | 15 +- src/io/parquet/write/row_group.rs | 32 +- src/io/parquet/write/schema.rs | 98 ++--- src/io/parquet/write/sink.rs | 28 +- src/io/parquet/write/utf8/basic.rs | 20 +- src/io/parquet/write/utf8/mod.rs | 1 + src/io/parquet/write/utf8/nested.rs | 15 +- src/io/parquet/write/utils.rs | 62 +-- tests/it/io/parquet/mod.rs | 28 +- tests/it/io/parquet/read_indexes.rs | 223 +++++++++++ tests/it/io/parquet/write.rs | 11 +- tests/it/io/parquet/write_async.rs | 6 +- 55 files changed, 2115 insertions(+), 966 deletions(-) create mode 100644 src/io/parquet/read/indexes/binary.rs create mode 100644 src/io/parquet/read/indexes/boolean.rs create mode 100644 src/io/parquet/read/indexes/fixed_len_binary.rs create mode 100644 src/io/parquet/read/indexes/mod.rs create mode 100644 src/io/parquet/read/indexes/primitive.rs create mode 100644 tests/it/io/parquet/read_indexes.rs diff --git a/Cargo.toml b/Cargo.toml index f79de24413e..95a029f1648 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ hex = { version = "^0.4", optional = true } # for IPC compression lz4 = { version = "1.23.1", optional = true } -zstd = { version = "0.10", optional = true } +zstd = { version = "0.11", optional = true } rand = { version = "0.8", optional = true } @@ -68,7 +68,7 @@ futures = { version = "0.3", optional = true } ahash = { version = "0.7", optional = true } # 
parquet support -parquet2 = { version = "0.10", optional = true, default_features = false, features = ["stream"] } +parquet2 = { version = "0.11", optional = true, default_features = false, features = ["stream"] } # avro support avro-schema = { version = "0.2", optional = true } diff --git a/arrow-parquet-integration-testing/src/main.rs b/arrow-parquet-integration-testing/src/main.rs index 787474cce7d..a9f5c649954 100644 --- a/arrow-parquet-integration-testing/src/main.rs +++ b/arrow-parquet-integration-testing/src/main.rs @@ -196,8 +196,7 @@ fn main() -> Result<()> { writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _ = writer.end(None)?; diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs index 32b264bfe53..42cf8deec49 100644 --- a/benches/write_parquet.rs +++ b/benches/write_parquet.rs @@ -34,8 +34,7 @@ fn write(array: &dyn Array, encoding: Encoding) -> Result<()> { writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _ = writer.end(None)?; Ok(()) diff --git a/examples/parquet_write.rs b/examples/parquet_write.rs index df7939563bb..f11e2ec4f29 100644 --- a/examples/parquet_write.rs +++ b/examples/parquet_write.rs @@ -30,8 +30,7 @@ fn write_batch(path: &str, schema: Schema, columns: Chunk>) -> Re writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _size = writer.end(None)?; Ok(()) diff --git a/examples/parquet_write_parallel/src/main.rs b/examples/parquet_write_parallel/src/main.rs index e997f11ce4b..2af167e6279 100644 --- a/examples/parquet_write_parallel/src/main.rs +++ b/examples/parquet_write_parallel/src/main.rs @@ -99,8 +99,7 @@ fn parallel_write(path: &str, schema: &Schema, batches: &[Chunk]) -> Result<()> // Write the file. writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _size = writer.end(None)?; diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 82695c437e7..f57fe544096 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -1,4 +1,9 @@ -use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType, error::ArrowError}; +use crate::{ + bitmap::{Bitmap, MutableBitmap}, + buffer::Buffer, + datatypes::DataType, + error::ArrowError, +}; use super::Array; @@ -274,6 +279,29 @@ impl FixedSizeBinaryArray { .unwrap() .into() } + + /// Creates a [`FixedSizeBinaryArray`] from a slice of arrays of bytes + pub fn from_slice>(a: P) -> Self { + let values = a.as_ref().iter().flatten().copied().collect::>(); + Self::new(DataType::FixedSizeBinary(N), values.into(), None) + } + + /// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`. + // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it. + pub fn from]>>(slice: P) -> Self { + let values = slice + .as_ref() + .iter() + .copied() + .flat_map(|x| x.unwrap_or([0; N])) + .collect::>(); + let validity = slice + .as_ref() + .iter() + .map(|x| x.is_some()) + .collect::(); + Self::new(DataType::FixedSizeBinary(N), values.into(), validity.into()) + } } pub trait FixedSizeBinaryValues { diff --git a/src/doc/lib.md b/src/doc/lib.md index 9638ff47480..08108b50932 100644 --- a/src/doc/lib.md +++ b/src/doc/lib.md @@ -62,8 +62,7 @@ fn main() -> Result<()> { // Write the file. 
writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } let _ = writer.end(None)?; Ok(()) diff --git a/src/error.rs b/src/error.rs index 22faa164c35..1ee610085a4 100644 --- a/src/error.rs +++ b/src/error.rs @@ -52,6 +52,12 @@ impl From for ArrowError { } } +impl From for ArrowError { + fn from(error: std::string::FromUtf8Error) -> Self { + ArrowError::External("".to_string(), Box::new(error)) + } +} + impl From for ArrowError { fn from(error: simdutf8::basic::Utf8Error) -> Self { ArrowError::External("".to_string(), Box::new(error)) diff --git a/src/io/parquet/mod.rs b/src/io/parquet/mod.rs index ba17c825b6d..5ef1042e988 100644 --- a/src/io/parquet/mod.rs +++ b/src/io/parquet/mod.rs @@ -6,10 +6,10 @@ pub mod write; const ARROW_SCHEMA_META_KEY: &str = "ARROW:schema"; -impl From for ArrowError { - fn from(error: parquet2::error::ParquetError) -> Self { +impl From for ArrowError { + fn from(error: parquet2::error::Error) -> Self { match error { - parquet2::error::ParquetError::FeatureNotActive(_, _) => { + parquet2::error::Error::FeatureNotActive(_, _) => { let message = "Failed to read a compressed parquet file. \ Use the cargo feature \"io_parquet_compression\" to read compressed parquet files." .to_string(); @@ -20,8 +20,8 @@ impl From for ArrowError { } } -impl From for parquet2::error::ParquetError { +impl From for parquet2::error::Error { fn from(error: ArrowError) -> Self { - parquet2::error::ParquetError::General(error.to_string()) + parquet2::error::Error::General(error.to_string()) } } diff --git a/src/io/parquet/read/deserialize/binary/basic.rs b/src/io/parquet/read/deserialize/binary/basic.rs index 32f491d0b37..0ff4cd31f34 100644 --- a/src/io/parquet/read/deserialize/binary/basic.rs +++ b/src/io/parquet/read/deserialize/binary/basic.rs @@ -2,6 +2,7 @@ use std::collections::VecDeque; use std::default::Default; use parquet2::{ + deserialize::SliceFilteredIter, encoding::{hybrid_rle, Encoding}, page::{BinaryPageDict, DataPage}, schema::Repetition, @@ -16,10 +17,11 @@ use crate::{ }; use super::super::utils::{ - extend_from_decoder, next, BinaryIter, DecodedState, MaybeNext, OptionalPageValidity, + extend_from_decoder, get_selected_rows, next, DecodedState, FilteredOptionalPageValidity, + MaybeNext, OptionalPageValidity, }; use super::super::DataPages; -use super::{super::utils, utils::Binary}; +use super::{super::utils, utils::*}; /* fn read_delta_optional( @@ -61,16 +63,79 @@ fn read_delta_optional( #[derive(Debug)] pub(super) struct Required<'a> { - pub values: BinaryIter<'a>, - pub remaining: usize, + pub values: SizedBinaryIter<'a>, } impl<'a> Required<'a> { pub fn new(page: &'a DataPage) -> Self { - Self { - values: BinaryIter::new(page.buffer()), - remaining: page.num_values(), - } + let values = SizedBinaryIter::new(page.buffer(), page.num_values()); + + Self { values } + } + + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + +#[derive(Debug)] +pub(super) struct FilteredRequired<'a> { + pub values: SliceFilteredIter>, +} + +impl<'a> FilteredRequired<'a> { + pub fn new(page: &'a DataPage) -> Self { + let values = SizedBinaryIter::new(page.buffer(), page.num_values()); + + let rows = get_selected_rows(page); + let values = SliceFilteredIter::new(values, rows); + + Self { values } + } + + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + +#[derive(Debug)] +pub(super) struct RequiredDictionary<'a> { + pub values: hybrid_rle::HybridRleDecoder<'a>, + pub dict: &'a BinaryPageDict, 
+} + +impl<'a> RequiredDictionary<'a> { + pub fn new(page: &'a DataPage, dict: &'a BinaryPageDict) -> Self { + let values = utils::dict_indices_decoder(page); + + Self { dict, values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + +#[derive(Debug)] +pub(super) struct FilteredRequiredDictionary<'a> { + pub values: SliceFilteredIter>, + pub dict: &'a BinaryPageDict, +} + +impl<'a> FilteredRequiredDictionary<'a> { + pub fn new(page: &'a DataPage, dict: &'a BinaryPageDict) -> Self { + let values = utils::dict_indices_decoder(page); + + let rows = get_selected_rows(page); + let values = SliceFilteredIter::new(values, rows); + + Self { values, dict } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 } } @@ -82,8 +147,7 @@ pub(super) struct ValuesDictionary<'a> { impl<'a> ValuesDictionary<'a> { pub fn new(page: &'a DataPage, dict: &'a BinaryPageDict) -> Self { - let (_, _, indices_buffer) = utils::split_buffer(page); - let values = utils::dict_indices_decoder(indices_buffer, page.num_values()); + let values = utils::dict_indices_decoder(page); Self { dict, values } } @@ -97,17 +161,25 @@ impl<'a> ValuesDictionary<'a> { enum State<'a> { Optional(OptionalPageValidity<'a>, BinaryIter<'a>), Required(Required<'a>), - RequiredDictionary(ValuesDictionary<'a>), + RequiredDictionary(RequiredDictionary<'a>), OptionalDictionary(OptionalPageValidity<'a>, ValuesDictionary<'a>), + FilteredRequired(FilteredRequired<'a>), + FilteredOptional(FilteredOptionalPageValidity<'a>, BinaryIter<'a>), + FilteredRequiredDictionary(FilteredRequiredDictionary<'a>), + FilteredOptionalDictionary(FilteredOptionalPageValidity<'a>, ValuesDictionary<'a>), } impl<'a> utils::PageState<'a> for State<'a> { fn len(&self) -> usize { match self { State::Optional(validity, _) => validity.len(), - State::Required(state) => state.remaining, + State::Required(state) => state.len(), State::RequiredDictionary(values) => values.len(), State::OptionalDictionary(optional, _) => optional.len(), + State::FilteredRequired(state) => state.len(), + State::FilteredOptional(validity, _) => validity.len(), + State::FilteredRequiredDictionary(values) => values.len(), + State::FilteredOptionalDictionary(optional, _) => optional.len(), } } } @@ -162,16 +234,22 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; - - match (page.encoding(), page.dictionary_page(), is_optional) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { - Ok(State::RequiredDictionary(ValuesDictionary::new( + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match ( + page.encoding(), + page.dictionary_page(), + is_optional, + is_filtered, + ) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { + Ok(State::RequiredDictionary(RequiredDictionary::new( page, dict.as_any().downcast_ref().unwrap(), ))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::OptionalDictionary( @@ -179,21 +257,41 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { ValuesDictionary::new(page, dict), )) } - (Encoding::Plain, _, true) => { + 
(Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, true) => { + let dict = dict.as_any().downcast_ref().unwrap(); + + Ok(State::FilteredRequiredDictionary( + FilteredRequiredDictionary::new(page, dict), + )) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, true) => { + let dict = dict.as_any().downcast_ref().unwrap(); + + Ok(State::FilteredOptionalDictionary( + FilteredOptionalPageValidity::new(page), + ValuesDictionary::new(page, dict), + )) + } + (Encoding::Plain, _, true, false) => { let (_, _, values) = utils::split_buffer(page); let values = BinaryIter::new(values); Ok(State::Optional(OptionalPageValidity::new(page), values)) } - (Encoding::Plain, _, false) => Ok(State::Required(Required::new(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Binary", - )), + (Encoding::Plain, _, false, false) => Ok(State::Required(Required::new(page))), + (Encoding::Plain, _, false, true) => { + Ok(State::FilteredRequired(FilteredRequired::new(page))) + } + (Encoding::Plain, _, true, true) => { + let (_, _, values) = utils::split_buffer(page); + + Ok(State::FilteredOptional( + FilteredOptionalPageValidity::new(page), + BinaryIter::new(values), + )) + } + _ => Err(utils::not_implemented(page)), } } @@ -220,7 +318,11 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { page_values, ), State::Required(page) => { - page.remaining = page.remaining.saturating_sub(additional); + for x in page.values.by_ref().take(additional) { + values.push(x) + } + } + State::FilteredRequired(page) => { for x in page.values.by_ref().take(additional) { values.push(x) } @@ -257,6 +359,47 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { values.push(x) } } + State::FilteredOptional(page_validity, page_values) => { + utils::extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + page_values.by_ref(), + ); + } + State::FilteredRequiredDictionary(page) => { + let dict_values = page.dict.values(); + let dict_offsets = page.dict.offsets(); + let op = move |index: u32| { + let index = index as usize; + let dict_offset_i = dict_offsets[index] as usize; + let dict_offset_ip1 = dict_offsets[index + 1] as usize; + &dict_values[dict_offset_i..dict_offset_ip1] + }; + + for x in page.values.by_ref().map(op).take(additional) { + values.push(x) + } + } + State::FilteredOptionalDictionary(page_validity, page_values) => { + let dict_values = page_values.dict.values(); + let dict_offsets = page_values.dict.offsets(); + + let op = move |index: u32| { + let index = index as usize; + let dict_offset_i = dict_offsets[index] as usize; + let dict_offset_ip1 = dict_offsets[index + 1] as usize; + &dict_values[dict_offset_i..dict_offset_ip1] + }; + utils::extend_from_decoder( + validity, + page_validity, + Some(additional), + values, + &mut page_values.values.by_ref().map(op), + ) + } } } } diff --git a/src/io/parquet/read/deserialize/binary/nested.rs b/src/io/parquet/read/deserialize/binary/nested.rs index 522cc63cf49..a0427d08474 100644 --- a/src/io/parquet/read/deserialize/binary/nested.rs +++ b/src/io/parquet/read/deserialize/binary/nested.rs @@ -10,7 +10,7 @@ use crate::{ use super::super::nested_utils::*; use super::super::utils::MaybeNext; use super::basic::ValuesDictionary; -use super::utils::Binary; +use super::utils::*; use super::{ super::utils, basic::{finish, Required, TraitBinaryArray}, @@ -19,7 +19,7 @@ use super::{ #[allow(clippy::large_enum_variant)] #[derive(Debug)] enum State<'a> { - 
Optional(Optional<'a>, utils::BinaryIter<'a>), + Optional(Optional<'a>, BinaryIter<'a>), Required(Required<'a>), RequiredDictionary(ValuesDictionary<'a>), OptionalDictionary(Optional<'a>, ValuesDictionary<'a>), @@ -29,7 +29,7 @@ impl<'a> utils::PageState<'a> for State<'a> { fn len(&self) -> usize { match self { State::Optional(validity, _) => validity.len(), - State::Required(state) => state.remaining, + State::Required(state) => state.len(), State::RequiredDictionary(required) => required.len(), State::OptionalDictionary(optional, _) => optional.len(), } @@ -47,35 +47,35 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; - - match (page.encoding(), page.dictionary_page(), is_optional) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match ( + page.encoding(), + page.dictionary_page(), + is_optional, + is_filtered, + ) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::RequiredDictionary(ValuesDictionary::new(page, dict))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::OptionalDictionary( Optional::new(page), ValuesDictionary::new(page, dict), )) } - (Encoding::Plain, None, true) => { + (Encoding::Plain, None, true, false) => { let (_, _, values) = utils::split_buffer(page); - let values = utils::BinaryIter::new(values); + let values = BinaryIter::new(values); Ok(State::Optional(Optional::new(page), values)) } - (Encoding::Plain, None, false) => Ok(State::Required(Required::new(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Binary", - )), + (Encoding::Plain, None, false, false) => Ok(State::Required(Required::new(page))), + _ => Err(utils::not_implemented(page)), } } @@ -95,18 +95,12 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { let (values, validity) = decoded; match state { State::Optional(page_validity, page_values) => { - let max_def = page_validity.max_def(); - read_optional_values( - page_validity.definition_levels.by_ref(), - max_def, - page_values.by_ref(), - values, - validity, - additional, - ) + let items = page_validity.by_ref().take(additional); + let items = Zip::new(items, page_values.by_ref()); + + read_optional_values(items, values, validity) } State::Required(page) => { - page.remaining -= additional; for x in page.values.by_ref().take(additional) { values.push(x) } @@ -126,7 +120,6 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { } } State::OptionalDictionary(page_validity, page_values) => { - let max_def = page_validity.max_def(); let dict_values = page_values.dict.values(); let dict_offsets = page_values.dict.offsets(); @@ -136,14 +129,11 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { let dict_offset_ip1 = dict_offsets[index + 1] as usize; &dict_values[dict_offset_i..dict_offset_ip1] }; - read_optional_values( - page_validity.definition_levels.by_ref(), - max_def, - page_values.values.by_ref().map(op), - values, - validity, - additional, - ) + + let items = 
page_validity.by_ref().take(additional); + let items = Zip::new(items, page_values.values.by_ref().map(op)); + + read_optional_values(items, values, validity) } } } diff --git a/src/io/parquet/read/deserialize/binary/utils.rs b/src/io/parquet/read/deserialize/binary/utils.rs index d967417eb63..370ad6ffae9 100644 --- a/src/io/parquet/read/deserialize/binary/utils.rs +++ b/src/io/parquet/read/deserialize/binary/utils.rs @@ -87,3 +87,64 @@ impl<'a, O: Offset> Pushable<&'a [u8]> for Binary { self.extend_constant(additional) } } + +#[derive(Debug)] +pub struct BinaryIter<'a> { + values: &'a [u8], +} + +impl<'a> BinaryIter<'a> { + pub fn new(values: &'a [u8]) -> Self { + Self { values } + } +} + +impl<'a> Iterator for BinaryIter<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option { + if self.values.is_empty() { + return None; + } + let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize; + self.values = &self.values[4..]; + let result = &self.values[..length]; + self.values = &self.values[length..]; + Some(result) + } +} + +#[derive(Debug)] +pub struct SizedBinaryIter<'a> { + iter: BinaryIter<'a>, + remaining: usize, +} + +impl<'a> SizedBinaryIter<'a> { + pub fn new(values: &'a [u8], size: usize) -> Self { + let iter = BinaryIter::new(values); + Self { + iter, + remaining: size, + } + } +} + +impl<'a> Iterator for SizedBinaryIter<'a> { + type Item = &'a [u8]; + + #[inline] + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + } else { + self.remaining -= 1 + }; + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + (self.remaining, Some(self.remaining)) + } +} diff --git a/src/io/parquet/read/deserialize/boolean/basic.rs b/src/io/parquet/read/deserialize/boolean/basic.rs index 705b6d0747a..6f6c31b0f2c 100644 --- a/src/io/parquet/read/deserialize/boolean/basic.rs +++ b/src/io/parquet/read/deserialize/boolean/basic.rs @@ -1,6 +1,8 @@ use std::collections::VecDeque; -use parquet2::{encoding::Encoding, page::DataPage, schema::Repetition}; +use parquet2::{ + deserialize::SliceFilteredIter, encoding::Encoding, page::DataPage, schema::Repetition, +}; use crate::{ array::BooleanArray, @@ -11,25 +13,19 @@ use crate::{ use super::super::utils; use super::super::utils::{ - extend_from_decoder, next, split_buffer, DecodedState, Decoder, MaybeNext, OptionalPageValidity, + extend_from_decoder, get_selected_rows, next, split_buffer, DecodedState, Decoder, + FilteredOptionalPageValidity, MaybeNext, OptionalPageValidity, }; use super::super::DataPages; -// The state of an optional DataPage with a boolean physical type #[derive(Debug)] -struct Optional<'a> { - values: BitmapIter<'a>, - validity: OptionalPageValidity<'a>, -} +struct Values<'a>(BitmapIter<'a>); -impl<'a> Optional<'a> { +impl<'a> Values<'a> { pub fn new(page: &'a DataPage) -> Self { - let (_, _, values_buffer) = split_buffer(page); + let (_, _, values) = split_buffer(page); - Self { - values: BitmapIter::new(values_buffer, 0, values_buffer.len() * 8), - validity: OptionalPageValidity::new(page), - } + Self(BitmapIter::new(values, 0, values.len() * 8)) } } @@ -52,18 +48,44 @@ impl<'a> Required<'a> { } } +#[derive(Debug)] +struct FilteredRequired<'a> { + values: SliceFilteredIter>, +} + +impl<'a> FilteredRequired<'a> { + pub fn new(page: &'a DataPage) -> Self { + // todo: replace this by an iterator over slices, for faster deserialization + let values = BitmapIter::new(page.buffer(), 0, page.num_values()); + + let rows = get_selected_rows(page); + let values = 
SliceFilteredIter::new(values, rows); + + Self { values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + // The state of a `DataPage` of `Boolean` parquet boolean type #[derive(Debug)] enum State<'a> { - Optional(Optional<'a>), + Optional(OptionalPageValidity<'a>, Values<'a>), Required(Required<'a>), + FilteredRequired(FilteredRequired<'a>), + FilteredOptional(FilteredOptionalPageValidity<'a>, Values<'a>), } impl<'a> State<'a> { pub fn len(&self) -> usize { match self { - State::Optional(page) => page.validity.len(), + State::Optional(validity, _) => validity.len(), State::Required(page) => page.length - page.offset, + State::FilteredRequired(page) => page.len(), + State::FilteredOptional(optional, _) => optional.len(), } } } @@ -89,18 +111,23 @@ impl<'a> Decoder<'a> for BooleanDecoder { fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; - - match (page.encoding(), is_optional) { - (Encoding::Plain, true) => Ok(State::Optional(Optional::new(page))), - (Encoding::Plain, false) => Ok(State::Required(Required::new(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Boolean", + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match (page.encoding(), is_optional, is_filtered) { + (Encoding::Plain, true, false) => Ok(State::Optional( + OptionalPageValidity::new(page), + Values::new(page), + )), + (Encoding::Plain, false, false) => Ok(State::Required(Required::new(page))), + (Encoding::Plain, true, true) => Ok(State::FilteredOptional( + FilteredOptionalPageValidity::new(page), + Values::new(page), )), + (Encoding::Plain, false, true) => { + Ok(State::FilteredRequired(FilteredRequired::new(page))) + } + _ => Err(utils::not_implemented(page)), } } @@ -119,18 +146,33 @@ impl<'a> Decoder<'a> for BooleanDecoder { ) { let (values, validity) = decoded; match state { - State::Optional(page) => extend_from_decoder( + State::Optional(page_validity, page_values) => extend_from_decoder( validity, - &mut page.validity, + page_validity, Some(remaining), values, - &mut page.values, + &mut page_values.0, ), State::Required(page) => { let remaining = remaining.min(page.length - page.offset); values.extend_from_slice(page.values, page.offset, remaining); page.offset += remaining; } + State::FilteredRequired(page) => { + values.reserve(remaining); + for item in page.values.by_ref().take(remaining) { + values.push(item) + } + } + State::FilteredOptional(page_validity, page_values) => { + utils::extend_from_decoder( + validity, + page_validity, + Some(remaining), + values, + page_values.0.by_ref(), + ); + } } } } diff --git a/src/io/parquet/read/deserialize/boolean/nested.rs b/src/io/parquet/read/deserialize/boolean/nested.rs index 276283f9080..5f30c698a80 100644 --- a/src/io/parquet/read/deserialize/boolean/nested.rs +++ b/src/io/parquet/read/deserialize/boolean/nested.rs @@ -65,23 +65,18 @@ impl<'a> Decoder<'a> for BooleanDecoder { fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); - match (page.encoding(), is_optional) { - (Encoding::Plain, true) => { + match (page.encoding(), is_optional, is_filtered) { + 
(Encoding::Plain, true, false) => { let (_, _, values) = utils::split_buffer(page); let values = BitmapIter::new(values, 0, values.len() * 8); Ok(State::Optional(Optional::new(page), values)) } - (Encoding::Plain, false) => Ok(State::Required(Required::new(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Boolean", - )), + (Encoding::Plain, false, false) => Ok(State::Required(Required::new(page))), + _ => Err(utils::not_implemented(page)), } } @@ -96,24 +91,19 @@ impl<'a> Decoder<'a> for BooleanDecoder { &self, state: &mut State, decoded: &mut Self::DecodedState, - required: usize, + additional: usize, ) { let (values, validity) = decoded; match state { State::Optional(page_validity, page_values) => { - let max_def = page_validity.max_def(); - read_optional_values( - page_validity.definition_levels.by_ref(), - max_def, - page_values.by_ref(), - values, - validity, - required, - ) + let items = page_validity.by_ref().take(additional); + let items = Zip::new(items, page_values.by_ref()); + + read_optional_values(items, values, validity) } State::Required(page) => { - values.extend_from_slice(page.values, page.offset, required); - page.offset += required; + values.extend_from_slice(page.values, page.offset, additional); + page.offset += additional; } } } diff --git a/src/io/parquet/read/deserialize/dictionary.rs b/src/io/parquet/read/deserialize/dictionary.rs index 96aa1b8034d..7a2103c96ee 100644 --- a/src/io/parquet/read/deserialize/dictionary.rs +++ b/src/io/parquet/read/deserialize/dictionary.rs @@ -1,6 +1,7 @@ use std::{collections::VecDeque, sync::Arc}; use parquet2::{ + deserialize::SliceFilteredIter, encoding::{hybrid_rle::HybridRleDecoder, Encoding}, page::{DataPage, DictPage}, schema::Repetition, @@ -13,75 +14,59 @@ use crate::{ }; use super::{ - utils::{self, extend_from_decoder, DecodedState, Decoder, MaybeNext, OptionalPageValidity}, + utils::{ + self, dict_indices_decoder, extend_from_decoder, get_selected_rows, DecodedState, Decoder, + FilteredOptionalPageValidity, MaybeNext, OptionalPageValidity, + }, DataPages, }; // The state of a `DataPage` of `Primitive` parquet primitive type #[derive(Debug)] -pub enum State<'a, K> -where - K: DictionaryKey, -{ - Optional(Optional<'a, K>), - Required(Required<'a, K>), +pub enum State<'a> { + Optional(Optional<'a>), + Required(Required<'a>), + FilteredRequired(FilteredRequired<'a>), + FilteredOptional(FilteredOptionalPageValidity<'a>, HybridRleDecoder<'a>), } -#[inline] -fn values_iter1( - indices_buffer: &[u8], - additional: usize, -) -> std::iter::Map K>> -where - K: DictionaryKey, -{ - // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), - // SPEC: followed by the values encoded using RLE/Bit packed described above (with the given bit width). 
- let bit_width = indices_buffer[0]; - let indices_buffer = &indices_buffer[1..]; +#[derive(Debug)] +pub struct Required<'a> { + values: HybridRleDecoder<'a>, +} - let new_indices = HybridRleDecoder::new(indices_buffer, bit_width as u32, additional); - new_indices.map(Box::new(|x| K::from_u32(x).unwrap()) as _) +impl<'a> Required<'a> { + fn new(page: &'a DataPage) -> Self { + let values = dict_indices_decoder(page); + Self { values } + } } #[derive(Debug)] -pub struct Required<'a, K> -where - K: DictionaryKey, -{ - values: std::iter::Map, Box K + 'a>>, +pub struct FilteredRequired<'a> { + values: SliceFilteredIter>, } -impl<'a, K> Required<'a, K> -where - K: DictionaryKey, -{ +impl<'a> FilteredRequired<'a> { fn new(page: &'a DataPage) -> Self { - let (_, _, indices_buffer) = utils::split_buffer(page); + let values = dict_indices_decoder(page); - let values = values_iter1(indices_buffer, page.num_values()); + let rows = get_selected_rows(page); + let values = SliceFilteredIter::new(values, rows); Self { values } } } #[derive(Debug)] -pub struct Optional<'a, K> -where - K: DictionaryKey, -{ - values: std::iter::Map, Box K + 'a>>, +pub struct Optional<'a> { + values: HybridRleDecoder<'a>, validity: OptionalPageValidity<'a>, } -impl<'a, K> Optional<'a, K> -where - K: DictionaryKey, -{ +impl<'a> Optional<'a> { fn new(page: &'a DataPage) -> Self { - let (_, _, indices_buffer) = utils::split_buffer(page); - - let values = values_iter1(indices_buffer, page.num_values()); + let values = dict_indices_decoder(page); Self { values, @@ -90,14 +75,13 @@ where } } -impl<'a, K> utils::PageState<'a> for State<'a, K> -where - K: DictionaryKey, -{ +impl<'a> utils::PageState<'a> for State<'a> { fn len(&self) -> usize { match self { State::Optional(optional) => optional.validity.len(), State::Required(required) => required.values.size_hint().0, + State::FilteredRequired(required) => required.values.size_hint().0, + State::FilteredOptional(validity, _) => validity.len(), } } } @@ -126,27 +110,31 @@ impl<'a, K> utils::Decoder<'a> for PrimitiveDecoder where K: DictionaryKey, { - type State = State<'a, K>; + type State = State<'a>; type DecodedState = (Vec, MutableBitmap); fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); - match (page.encoding(), is_optional) { - (Encoding::PlainDictionary | Encoding::RleDictionary, false) => { + match (page.encoding(), is_optional, is_filtered) { + (Encoding::PlainDictionary | Encoding::RleDictionary, false, false) => { Ok(State::Required(Required::new(page))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, true, false) => { Ok(State::Optional(Optional::new(page))) } - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Primitive", - )), + (Encoding::PlainDictionary | Encoding::RleDictionary, false, true) => { + Ok(State::FilteredRequired(FilteredRequired::new(page))) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, true, true) => { + Ok(State::FilteredOptional( + FilteredOptionalPageValidity::new(page), + dict_indices_decoder(page), + )) + } + _ => Err(utils::not_implemented(page)), } } @@ -170,10 +158,30 @@ where &mut page.validity, Some(remaining), values, - &mut page.values, + &mut page.values.by_ref().map(|x| 
K::from_u32(x).unwrap()), ), State::Required(page) => { - values.extend(page.values.by_ref().take(remaining)); + values.extend( + page.values + .by_ref() + .map(|x| K::from_u32(x).unwrap()) + .take(remaining), + ); + } + State::FilteredOptional(page_validity, page_values) => extend_from_decoder( + validity, + page_validity, + Some(remaining), + values, + &mut page_values.by_ref().map(|x| K::from_u32(x).unwrap()), + ), + State::FilteredRequired(page) => { + values.extend( + page.values + .by_ref() + .map(|x| K::from_u32(x).unwrap()) + .take(remaining), + ); } } } @@ -233,7 +241,7 @@ pub(super) fn next_dict< }; // there is a new page => consume the page from the start - let maybe_page = PrimitiveDecoder::default().build_state(page); + let maybe_page = PrimitiveDecoder::::default().build_state(page); let page = match maybe_page { Ok(page) => page, Err(e) => return MaybeNext::Some(Err(e)), diff --git a/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs b/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs index 260fafa7eb3..c4645e595aa 100644 --- a/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs +++ b/src/io/parquet/read/deserialize/fixed_size_binary/basic.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use parquet2::{ + deserialize::SliceFilteredIter, encoding::{hybrid_rle, Encoding}, page::{DataPage, FixedLenByteArrayPageDict}, schema::Repetition, @@ -11,8 +12,9 @@ use crate::{ }; use super::super::utils::{ - dict_indices_decoder, extend_from_decoder, next, not_implemented, split_buffer, DecodedState, - Decoder, MaybeNext, OptionalPageValidity, PageState, Pushable, + dict_indices_decoder, extend_from_decoder, get_selected_rows, next, not_implemented, + split_buffer, DecodedState, Decoder, FilteredOptionalPageValidity, MaybeNext, + OptionalPageValidity, PageState, Pushable, }; use super::super::DataPages; use super::utils::FixedSizeBinary; @@ -37,33 +39,59 @@ impl<'a> Optional<'a> { struct Required<'a> { pub values: std::slice::ChunksExact<'a, u8>, - pub remaining: usize, } impl<'a> Required<'a> { fn new(page: &'a DataPage, size: usize) -> Self { - Self { - values: page.buffer().chunks_exact(size), - remaining: page.num_values(), - } + let values = page.buffer(); + assert_eq!(values.len() % size, 0); + let values = values.chunks_exact(size); + Self { values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + +struct FilteredRequired<'a> { + pub values: SliceFilteredIter>, +} + +impl<'a> FilteredRequired<'a> { + fn new(page: &'a DataPage, size: usize) -> Self { + let values = page.buffer(); + assert_eq!(values.len() % size, 0); + let values = values.chunks_exact(size); + + let rows = get_selected_rows(page); + let values = SliceFilteredIter::new(values, rows); + + Self { values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 } } struct RequiredDictionary<'a> { pub values: hybrid_rle::HybridRleDecoder<'a>, - pub remaining: usize, dict: &'a FixedLenByteArrayPageDict, } impl<'a> RequiredDictionary<'a> { fn new(page: &'a DataPage, dict: &'a FixedLenByteArrayPageDict) -> Self { - let values = dict_indices_decoder(page.buffer(), page.num_values()); + let values = dict_indices_decoder(page); - Self { - values, - remaining: page.num_values(), - dict, - } + Self { dict, values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 } } @@ -75,9 +103,7 @@ struct OptionalDictionary<'a> { impl<'a> OptionalDictionary<'a> { fn new(page: &'a DataPage, dict: &'a FixedLenByteArrayPageDict) -> 
Self { - let (_, _, indices_buffer) = split_buffer(page); - - let values = dict_indices_decoder(indices_buffer, page.num_values()); + let values = dict_indices_decoder(page); Self { values, @@ -92,15 +118,22 @@ enum State<'a> { Required(Required<'a>), RequiredDictionary(RequiredDictionary<'a>), OptionalDictionary(OptionalDictionary<'a>), + FilteredRequired(FilteredRequired<'a>), + FilteredOptional( + FilteredOptionalPageValidity<'a>, + std::slice::ChunksExact<'a, u8>, + ), } impl<'a> PageState<'a> for State<'a> { fn len(&self) -> usize { match self { State::Optional(state) => state.validity.len(), - State::Required(state) => state.remaining, - State::RequiredDictionary(state) => state.remaining, + State::Required(state) => state.len(), + State::RequiredDictionary(state) => state.len(), State::OptionalDictionary(state) => state.validity.len(), + State::FilteredRequired(state) => state.len(), + State::FilteredOptional(state, _) => state.len(), } } } @@ -121,30 +154,45 @@ impl<'a> Decoder<'a> for BinaryDecoder { fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; - - match (page.encoding(), page.dictionary_page(), is_optional) { - (Encoding::Plain, None, true) => Ok(State::Optional(Optional::new(page, self.size))), - (Encoding::Plain, None, false) => Ok(State::Required(Required::new(page, self.size))), - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match ( + page.encoding(), + page.dictionary_page(), + is_optional, + is_filtered, + ) { + (Encoding::Plain, None, true, false) => { + Ok(State::Optional(Optional::new(page, self.size))) + } + (Encoding::Plain, None, false, false) => { + Ok(State::Required(Required::new(page, self.size))) + } + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { Ok(State::RequiredDictionary(RequiredDictionary::new( page, dict.as_any().downcast_ref().unwrap(), ))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { Ok(State::OptionalDictionary(OptionalDictionary::new( page, dict.as_any().downcast_ref().unwrap(), ))) } - _ => Err(not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "FixedBinary", + (Encoding::Plain, None, false, true) => Ok(State::FilteredRequired( + FilteredRequired::new(page, self.size), )), + (Encoding::Plain, _, true, true) => { + let (_, _, values) = split_buffer(page); + + Ok(State::FilteredOptional( + FilteredOptionalPageValidity::new(page), + values.chunks_exact(self.size), + )) + } + _ => Err(not_implemented(page)), } } @@ -172,7 +220,11 @@ impl<'a> Decoder<'a> for BinaryDecoder { &mut page.values, ), State::Required(page) => { - page.remaining -= remaining; + for x in page.values.by_ref().take(remaining) { + values.push(x) + } + } + State::FilteredRequired(page) => { for x in page.values.by_ref().take(remaining) { values.push(x) } @@ -201,11 +253,19 @@ impl<'a> Decoder<'a> for BinaryDecoder { &dict_values[index * size..(index + 1) * size] }; - page.remaining -= remaining; for x in page.values.by_ref().map(op).take(remaining) { values.push(x) } } + State::FilteredOptional(page_validity, page_values) => { + extend_from_decoder( + validity, + page_validity, + Some(remaining), + values, + page_values.by_ref(), + 
); + } } } } diff --git a/src/io/parquet/read/deserialize/mod.rs b/src/io/parquet/read/deserialize/mod.rs index bb688a12f89..0d5706354c0 100644 --- a/src/io/parquet/read/deserialize/mod.rs +++ b/src/io/parquet/read/deserialize/mod.rs @@ -17,6 +17,7 @@ use crate::{ }; use self::nested_utils::{InitNested, NestedArrayIter, NestedState}; +use parquet2::schema::types::PrimitiveType; use simple::page_iter_to_arrays; use super::*; @@ -27,7 +28,7 @@ pub fn get_page_iterator( reader: R, pages_filter: Option, buffer: Vec, -) -> Result> { +) -> Result> { Ok(_get_page_iterator( column_metadata, reader, @@ -76,7 +77,7 @@ fn create_list( fn columns_to_iter_recursive<'a, I: 'a>( mut columns: Vec, - mut types: Vec<&ParquetType>, + mut types: Vec<&PrimitiveType>, field: Field, mut init: Vec, chunk_size: usize, @@ -238,7 +239,7 @@ fn field_to_init(field: &Field) -> Vec { /// The arrays are guaranteed to be at most of size `chunk_size` and data type `field.data_type`. pub fn column_iter_to_arrays<'a, I: 'a>( columns: Vec, - types: Vec<&ParquetType>, + types: Vec<&PrimitiveType>, field: Field, chunk_size: usize, ) -> Result> diff --git a/src/io/parquet/read/deserialize/nested_utils.rs b/src/io/parquet/read/deserialize/nested_utils.rs index cb1f977cd1f..b74f7c24734 100644 --- a/src/io/parquet/read/deserialize/nested_utils.rs +++ b/src/io/parquet/read/deserialize/nested_utils.rs @@ -7,6 +7,7 @@ use parquet2::{ use crate::{array::Array, bitmap::MutableBitmap, error::Result}; use super::super::DataPages; +pub use super::utils::Zip; use super::utils::{split_buffer, DecodedState, Decoder, MaybeNext, Pushable}; /// trait describing deserialized repetition and definition levels @@ -203,31 +204,19 @@ impl Nested for NestedStruct { } } -pub(super) fn read_optional_values( - def_levels: D, - max_def: u32, - mut new_values: G, - values: &mut P, - validity: &mut MutableBitmap, - mut remaining: usize, -) where - D: Iterator, - G: Iterator, +pub(super) fn read_optional_values(items: D, values: &mut P, validity: &mut MutableBitmap) +where + D: Iterator>, C: Default, P: Pushable, { - for def in def_levels { - if def == max_def { - values.push(new_values.next().unwrap()); + for item in items { + if let Some(item) = item { + values.push(item); validity.push(true); - remaining -= 1; - } else if def == max_def - 1 { - values.push(C::default()); + } else { + values.push_null(); validity.push(false); - remaining -= 1; - } - if remaining == 0 { - break; } } } @@ -283,8 +272,8 @@ impl<'a> NestedPage<'a> { pub fn new(page: &'a DataPage) -> Self { let (rep_levels, def_levels, _) = split_buffer(page); - let max_rep_level = page.descriptor().max_rep_level(); - let max_def_level = page.descriptor().max_def_level(); + let max_rep_level = page.descriptor.max_rep_level; + let max_def_level = page.descriptor.max_def_level; let reps = HybridRleDecoder::new(rep_levels, get_bit_width(max_rep_level), page.num_values()); @@ -440,37 +429,44 @@ fn extend_offsets2<'a>(page: &mut NestedPage<'a>, nested: &mut NestedState, addi } } -// The state of an optional DataPage with a boolean physical type #[derive(Debug)] pub struct Optional<'a> { - pub definition_levels: HybridRleDecoder<'a>, - max_def: u32, + iter: HybridRleDecoder<'a>, + max: u32, +} + +impl<'a> Iterator for Optional<'a> { + type Item = bool; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next().and_then(|x| { + if x == self.max { + Some(true) + } else if x == self.max - 1 { + Some(false) + } else { + self.next() + } + }) + } } impl<'a> Optional<'a> { pub fn new(page: &'a 
DataPage) -> Self { let (_, def_levels, _) = split_buffer(page); - let max_def = page.descriptor().max_def_level(); + let max_def = page.descriptor.max_def_level; Self { - definition_levels: HybridRleDecoder::new( - def_levels, - get_bit_width(max_def), - page.num_values(), - ), - max_def: max_def as u32, + iter: HybridRleDecoder::new(def_levels, get_bit_width(max_def), page.num_values()), + max: max_def as u32, } } #[inline] pub fn len(&self) -> usize { - self.definition_levels.size_hint().0 - } - - #[inline] - pub fn max_def(&self) -> u32 { - self.max_def + unreachable!(); } } diff --git a/src/io/parquet/read/deserialize/primitive/basic.rs b/src/io/parquet/read/deserialize/primitive/basic.rs index ed6d30419f9..abb766a6968 100644 --- a/src/io/parquet/read/deserialize/primitive/basic.rs +++ b/src/io/parquet/read/deserialize/primitive/basic.rs @@ -1,6 +1,7 @@ use std::collections::VecDeque; use parquet2::{ + deserialize::SliceFilteredIter, encoding::{hybrid_rle, Encoding}, page::{DataPage, PrimitivePageDict}, schema::Repetition, @@ -14,9 +15,33 @@ use crate::{ }; use super::super::utils; -use super::super::utils::OptionalPageValidity; +use super::super::utils::{get_selected_rows, FilteredOptionalPageValidity, OptionalPageValidity}; use super::super::DataPages; +#[derive(Debug)] +struct FilteredRequiredValues<'a> { + values: SliceFilteredIter>, +} + +impl<'a> FilteredRequiredValues<'a> { + pub fn new(page: &'a DataPage) -> Self { + let (_, _, values) = utils::split_buffer(page); + assert_eq!(values.len() % std::mem::size_of::
<P>
(), 0); + + let values = values.chunks_exact(std::mem::size_of::
<P>
()); + + let rows = get_selected_rows(page); + let values = SliceFilteredIter::new(values, rows); + + Self { values } + } + + #[inline] + pub fn len(&self) -> usize { + self.values.size_hint().0 + } +} + #[derive(Debug)] pub(super) struct Values<'a> { pub values: std::slice::ChunksExact<'a, u8>, @@ -51,8 +76,7 @@ where P: ParquetNativeType, { pub fn new(page: &'a DataPage, dict: &'a PrimitivePageDict
<P>
) -> Self { - let (_, _, indices_buffer) = utils::split_buffer(page); - let values = utils::dict_indices_decoder(indices_buffer, page.num_values()); + let values = utils::dict_indices_decoder(page); Self { dict: dict.values(), @@ -76,6 +100,8 @@ where Required(Values<'a>), RequiredDictionary(ValuesDictionary<'a, P>), OptionalDictionary(OptionalPageValidity<'a>, ValuesDictionary<'a, P>), + FilteredRequired(FilteredRequiredValues<'a>), + FilteredOptional(FilteredOptionalPageValidity<'a>, Values<'a>), } impl<'a, P> utils::PageState<'a> for State<'a, P> @@ -88,6 +114,8 @@ where State::Required(values) => values.len(), State::RequiredDictionary(values) => values.len(), State::OptionalDictionary(optional, _) => optional.len(), + State::FilteredRequired(values) => values.len(), + State::FilteredOptional(optional, _) => optional.len(), } } } @@ -137,14 +165,20 @@ where fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); - match (page.encoding(), page.dictionary_page(), is_optional) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + match ( + page.encoding(), + page.dictionary_page(), + is_optional, + is_filtered, + ) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::RequiredDictionary(ValuesDictionary::new(page, dict))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::OptionalDictionary( @@ -152,20 +186,21 @@ where ValuesDictionary::new(page, dict), )) } - (Encoding::Plain, _, true) => { + (Encoding::Plain, _, true, false) => { let validity = OptionalPageValidity::new(page); let values = Values::new::
<P>
(page); Ok(State::Optional(validity, values)) } - (Encoding::Plain, _, false) => Ok(State::Required(Values::new::
<P>
(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Primitive", + (Encoding::Plain, _, false, false) => Ok(State::Required(Values::new::
<P>
(page))), + (Encoding::Plain, _, false, true) => Ok(State::FilteredRequired( + FilteredRequiredValues::new::
<P>
(page), )), + (Encoding::Plain, _, true, true) => Ok(State::FilteredOptional( + FilteredOptionalPageValidity::new(page), + Values::new::
<P>
(page), + )), + _ => Err(utils::not_implemented(page)), } } @@ -214,6 +249,24 @@ where let op1 = |index: u32| page.dict[index as usize]; values.extend(page.values.by_ref().map(op1).map(self.op).take(remaining)); } + State::FilteredRequired(page) => { + values.extend( + page.values + .by_ref() + .map(decode) + .map(self.op) + .take(remaining), + ); + } + State::FilteredOptional(page_validity, page_values) => { + utils::extend_from_decoder( + validity, + page_validity, + Some(remaining), + values, + page_values.values.by_ref().map(decode).map(self.op), + ); + } } } } diff --git a/src/io/parquet/read/deserialize/primitive/nested.rs b/src/io/parquet/read/deserialize/primitive/nested.rs index a16217c9b18..0aff18e2578 100644 --- a/src/io/parquet/read/deserialize/primitive/nested.rs +++ b/src/io/parquet/read/deserialize/primitive/nested.rs @@ -81,31 +81,31 @@ where fn build_state(&self, page: &'a DataPage) -> Result { let is_optional = - page.descriptor().type_().get_basic_info().repetition() == &Repetition::Optional; - - match (page.encoding(), page.dictionary_page(), is_optional) { - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false) => { + page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); + + match ( + page.encoding(), + page.dictionary_page(), + is_optional, + is_filtered, + ) { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), false, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::RequiredDictionary(ValuesDictionary::new(page, dict))) } - (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true) => { + (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict), true, false) => { let dict = dict.as_any().downcast_ref().unwrap(); Ok(State::OptionalDictionary( Optional::new(page), ValuesDictionary::new(page, dict), )) } - (Encoding::Plain, _, true) => { + (Encoding::Plain, _, true, false) => { Ok(State::Optional(Optional::new(page), Values::new::
<P>
(page))) } - (Encoding::Plain, _, false) => Ok(State::Required(Values::new::
<P>
(page))), - _ => Err(utils::not_implemented( - &page.encoding(), - is_optional, - false, - "any", - "Primitive", - )), + (Encoding::Plain, _, false, false) => Ok(State::Required(Values::new::
<P>
(page))), + _ => Err(utils::not_implemented(page)), } } @@ -120,20 +120,15 @@ where &self, state: &mut Self::State, decoded: &mut Self::DecodedState, - remaining: usize, + additional: usize, ) { let (values, validity) = decoded; match state { State::Optional(page_validity, page_values) => { - let max_def = page_validity.max_def(); - read_optional_values( - page_validity.definition_levels.by_ref(), - max_def, - page_values.values.by_ref().map(decode).map(self.op), - values, - validity, - remaining, - ) + let items = page_validity.by_ref().take(additional); + let items = Zip::new(items, page_values.values.by_ref().map(decode).map(self.op)); + + read_optional_values(items, values, validity) } State::Required(page) => { values.extend( @@ -141,24 +136,20 @@ where .by_ref() .map(decode) .map(self.op) - .take(remaining), + .take(additional), ); } State::RequiredDictionary(page) => { let op1 = |index: u32| page.dict[index as usize]; - values.extend(page.values.by_ref().map(op1).map(self.op).take(remaining)); + values.extend(page.values.by_ref().map(op1).map(self.op).take(additional)); } State::OptionalDictionary(page_validity, page_values) => { - let max_def = page_validity.max_def(); let op1 = |index: u32| page_values.dict[index as usize]; - read_optional_values( - page_validity.definition_levels.by_ref(), - max_def, - page_values.values.by_ref().map(op1).map(self.op), - values, - validity, - remaining, - ) + + let items = page_validity.by_ref().take(additional); + let items = Zip::new(items, page_values.values.by_ref().map(op1).map(self.op)); + + read_optional_values(items, values, validity) } } } diff --git a/src/io/parquet/read/deserialize/simple.rs b/src/io/parquet/read/deserialize/simple.rs index 9544f16bff1..d3a50b44c90 100644 --- a/src/io/parquet/read/deserialize/simple.rs +++ b/src/io/parquet/read/deserialize/simple.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use parquet2::{ schema::types::{ - LogicalType, ParquetType, PhysicalType, TimeUnit as ParquetTimeUnit, TimestampType, + PhysicalType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, }, types::int96_to_i64_ns, }; @@ -60,24 +60,14 @@ where /// of [`DataType`] `data_type` and `chunk_size`. pub fn page_iter_to_arrays<'a, I: 'a + DataPages>( pages: I, - type_: &ParquetType, + type_: &PrimitiveType, data_type: DataType, chunk_size: usize, ) -> Result> { use DataType::*; - let (physical_type, logical_type) = if let ParquetType::PrimitiveType { - physical_type, - logical_type, - .. - } = type_ - { - (physical_type, logical_type) - } else { - return Err(ArrowError::InvalidArgumentError( - "page_iter_to_arrays can only be called with a parquet primitive type".into(), - )); - }; + let physical_type = &type_.physical_type; + let logical_type = &type_.logical_type; Ok(match data_type.to_logical_type() { Null => null::iter_to_arrays(pages, data_type, chunk_size), @@ -240,7 +230,7 @@ pub fn page_iter_to_arrays<'a, I: 'a + DataPages>( fn timestamp<'a, I: 'a + DataPages>( pages: I, physical_type: &PhysicalType, - logical_type: &Option, + logical_type: &Option, data_type: DataType, chunk_size: usize, time_unit: TimeUnit, @@ -267,35 +257,41 @@ fn timestamp<'a, I: 'a + DataPages>( let iter = primitive::Iter::new(pages, data_type, chunk_size, |x: i64| x); - let unit = if let Some(LogicalType::TIMESTAMP(TimestampType { unit, .. })) = logical_type { + let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. 
}) = logical_type { unit } else { return Ok(dyn_iter(iden(iter))); }; Ok(match (unit, time_unit) { - (ParquetTimeUnit::MILLIS(_), TimeUnit::Second) => dyn_iter(op(iter, |x| x / 1_000)), - (ParquetTimeUnit::MICROS(_), TimeUnit::Second) => dyn_iter(op(iter, |x| x / 1_000_000)), - (ParquetTimeUnit::NANOS(_), TimeUnit::Second) => dyn_iter(op(iter, |x| x * 1_000_000_000)), + (ParquetTimeUnit::Milliseconds, TimeUnit::Second) => dyn_iter(op(iter, |x| x / 1_000)), + (ParquetTimeUnit::Microseconds, TimeUnit::Second) => dyn_iter(op(iter, |x| x / 1_000_000)), + (ParquetTimeUnit::Nanoseconds, TimeUnit::Second) => { + dyn_iter(op(iter, |x| x / 1_000_000_000)) + } - (ParquetTimeUnit::MILLIS(_), TimeUnit::Millisecond) => dyn_iter(iden(iter)), - (ParquetTimeUnit::MICROS(_), TimeUnit::Millisecond) => dyn_iter(op(iter, |x| x / 1_000)), - (ParquetTimeUnit::NANOS(_), TimeUnit::Millisecond) => dyn_iter(op(iter, |x| x / 1_000_000)), + (ParquetTimeUnit::Milliseconds, TimeUnit::Millisecond) => dyn_iter(iden(iter)), + (ParquetTimeUnit::Microseconds, TimeUnit::Millisecond) => dyn_iter(op(iter, |x| x / 1_000)), + (ParquetTimeUnit::Nanoseconds, TimeUnit::Millisecond) => { + dyn_iter(op(iter, |x| x / 1_000_000)) + } - (ParquetTimeUnit::MILLIS(_), TimeUnit::Microsecond) => dyn_iter(op(iter, |x| x * 1_000)), - (ParquetTimeUnit::MICROS(_), TimeUnit::Microsecond) => dyn_iter(iden(iter)), - (ParquetTimeUnit::NANOS(_), TimeUnit::Microsecond) => dyn_iter(op(iter, |x| x / 1_000)), + (ParquetTimeUnit::Milliseconds, TimeUnit::Microsecond) => dyn_iter(op(iter, |x| x * 1_000)), + (ParquetTimeUnit::Microseconds, TimeUnit::Microsecond) => dyn_iter(iden(iter)), + (ParquetTimeUnit::Nanoseconds, TimeUnit::Microsecond) => dyn_iter(op(iter, |x| x / 1_000)), - (ParquetTimeUnit::MILLIS(_), TimeUnit::Nanosecond) => dyn_iter(op(iter, |x| x * 1_000_000)), - (ParquetTimeUnit::MICROS(_), TimeUnit::Nanosecond) => dyn_iter(op(iter, |x| x * 1_000)), - (ParquetTimeUnit::NANOS(_), TimeUnit::Nanosecond) => dyn_iter(iden(iter)), + (ParquetTimeUnit::Milliseconds, TimeUnit::Nanosecond) => { + dyn_iter(op(iter, |x| x * 1_000_000)) + } + (ParquetTimeUnit::Microseconds, TimeUnit::Nanosecond) => dyn_iter(op(iter, |x| x * 1_000)), + (ParquetTimeUnit::Nanoseconds, TimeUnit::Nanosecond) => dyn_iter(iden(iter)), }) } fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( pages: I, physical_type: &PhysicalType, - logical_type: &Option, + logical_type: &Option, data_type: DataType, chunk_size: usize, time_unit: TimeUnit, @@ -315,7 +311,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( } }; - let unit = if let Some(LogicalType::TIMESTAMP(TimestampType { unit, .. })) = logical_type { + let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. 
}) = logical_type { unit } else { return Ok(dyn_iter(primitive::DictIter::::new( @@ -327,7 +323,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( }; Ok(match (unit, time_unit) { - (ParquetTimeUnit::MILLIS(_), TimeUnit::Second) => { + (ParquetTimeUnit::Milliseconds, TimeUnit::Second) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -335,7 +331,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x / 1_000, )) } - (ParquetTimeUnit::MICROS(_), TimeUnit::Second) => { + (ParquetTimeUnit::Microseconds, TimeUnit::Second) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -343,16 +339,16 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x / 1_000_000, )) } - (ParquetTimeUnit::NANOS(_), TimeUnit::Second) => { + (ParquetTimeUnit::Nanoseconds, TimeUnit::Second) => { dyn_iter(primitive::DictIter::::new( pages, data_type, chunk_size, - |x: i64| x * 1_000_000_000, + |x: i64| x / 1_000_000_000, )) } - (ParquetTimeUnit::MILLIS(_), TimeUnit::Millisecond) => { + (ParquetTimeUnit::Milliseconds, TimeUnit::Millisecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -360,7 +356,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x, )) } - (ParquetTimeUnit::MICROS(_), TimeUnit::Millisecond) => { + (ParquetTimeUnit::Microseconds, TimeUnit::Millisecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -368,7 +364,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x / 1_000, )) } - (ParquetTimeUnit::NANOS(_), TimeUnit::Millisecond) => { + (ParquetTimeUnit::Nanoseconds, TimeUnit::Millisecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -377,7 +373,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( )) } - (ParquetTimeUnit::MILLIS(_), TimeUnit::Microsecond) => { + (ParquetTimeUnit::Milliseconds, TimeUnit::Microsecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -385,7 +381,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x * 1_000, )) } - (ParquetTimeUnit::MICROS(_), TimeUnit::Microsecond) => { + (ParquetTimeUnit::Microseconds, TimeUnit::Microsecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -393,7 +389,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x, )) } - (ParquetTimeUnit::NANOS(_), TimeUnit::Microsecond) => { + (ParquetTimeUnit::Nanoseconds, TimeUnit::Microsecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -402,7 +398,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( )) } - (ParquetTimeUnit::MILLIS(_), TimeUnit::Nanosecond) => { + (ParquetTimeUnit::Milliseconds, TimeUnit::Nanosecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -410,7 +406,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x * 1_000_000, )) } - (ParquetTimeUnit::MICROS(_), TimeUnit::Nanosecond) => { + (ParquetTimeUnit::Microseconds, TimeUnit::Nanosecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -418,7 +414,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( |x: i64| x * 1_000, )) } - (ParquetTimeUnit::NANOS(_), TimeUnit::Nanosecond) => { + (ParquetTimeUnit::Nanoseconds, TimeUnit::Nanosecond) => { dyn_iter(primitive::DictIter::::new( pages, data_type, @@ -432,7 +428,7 @@ fn timestamp_dict<'a, K: DictionaryKey, I: 'a + DataPages>( fn dict_read<'a, K: DictionaryKey, I: 'a + DataPages>( iter: I, physical_type: &PhysicalType, - logical_type: &Option, + 
logical_type: &Option, data_type: DataType, chunk_size: usize, ) -> Result> { diff --git a/src/io/parquet/read/deserialize/utils.rs b/src/io/parquet/read/deserialize/utils.rs index f9e9cfe20be..7bda733684f 100644 --- a/src/io/parquet/read/deserialize/utils.rs +++ b/src/io/parquet/read/deserialize/utils.rs @@ -1,9 +1,12 @@ use std::collections::VecDeque; -use std::convert::TryInto; -use parquet2::encoding::{hybrid_rle, Encoding}; +use parquet2::deserialize::{ + FilteredHybridEncoded, FilteredHybridRleDecoderIter, HybridDecoderBitmapIter, HybridEncoded, +}; +use parquet2::encoding::hybrid_rle; +use parquet2::indexes::Interval; use parquet2::page::{split_buffer as _split_buffer, DataPage}; -use streaming_iterator::{convert, Convert, StreamingIterator}; +use parquet2::schema::Repetition; use crate::bitmap::utils::BitmapIter; use crate::bitmap::MutableBitmap; @@ -11,51 +14,29 @@ use crate::error::ArrowError; use super::super::DataPages; -#[derive(Debug)] -pub struct BinaryIter<'a> { - values: &'a [u8], -} - -impl<'a> BinaryIter<'a> { - pub fn new(values: &'a [u8]) -> Self { - Self { values } - } -} - -impl<'a> Iterator for BinaryIter<'a> { - type Item = &'a [u8]; - - #[inline] - fn next(&mut self) -> Option { - if self.values.is_empty() { - return None; - } - let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize; - self.values = &self.values[4..]; - let result = &self.values[..length]; - self.values = &self.values[length..]; - Some(result) - } -} - -pub fn not_implemented( - encoding: &Encoding, - is_optional: bool, - has_dict: bool, - version: &str, - physical_type: &str, -) -> ArrowError { +pub fn not_implemented(page: &DataPage) -> ArrowError { + let is_optional = page.descriptor.primitive_type.field_info.repetition == Repetition::Optional; + let is_filtered = page.selected_rows().is_some(); let required = if is_optional { "optional" } else { "required" }; - let dict = if has_dict { ", dictionary-encoded" } else { "" }; + let is_filtered = if is_filtered { ", index-filtered" } else { "" }; + let dict = if page.dictionary_page().is_some() { + ", dictionary-encoded" + } else { + "" + }; ArrowError::NotYetImplemented(format!( - "Decoding \"{:?}\"-encoded{} {} {} pages is not yet implemented for {}", - encoding, dict, required, version, physical_type + "Decoding {:?} \"{:?}\"-encoded{} {} {} parquet pages", + page.descriptor.primitive_type.physical_type, + page.encoding(), + dict, + required, + is_filtered, )) } #[inline] pub fn split_buffer(page: &DataPage) -> (&[u8], &[u8], &[u8]) { - _split_buffer(page, page.descriptor()) + _split_buffer(page) } /// A private trait representing structs that can receive elements. 
@@ -111,43 +92,210 @@ impl Pushable for Vec {
     }
 }
 
-#[derive(Debug)]
+/// The state of a partially deserialized page
+pub(super) trait PageValidity<'a> {
+    fn next_limited(&mut self, limit: usize) -> Option<FilteredHybridEncoded<'a>>;
+}
+
+#[derive(Debug, Clone)]
+pub struct FilteredOptionalPageValidity<'a> {
+    iter: FilteredHybridRleDecoderIter<'a>,
+    current: Option<(FilteredHybridEncoded<'a>, usize)>,
+}
+
+impl<'a> FilteredOptionalPageValidity<'a> {
+    pub fn new(page: &'a DataPage) -> Self {
+        let (_, validity, _) = split_buffer(page);
+
+        let iter = hybrid_rle::Decoder::new(validity, 1);
+        let iter = HybridDecoderBitmapIter::new(iter, page.num_values());
+        let selected_rows = get_selected_rows(page);
+        let iter = FilteredHybridRleDecoderIter::new(iter, selected_rows);
+
+        Self {
+            iter,
+            current: None,
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        self.iter.len()
+    }
+}
+
+pub fn get_selected_rows(page: &DataPage) -> VecDeque<Interval> {
+    page.selected_rows()
+        .unwrap_or(&[Interval::new(0, page.num_values())])
+        .iter()
+        .copied()
+        .collect()
+}
+
+impl<'a> PageValidity<'a> for FilteredOptionalPageValidity<'a> {
+    fn next_limited(&mut self, limit: usize) -> Option<FilteredHybridEncoded<'a>> {
+        let (run, own_offset) = if let Some((run, offset)) = self.current {
+            (run, offset)
+        } else {
+            // a new run
+            let run = self.iter.next()?; // no run -> None
+            self.current = Some((run, 0));
+            return self.next_limited(limit);
+        };
+
+        match run {
+            FilteredHybridEncoded::Bitmap {
+                values,
+                offset,
+                length,
+            } => {
+                let run_length = length - own_offset;
+
+                let length = limit.min(run_length);
+
+                if length == run_length {
+                    self.current = None;
+                } else {
+                    self.current = Some((run, own_offset + length));
+                }
+
+                Some(FilteredHybridEncoded::Bitmap {
+                    values,
+                    // skip over the bits of this run already consumed by a previous call
+                    offset: offset + own_offset,
+                    length,
+                })
+            }
+            FilteredHybridEncoded::Repeated { is_set, length } => {
+                let run_length = length - own_offset;
+
+                let length = limit.min(run_length);
+
+                if length == run_length {
+                    self.current = None;
+                } else {
+                    self.current = Some((run, own_offset + length));
+                }
+
+                Some(FilteredHybridEncoded::Repeated { is_set, length })
+            }
+            FilteredHybridEncoded::Skipped(set) => {
+                self.current = None;
+                Some(FilteredHybridEncoded::Skipped(set))
+            }
+        }
+    }
+}
+
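+/// Zips a validity iterator of `bool` with a values iterator, yielding
+/// `Some(value)` for set validity slots and `None` for null slots.
+/// For example, validity `[true, false, true]` over values `[1, 2]` yields
+/// `Some(1), None, Some(2)`.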
+pub struct Zip<V, I> {
+    validity: V,
+    values: I,
+}
+
+impl<V, I> Zip<V, I> {
+    pub fn new(validity: V, values: I) -> Self {
+        Self { validity, values }
+    }
+}
+
+impl<V: Iterator<Item = bool>, I: Iterator> Iterator for Zip<V, I> {
+    type Item = Option<I::Item>;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.validity
+            .next()
+            .map(|x| if x { self.values.next() } else { None })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.validity.size_hint()
+    }
+}
+
+#[derive(Debug, Clone)]
 pub struct OptionalPageValidity<'a> {
-    validity: Convert<hybrid_rle::Decoder<'a>>,
-    // invariants:
-    // * run_offset < length
-    // * consumed < length
-    run_offset: usize,
-    consumed: usize,
-    length: usize,
+    iter: HybridDecoderBitmapIter<'a>,
+    current: Option<(HybridEncoded<'a>, usize)>,
 }
 
 impl<'a> OptionalPageValidity<'a> {
-    #[inline]
     pub fn new(page: &'a DataPage) -> Self {
         let (_, validity, _) = split_buffer(page);
 
-        let validity = convert(hybrid_rle::Decoder::new(validity, 1));
+        let iter = hybrid_rle::Decoder::new(validity, 1);
+        let iter = HybridDecoderBitmapIter::new(iter, page.num_values());
         Self {
-            validity,
-            run_offset: 0,
-            consumed: 0,
-            length: page.num_values(),
+            iter,
+            current: None,
         }
     }
 
-    #[inline]
     pub fn len(&self) -> usize {
-        self.length - self.consumed
+        self.iter.len()
+            + self
+                .current
+                .as_ref()
+                .map(|(run, offset)| run.len() - offset)
+                .unwrap_or_default()
+    }
+
+    fn next_limited(&mut self, limit: usize) -> Option<FilteredHybridEncoded<'a>> {
+        let (run, offset) = if let Some((run, offset)) = self.current {
+            (run, offset)
+        } else {
+            // a new run
+            let run = self.iter.next()?; // no run -> None
+            self.current = Some((run, 0));
+            return self.next_limited(limit);
+        };
+
+        match run {
+            HybridEncoded::Bitmap(values, length) => {
+                let run_length = length - offset;
+
+                let length = limit.min(run_length);
+
+                if length == run_length {
+                    self.current = None;
+                } else {
+                    self.current = Some((run, offset + length));
+                }
+
+                Some(FilteredHybridEncoded::Bitmap {
+                    values,
+                    offset,
+                    length,
+                })
+            }
+            HybridEncoded::Repeated(is_set, run_length) => {
+                let run_length = run_length - offset;
+
+                let length = limit.min(run_length);
+
+                if length == run_length {
+                    self.current = None;
+                } else {
+                    self.current = Some((run, offset + length));
+                }
+
+                Some(FilteredHybridEncoded::Repeated { is_set, length })
+            }
+        }
+    }
+}
+
+impl<'a> PageValidity<'a> for OptionalPageValidity<'a> {
+    fn next_limited(&mut self, limit: usize) -> Option<FilteredHybridEncoded<'a>> {
+        self.next_limited(limit)
+    }
 }
 
 /// Extends a [`Pushable`] from an iterator of non-null values and a hybrid-rle decoder
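+/// Values are taken from `values_iter` only for set validity bits; `limit`
+/// (when `Some`) caps the number of validity items consumed, so a single page
+/// can be decoded across multiple output chunks.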
 pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable<T>, I: Iterator<Item = T>>(
     validity: &mut MutableBitmap,
-    page_validity: &mut OptionalPageValidity<'a>,
+    page_validity: &mut dyn PageValidity<'a>,
     limit: Option<usize>,
-    values: &mut P,
+    pushable: &mut P,
     mut values_iter: I,
 ) {
     let limit = limit.unwrap_or(usize::MAX);
@@ -155,69 +303,42 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable<T>, I: Iterator<Item = T>>(
     let mut consumed_here = 0;
     while consumed_here < limit {
-        if let Some(run) = page_validity.next() {
-            match run {
-                &hybrid_rle::HybridEncoded::Bitpacked(pack) => {
-                    // a pack has at most `pack.len() * 8` bits
-                    // during execution, we may end in the middle of a pack (run_offset != 0)
-                    // the remaining items in the pack is dictated by a combination
-                    // of the page length, the offset in the pack, and where we are in the page
-                    let pack_size = pack.len() * 8 - page_validity.run_offset;
-                    let remaining = page_validity.length - page_validity.consumed;
-                    let length = std::cmp::min(pack_size, remaining);
-
-                    let additional = limit.min(length);
-
-                    // consume `additional` items
-                    let iter = BitmapIter::new(pack, page_validity.run_offset, additional);
-                    for is_valid in iter {
-                        if is_valid {
-                            values.push(values_iter.next().unwrap())
-                        } else {
-                            values.push_null()
-                        };
-                    }
-
-                    validity.extend_from_slice(pack, page_validity.run_offset, additional);
-
-                    if additional == length {
-                        page_validity.run_offset = 0
-                    } else {
-                        page_validity.run_offset += additional;
-                    };
-                    consumed_here += additional;
-                    page_validity.consumed += additional;
-                }
-                &hybrid_rle::HybridEncoded::Rle(value, length) => {
-                    let is_set = value[0] == 1;
-                    let length = length - page_validity.run_offset;
-
-                    // the number of elements that will be consumed in this (run, iteration)
-                    let additional = limit.min(length);
-
-                    validity.extend_constant(additional, is_set);
-                    if is_set {
-                        (0..additional).for_each(|_| values.push(values_iter.next().unwrap()));
-                    } else {
-                        values.extend_constant(additional, T::default());
-                    }
-
-                    if additional == length {
-                        page_validity.run_offset = 0
-                    } else {
-                        page_validity.run_offset += additional;
-                    };
-                    consumed_here += additional;
-                    page_validity.consumed += additional;
-                }
-            };
-        } else {
-            break;
-        }
+        let run = page_validity.next_limited(limit);
+        let run = if let Some(run) = run { run } else { break };
+
+        match run {
+            FilteredHybridEncoded::Bitmap {
+                values,
+                offset,
+                length,
+            } => {
+                // consume `length` items
+                let iter = BitmapIter::new(values, offset, length);
+                let iter = Zip::new(iter, &mut values_iter);
+
+                for item in iter {
+                    if let Some(item) = item {
+                        pushable.push(item)
+                    } else {
+                        pushable.push_null()
+                    }
+                }
+                validity.extend_from_slice(values, offset, length);
+
+                consumed_here += length;
+            }
+            FilteredHybridEncoded::Repeated { is_set, length } => {
+                validity.extend_constant(length, is_set);
+                if is_set {
+                    (0..length).for_each(|_| pushable.push(values_iter.next().unwrap()));
+                } else {
+                    pushable.extend_constant(length, T::default());
+                }
+
+                consumed_here += length;
+            }
+            FilteredHybridEncoded::Skipped(valids) => for _ in values_iter.by_ref().take(valids) {},
+        };
     }
 }
@@ -335,14 +456,13 @@ pub(super) fn next<'a, I: DataPages, D: Decoder<'a>>(
 }
 
 #[inline]
-pub(super) fn dict_indices_decoder(
-    indices_buffer: &[u8],
-    additional: usize,
-) -> hybrid_rle::HybridRleDecoder {
+pub(super) fn dict_indices_decoder(page: &DataPage) -> hybrid_rle::HybridRleDecoder {
+    let (_, _, indices_buffer) = split_buffer(page);
+
     // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32),
     // SPEC: followed by the values encoded using RLE/Bit packed described above (with the given bit width).
     let bit_width = indices_buffer[0];
     let indices_buffer = &indices_buffer[1..];
 
-    hybrid_rle::HybridRleDecoder::new(indices_buffer, bit_width as u32, additional)
+    hybrid_rle::HybridRleDecoder::new(indices_buffer, bit_width as u32, page.num_values())
 }
diff --git a/src/io/parquet/read/indexes/binary.rs b/src/io/parquet/read/indexes/binary.rs
new file mode 100644
index 00000000000..f67e94d86c3
--- /dev/null
+++ b/src/io/parquet/read/indexes/binary.rs
@@ -0,0 +1,43 @@
+use parquet2::indexes::PageIndex;
+
+use crate::{
+    array::{Array, BinaryArray, PrimitiveArray, Utf8Array},
+    datatypes::{DataType, PhysicalType},
+    error::ArrowError,
+    trusted_len::TrustedLen,
+};
+
+use super::ColumnIndex;
+
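+/// Deserializes the `min`/`max` byte-array page statistics of a column into
+/// arrow arrays of `data_type`, validating utf-8 for the string variants.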
+pub fn deserialize(
+    indexes: &[PageIndex<Vec<u8>>],
+    data_type: &DataType,
+) -> Result<ColumnIndex, ArrowError> {
+    Ok(ColumnIndex {
+        min: deserialize_binary_iter(indexes.iter().map(|index| index.min.as_ref()), data_type)?,
+        max: deserialize_binary_iter(indexes.iter().map(|index| index.max.as_ref()), data_type)?,
+        null_count: PrimitiveArray::from_trusted_len_iter(
+            indexes
+                .iter()
+                .map(|index| index.null_count.map(|x| x as u64)),
+        ),
+    })
+}
+
+fn deserialize_binary_iter<'a, I: TrustedLen<Item = Option<&'a Vec<u8>>>>(
+    iter: I,
+    data_type: &DataType,
+) -> Result<Box<dyn Array>, ArrowError> {
+    match data_type.to_physical_type() {
+        PhysicalType::LargeBinary => Ok(Box::new(BinaryArray::<i64>::from_iter(iter))),
+        PhysicalType::Utf8 => {
+            let iter = iter.map(|x| x.map(|x| std::str::from_utf8(x)).transpose());
+            Ok(Box::new(Utf8Array::<i32>::try_from_trusted_len_iter(iter)?))
+        }
+        PhysicalType::LargeUtf8 => {
+            let iter = iter.map(|x| x.map(|x| std::str::from_utf8(x)).transpose());
+            Ok(Box::new(Utf8Array::<i64>::try_from_trusted_len_iter(iter)?))
+        }
+        _ => Ok(Box::new(BinaryArray::<i32>::from_iter(iter))),
+    }
+}
diff --git a/src/io/parquet/read/indexes/boolean.rs b/src/io/parquet/read/indexes/boolean.rs
new file mode 100644
index 00000000000..501c9e63a64
--- /dev/null
+++ b/src/io/parquet/read/indexes/boolean.rs
@@ -0,0 +1,21 @@
+use parquet2::indexes::PageIndex;
+
+use crate::array::{BooleanArray, PrimitiveArray};
+
+use super::ColumnIndex;
+
+pub fn deserialize(indexes: &[PageIndex<bool>]) -> ColumnIndex {
+    ColumnIndex {
+        min: Box::new(BooleanArray::from_trusted_len_iter(
+            indexes.iter().map(|index| index.min),
+        )),
+        max: Box::new(BooleanArray::from_trusted_len_iter(
+            indexes.iter().map(|index| index.max),
+        )),
+        null_count: PrimitiveArray::from_trusted_len_iter(
+            indexes
+                .iter()
+                .map(|index| index.null_count.map(|x| x as u64)),
+        ),
+    }
+}
diff --git a/src/io/parquet/read/indexes/fixed_len_binary.rs b/src/io/parquet/read/indexes/fixed_len_binary.rs
new file mode 100644
index 00000000000..c4499814d12
--- /dev/null
+++ b/src/io/parquet/read/indexes/fixed_len_binary.rs
@@ -0,0 +1,58 @@
+use parquet2::indexes::PageIndex;
+
+use crate::{
+    array::{Array, FixedSizeBinaryArray, MutableFixedSizeBinaryArray, PrimitiveArray},
+    datatypes::{DataType, PhysicalType, PrimitiveType},
+    trusted_len::TrustedLen,
+};
+
+use super::ColumnIndex;
+
+pub fn deserialize(indexes: &[PageIndex<Vec<u8>>], data_type: DataType) -> ColumnIndex {
+    ColumnIndex {
+        min: deserialize_binary_iter(
+            indexes.iter().map(|index| index.min.as_ref()),
+            data_type.clone(),
+        ),
+        max: deserialize_binary_iter(indexes.iter().map(|index| index.max.as_ref()), data_type),
+        null_count: PrimitiveArray::from_trusted_len_iter(
+            indexes
+                .iter()
+                .map(|index| index.null_count.map(|x| x as u64)),
+        ),
+    }
+}
+
+fn deserialize_binary_iter<'a, I: TrustedLen<Item = Option<&'a Vec<u8>>>>(
+    iter: I,
+    data_type: DataType,
+) -> Box<dyn Array> {
+    match data_type.to_physical_type() {
+        PhysicalType::Primitive(PrimitiveType::Int128) => {
+            Box::new(PrimitiveArray::from_trusted_len_iter(iter.map(|v| {
+                v.map(|x| {
+                    // Copy the fixed-size byte value to the start of a 16 byte stack
+                    // allocated buffer, then use an arithmetic right shift to fill in
+                    // MSBs, which accounts for leading 1's in negative (two's complement)
+                    // values.
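+                    // E.g. a 2-byte value [0xff, 0xfe] is copied into
+                    // [0xff, 0xfe, 0, ..., 0], read as 0xfffe << 112, and the
+                    // arithmetic shift by 8 * 14 bits sign-extends it to -2i128.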
+                    let n = x.len();
+                    let mut bytes = [0u8; 16];
+                    bytes[..n].copy_from_slice(x);
+                    i128::from_be_bytes(bytes) >> (8 * (16 - n))
+                })
+            })))
+        }
+        _ => {
+            let mut a = MutableFixedSizeBinaryArray::from_data(
+                data_type,
+                Vec::with_capacity(iter.size_hint().0),
+                None,
+            );
+            for item in iter {
+                a.push(item);
+            }
+            let a: FixedSizeBinaryArray = a.into();
+            Box::new(a)
+        }
+    }
+}
diff --git a/src/io/parquet/read/indexes/mod.rs b/src/io/parquet/read/indexes/mod.rs
new file mode 100644
index 00000000000..329fed1a3ff
--- /dev/null
+++ b/src/io/parquet/read/indexes/mod.rs
@@ -0,0 +1,141 @@
+use parquet2::indexes::{
+    BooleanIndex, ByteIndex, FixedLenByteIndex, Index as ParquetIndex, NativeIndex,
+};
+use parquet2::metadata::ColumnChunkMetaData;
+use parquet2::read::read_columns_indexes as _read_columns_indexes;
+use parquet2::schema::types::PhysicalType as ParquetPhysicalType;
+
+mod binary;
+mod boolean;
+mod fixed_len_binary;
+mod primitive;
+
+use std::io::{Read, Seek};
+
+use crate::datatypes::Field;
+use crate::{
+    array::{Array, UInt64Array},
+    datatypes::DataType,
+    error::ArrowError,
+};
+
+/// Arrow-deserialized [`ColumnIndex`] containing the minimum and maximum value
+/// of every page from the column.
+/// # Invariants
+/// The minimum and maximum are guaranteed to have the same logical type.
+#[derive(Debug, PartialEq)]
+pub struct ColumnIndex {
+    /// The minimum values in the pages
+    pub min: Box<dyn Array>,
+    /// The maximum values in the pages
+    pub max: Box<dyn Array>,
+    /// The number of null values in the pages
+    pub null_count: UInt64Array,
+}
+
+impl ColumnIndex {
+    /// The [`DataType`] of the column index.
+    pub fn data_type(&self) -> &DataType {
+        self.min.data_type()
+    }
+}
+
+/// Given a sequence of [`ParquetIndex`] representing the page indexes of each column in the
+/// parquet file, returns the page-level statistics as arrow arrays, as a vector of [`ColumnIndex`].
+///
+/// This function maps timestamps, decimal types, etc. accordingly.
+/// # Implementation
+/// This function is CPU-bounded and runs in `O(P)` where `P` is the total number of pages in all columns.
+/// # Error
+/// This function errors iff a value is not deserializable to arrow (e.g. invalid utf-8)
+fn deserialize(
+    indexes: &[Box<dyn ParquetIndex>],
+    data_types: Vec<DataType>,
+) -> Result<Vec<ColumnIndex>, ArrowError> {
+    indexes
+        .iter()
+        .zip(data_types.into_iter())
+        .map(|(index, data_type)| match index.physical_type() {
+            ParquetPhysicalType::Boolean => {
+                let index = index.as_any().downcast_ref::<BooleanIndex>().unwrap();
+                Ok(boolean::deserialize(&index.indexes))
+            }
+            ParquetPhysicalType::Int32 => {
+                let index = index.as_any().downcast_ref::<NativeIndex<i32>>().unwrap();
+                Ok(primitive::deserialize_i32(&index.indexes, data_type))
+            }
+            ParquetPhysicalType::Int64 => {
+                let index = index.as_any().downcast_ref::<NativeIndex<i64>>().unwrap();
+                Ok(primitive::deserialize_i64(
+                    &index.indexes,
+                    &index.primitive_type,
+                    data_type,
+                ))
+            }
+            ParquetPhysicalType::Int96 => {
+                let index = index
+                    .as_any()
+                    .downcast_ref::<NativeIndex<[u32; 3]>>()
+                    .unwrap();
+                Ok(primitive::deserialize_i96(&index.indexes, data_type))
+            }
+            ParquetPhysicalType::Float => {
+                let index = index.as_any().downcast_ref::<NativeIndex<f32>>().unwrap();
+                Ok(primitive::deserialize_id(&index.indexes, data_type))
+            }
+            ParquetPhysicalType::Double => {
+                let index = index.as_any().downcast_ref::<NativeIndex<f64>>().unwrap();
+                Ok(primitive::deserialize_id(&index.indexes, data_type))
+            }
+            ParquetPhysicalType::ByteArray => {
+                let index = index.as_any().downcast_ref::<ByteIndex>().unwrap();
+                binary::deserialize(&index.indexes, &data_type)
+            }
+            ParquetPhysicalType::FixedLenByteArray(_) => {
+                let index = index.as_any().downcast_ref::<FixedLenByteIndex>().unwrap();
+                Ok(fixed_len_binary::deserialize(&index.indexes, data_type))
+            }
+        })
+        .collect()
+}
+
+// recursive function to get the leaf data_types corresponding to the
+// parquet columns
+fn populate_dt(data_type: &DataType, container: &mut Vec<DataType>) {
+    match data_type.to_logical_type() {
+        DataType::List(inner) => populate_dt(&inner.data_type, container),
+        DataType::LargeList(inner) => populate_dt(&inner.data_type, container),
+        DataType::Dictionary(_, inner, _) => populate_dt(inner, container),
+        DataType::Struct(fields) => fields
+            .iter()
+            .for_each(|f| populate_dt(&f.data_type, container)),
+        _ => container.push(data_type.clone()),
+    }
+}
+
+/// Reads the column indexes from the reader assuming a valid set of derived Arrow fields
+/// for all the parquet columns in the file.
+///
+/// This function is expected to be used to filter out parquet pages.
+///
+/// # Implementation
+/// This function is IO-bounded and calls `reader.read_exact` exactly once.
+/// # Error
+/// Errors iff the indexes can't be read or their deserialization to arrow is incorrect (e.g.
+/// invalid utf-8)
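+/// # Example
+/// A sketch of reading the page-level statistics of the first row group of a
+/// file that was written with page indexes (the file name is illustrative):
+/// ```ignore
+/// let mut reader = std::fs::File::open("file.parquet")?;
+/// let metadata = read_metadata(&mut reader)?;
+/// let schema = infer_schema(&metadata)?;
+/// let indexes = read_columns_indexes(
+///     &mut reader,
+///     metadata.row_groups[0].columns(),
+///     &schema.fields,
+/// )?;
+/// ```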
+pub fn read_columns_indexes<R: Read + Seek>(
+    reader: &mut R,
+    chunks: &[ColumnChunkMetaData],
+    fields: &[Field],
+) -> Result<Vec<ColumnIndex>, ArrowError> {
+    let indexes = _read_columns_indexes(reader, chunks)?;
+
+    // map arrow fields to the corresponding columns in parquet taking into account
+    // that fields may be nested but parquet column indexes are only leaf columns
+    let mut data_types = vec![];
+    fields
+        .iter()
+        .map(|f| &f.data_type)
+        .for_each(|d| populate_dt(d, &mut data_types));
+
+    deserialize(&indexes, data_types)
+}
diff --git a/src/io/parquet/read/indexes/primitive.rs b/src/io/parquet/read/indexes/primitive.rs
new file mode 100644
index 00000000000..103d67bbcf1
--- /dev/null
+++ b/src/io/parquet/read/indexes/primitive.rs
@@ -0,0 +1,204 @@
+use parquet2::indexes::PageIndex;
+use parquet2::schema::types::{PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit};
+use parquet2::types::int96_to_i64_ns;
+
+use crate::array::{Array, MutablePrimitiveArray, PrimitiveArray};
+use crate::datatypes::{DataType, TimeUnit};
+use crate::trusted_len::TrustedLen;
+use crate::types::NativeType;
+
+use super::ColumnIndex;
+
+#[inline]
+fn deserialize_int32<I: TrustedLen<Item = Option<i32>>>(
+    iter: I,
+    data_type: DataType,
+) -> Box<dyn Array> {
+    use DataType::*;
+    match data_type.to_logical_type() {
+        UInt8 => Box::new(
+            PrimitiveArray::<u8>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u8)))
+                .to(data_type),
+        ) as _,
+        UInt16 => Box::new(
+            PrimitiveArray::<u16>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u16)))
+                .to(data_type),
+        ),
+        UInt32 => Box::new(
+            PrimitiveArray::<u32>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u32)))
+                .to(data_type),
+        ),
+        Decimal(_, _) => Box::new(
+            PrimitiveArray::<i128>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128)))
+                .to(data_type),
+        ),
+        _ => Box::new(PrimitiveArray::<i32>::from_trusted_len_iter(iter).to(data_type)),
+    }
+}
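+/// Converts `array`'s values, in place, from the unit declared by the parquet
+/// `logical_type` into the arrow `time_unit`, e.g. microseconds to nanoseconds
+/// multiplies each value by 1_000.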
+#[inline]
+fn timestamp(
+    array: &mut MutablePrimitiveArray<i64>,
+    time_unit: TimeUnit,
+    logical_type: Option<PrimitiveLogicalType>,
+) {
+    let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. }) = logical_type {
+        unit
+    } else {
+        return;
+    };
+
+    match (unit, time_unit) {
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Second) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000),
+        (ParquetTimeUnit::Microseconds, TimeUnit::Second) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000_000),
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Second) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000_000_000),
+
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Millisecond) => {}
+        (ParquetTimeUnit::Microseconds, TimeUnit::Millisecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000),
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Millisecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000_000),
+
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Microsecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x *= 1_000),
+        (ParquetTimeUnit::Microseconds, TimeUnit::Microsecond) => {}
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Microsecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x /= 1_000),
+
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Nanosecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            .for_each(|x| *x *= 1_000_000),
+        (ParquetTimeUnit::Microseconds, TimeUnit::Nanosecond) => array
+            .values_mut_slice()
+            .iter_mut()
+            // microseconds to nanoseconds requires multiplying, not dividing
+            .for_each(|x| *x *= 1_000),
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Nanosecond) => {}
+    }
+}
+
+#[inline]
+fn deserialize_int64<I: TrustedLen<Item = Option<i64>>>(
+    iter: I,
+    primitive_type: &PrimitiveType,
+    data_type: DataType,
+) -> Box<dyn Array> {
+    use DataType::*;
+    match data_type.to_logical_type() {
+        UInt64 => Box::new(
+            PrimitiveArray::<u64>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as u64)))
+                .to(data_type),
+        ) as _,
+        Decimal(_, _) => Box::new(
+            PrimitiveArray::<i128>::from_trusted_len_iter(iter.map(|x| x.map(|x| x as i128)))
+                .to(data_type),
+        ) as _,
+        Timestamp(time_unit, _) => {
+            let mut array =
+                MutablePrimitiveArray::<i64>::from_trusted_len_iter(iter).to(data_type.clone());
+
+            timestamp(&mut array, *time_unit, primitive_type.logical_type);
+
+            let array: PrimitiveArray<i64> = array.into();
+
+            Box::new(array)
+        }
+        _ => Box::new(PrimitiveArray::<i64>::from_trusted_len_iter(iter).to(data_type)),
+    }
+}
+
+#[inline]
+fn deserialize_int96<I: TrustedLen<Item = Option<[u32; 3]>>>(
+    iter: I,
+    data_type: DataType,
+) -> Box<dyn Array> {
+    Box::new(
+        PrimitiveArray::<i64>::from_trusted_len_iter(iter.map(|x| x.map(int96_to_i64_ns)))
+            .to(data_type),
+    )
+}
+
+#[inline]
+fn deserialize_id_s<T: NativeType, I: TrustedLen<Item = Option<T>>>(
+    iter: I,
+    data_type: DataType,
+) -> Box<dyn Array> {
+    Box::new(PrimitiveArray::<T>::from_trusted_len_iter(iter).to(data_type))
+}
+
+pub fn deserialize_i32(indexes: &[PageIndex<i32>], data_type: DataType) -> ColumnIndex {
+    ColumnIndex {
+        min: deserialize_int32(indexes.iter().map(|index| index.min), data_type.clone()),
+        max: deserialize_int32(indexes.iter().map(|index| index.max), data_type),
+        null_count: PrimitiveArray::from_trusted_len_iter(
+            indexes
+                .iter()
+                .map(|index| index.null_count.map(|x| x as u64)),
+        ),
+    }
+}
+
+pub fn deserialize_i64(
+    indexes: &[PageIndex<i64>],
+    primitive_type: &PrimitiveType,
+    data_type: DataType,
+) -> ColumnIndex {
+    ColumnIndex {
+        min: deserialize_int64(
+            indexes.iter().map(|index| index.min),
+            primitive_type,
+            data_type.clone(),
+        ),
+        max: deserialize_int64(
+            indexes.iter().map(|index| index.max),
+            primitive_type,
+            data_type,
+        ),
+        null_count: PrimitiveArray::from_trusted_len_iter(
+            indexes
+                .iter()
+                .map(|index| index.null_count.map(|x| x as u64)),
+        ),
+    }
+}
+
+pub fn deserialize_i96(indexes: 
&[PageIndex<[u32; 3]>], data_type: DataType) -> ColumnIndex { + ColumnIndex { + min: deserialize_int96(indexes.iter().map(|index| index.min), data_type.clone()), + max: deserialize_int96(indexes.iter().map(|index| index.max), data_type), + null_count: PrimitiveArray::from_trusted_len_iter( + indexes + .iter() + .map(|index| index.null_count.map(|x| x as u64)), + ), + } +} + +pub fn deserialize_id(indexes: &[PageIndex], data_type: DataType) -> ColumnIndex { + ColumnIndex { + min: deserialize_id_s(indexes.iter().map(|index| index.min), data_type.clone()), + max: deserialize_id_s(indexes.iter().map(|index| index.max), data_type), + null_count: PrimitiveArray::from_trusted_len_iter( + indexes + .iter() + .map(|index| index.null_count.map(|x| x as u64)), + ), + } +} diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index 14bcbef3c02..dfac4c38df4 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -3,45 +3,47 @@ mod deserialize; mod file; +mod indexes; mod row_group; pub mod schema; pub mod statistics; +use std::{ + io::{Read, Seek}, + sync::Arc, +}; + use futures::{AsyncRead, AsyncSeek}; // re-exports of parquet2's relevant APIs pub use parquet2::{ - error::ParquetError, + error::Error as ParquetError, fallible_streaming_iterator, metadata::{ColumnChunkMetaData, ColumnDescriptor, RowGroupMetaData}, page::{CompressedDataPage, DataPage, DataPageHeader}, read::{ decompress, get_column_iterator, get_page_iterator as _get_page_iterator, - get_page_stream as _get_page_stream, read_metadata as _read_metadata, - read_metadata_async as _read_metadata_async, BasicDecompressor, ColumnChunkIter, - Decompressor, MutStreamingIterator, PageFilter, PageIterator, ReadColumnIterator, State, + get_page_stream as _get_page_stream, read_columns_indexes as _read_columns_indexes, + read_metadata as _read_metadata, read_metadata_async as _read_metadata_async, + read_pages_locations, BasicDecompressor, ColumnChunkIter, Decompressor, + MutStreamingIterator, PageFilter, PageReader, ReadColumnIterator, State, }, schema::types::{ - LogicalType, ParquetType, PhysicalType, PrimitiveConvertedType, - TimeUnit as ParquetTimeUnit, TimestampType, + GroupLogicalType, ParquetType, PhysicalType, PrimitiveConvertedType, PrimitiveLogicalType, + TimeUnit as ParquetTimeUnit, }, types::int96_to_i64_ns, FallibleStreamingIterator, }; +use crate::{array::Array, error::Result}; + pub use deserialize::{column_iter_to_arrays, get_page_iterator}; pub use file::{FileReader, RowGroupReader}; +pub use indexes::{read_columns_indexes, ColumnIndex}; pub use row_group::*; -pub(crate) use schema::is_type_nullable; pub use schema::{infer_schema, FileMetaData}; -use std::{ - io::{Read, Seek}, - sync::Arc, -}; - -use crate::{array::Array, error::Result}; - /// Trait describing a [`FallibleStreamingIterator`] of [`DataPage`] pub trait DataPages: FallibleStreamingIterator + Send + Sync diff --git a/src/io/parquet/read/row_group.rs b/src/io/parquet/read/row_group.rs index f83a65eadbe..b53e60f271f 100644 --- a/src/io/parquet/read/row_group.rs +++ b/src/io/parquet/read/row_group.rs @@ -9,7 +9,7 @@ use futures::{ }; use parquet2::{ metadata::ColumnChunkMetaData, - read::{BasicDecompressor, PageIterator}, + read::{BasicDecompressor, PageReader}, }; use crate::{ @@ -95,7 +95,7 @@ pub(super) fn get_field_columns<'a>( ) -> Vec<&'a ColumnChunkMetaData> { columns .iter() - .filter(|x| x.descriptor().path_in_schema()[0] == field_name) + .filter(|x| x.descriptor().path_in_schema[0] == field_name) .collect() } @@ -181,17 
+181,15 @@ pub fn to_deserializer<'a>( let (columns, types): (Vec<_>, Vec<_>) = columns .into_iter() .map(|(column_meta, chunk)| { - let pages = PageIterator::new( + let pages = PageReader::new( std::io::Cursor::new(chunk), - column_meta.num_values(), - column_meta.compression(), - column_meta.descriptor().clone(), + column_meta, Arc::new(|_, _| true), vec![], ); ( BasicDecompressor::new(pages, vec![]), - column_meta.descriptor().type_(), + &column_meta.descriptor().descriptor.primitive_type, ) }) .unzip(); diff --git a/src/io/parquet/read/schema/convert.rs b/src/io/parquet/read/schema/convert.rs index ae2d66a1b9b..9e32ee30572 100644 --- a/src/io/parquet/read/schema/convert.rs +++ b/src/io/parquet/read/schema/convert.rs @@ -1,8 +1,8 @@ //! This module has a single entry point, [`parquet_to_arrow_schema`]. use parquet2::schema::{ types::{ - BasicTypeInfo, GroupConvertedType, LogicalType, ParquetType, PhysicalType, - PrimitiveConvertedType, TimeUnit as ParquetTimeUnit, TimestampType, + FieldInfo, GroupConvertedType, GroupLogicalType, IntegerType, ParquetType, PhysicalType, + PrimitiveConvertedType, PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit, }, Repetition, }; @@ -16,28 +16,27 @@ pub fn parquet_to_arrow_schema(fields: &[ParquetType]) -> Vec { } fn from_int32( - logical_type: &Option, - converted_type: &Option, + logical_type: Option, + converted_type: Option, ) -> DataType { + use PrimitiveLogicalType::*; match (logical_type, converted_type) { // handle logical types first - (Some(LogicalType::INTEGER(t)), _) => match (t.bit_width, t.is_signed) { - (8, true) => DataType::Int8, - (16, true) => DataType::Int16, - (32, true) => DataType::Int32, - (8, false) => DataType::UInt8, - (16, false) => DataType::UInt16, - (32, false) => DataType::UInt32, + (Some(Integer(t)), _) => match t { + IntegerType::Int8 => DataType::Int8, + IntegerType::Int16 => DataType::Int16, + IntegerType::Int32 => DataType::Int32, + IntegerType::UInt8 => DataType::UInt8, + IntegerType::UInt16 => DataType::UInt16, + IntegerType::UInt32 => DataType::UInt32, // The above are the only possible annotations for parquet's int32. Anything else // is a deviation to the parquet specification and we ignore _ => DataType::Int32, }, - (Some(LogicalType::DECIMAL(t)), _) => { - DataType::Decimal(t.precision as usize, t.scale as usize) - } - (Some(LogicalType::DATE(_)), _) => DataType::Date32, - (Some(LogicalType::TIME(t)), _) => match t.unit { - ParquetTimeUnit::MILLIS(_) => DataType::Time32(TimeUnit::Millisecond), + (Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale), + (Some(Date), _) => DataType::Date32, + (Some(Time { unit, .. }), _) => match unit { + ParquetTimeUnit::Milliseconds => DataType::Time32(TimeUnit::Millisecond), // MILLIS is the only possible annotation for parquet's int32. 
Anything else // is a deviation to the parquet specification and we ignore _ => DataType::Int32, @@ -52,30 +51,32 @@ fn from_int32( (_, Some(PrimitiveConvertedType::Date)) => DataType::Date32, (_, Some(PrimitiveConvertedType::TimeMillis)) => DataType::Time32(TimeUnit::Millisecond), (_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => { - DataType::Decimal(*precision as usize, *scale as usize) + DataType::Decimal(precision, scale) } (_, _) => DataType::Int32, } } fn from_int64( - logical_type: &Option, - converted_type: &Option, + logical_type: Option, + converted_type: Option, ) -> DataType { + use PrimitiveLogicalType::*; match (logical_type, converted_type) { // handle logical types first - (Some(LogicalType::INTEGER(t)), _) if t.bit_width == 64 => match t.is_signed { - true => DataType::Int64, - false => DataType::UInt64, + (Some(Integer(integer)), _) => match integer { + IntegerType::UInt64 => DataType::UInt64, + IntegerType::Int64 => DataType::Int64, + _ => DataType::Int64, }, ( - Some(LogicalType::TIMESTAMP(TimestampType { - is_adjusted_to_u_t_c, + Some(Timestamp { + is_adjusted_to_utc, unit, - })), + }), _, ) => { - let timezone = if *is_adjusted_to_u_t_c { + let timezone = if is_adjusted_to_utc { // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md // A TIMESTAMP with isAdjustedToUTC=true is defined as [...] elapsed since the Unix epoch Some("+00:00".to_string()) @@ -93,21 +94,23 @@ fn from_int64( }; match unit { - ParquetTimeUnit::MILLIS(_) => DataType::Timestamp(TimeUnit::Millisecond, timezone), - ParquetTimeUnit::MICROS(_) => DataType::Timestamp(TimeUnit::Microsecond, timezone), - ParquetTimeUnit::NANOS(_) => DataType::Timestamp(TimeUnit::Nanosecond, timezone), + ParquetTimeUnit::Milliseconds => { + DataType::Timestamp(TimeUnit::Millisecond, timezone) + } + ParquetTimeUnit::Microseconds => { + DataType::Timestamp(TimeUnit::Microsecond, timezone) + } + ParquetTimeUnit::Nanoseconds => DataType::Timestamp(TimeUnit::Nanosecond, timezone), } } - (Some(LogicalType::TIME(t)), _) => match t.unit { - ParquetTimeUnit::MICROS(_) => DataType::Time64(TimeUnit::Microsecond), - ParquetTimeUnit::NANOS(_) => DataType::Time64(TimeUnit::Nanosecond), + (Some(Time { unit, .. }), _) => match unit { + ParquetTimeUnit::Microseconds => DataType::Time64(TimeUnit::Microsecond), + ParquetTimeUnit::Nanoseconds => DataType::Time64(TimeUnit::Nanosecond), // MILLIS is only possible for int32. 
Appearing in int64 is a deviation // to parquet's spec, which we ignore _ => DataType::Int64, }, - (Some(LogicalType::DECIMAL(t)), _) => { - DataType::Decimal(t.precision as usize, t.scale as usize) - } + (Some(Decimal(precision, scale)), _) => DataType::Decimal(precision, scale), // handle converted types: (_, Some(PrimitiveConvertedType::TimeMicros)) => DataType::Time64(TimeUnit::Microsecond), (_, Some(PrimitiveConvertedType::TimestampMillis)) => { @@ -119,7 +122,7 @@ fn from_int64( (_, Some(PrimitiveConvertedType::Int64)) => DataType::Int64, (_, Some(PrimitiveConvertedType::Uint64)) => DataType::UInt64, (_, Some(PrimitiveConvertedType::Decimal(precision, scale))) => { - DataType::Decimal(*precision as usize, *scale as usize) + DataType::Decimal(precision, scale) } (_, _) => DataType::Int64, @@ -127,14 +130,14 @@ fn from_int64( } fn from_byte_array( - logical_type: &Option, + logical_type: &Option, converted_type: &Option, ) -> DataType { match (logical_type, converted_type) { - (Some(LogicalType::STRING(_)), _) => DataType::Utf8, - (Some(LogicalType::JSON(_)), _) => DataType::Binary, - (Some(LogicalType::BSON(_)), _) => DataType::Binary, - (Some(LogicalType::ENUM(_)), _) => DataType::Binary, + (Some(PrimitiveLogicalType::String), _) => DataType::Utf8, + (Some(PrimitiveLogicalType::Json), _) => DataType::Binary, + (Some(PrimitiveLogicalType::Bson), _) => DataType::Binary, + (Some(PrimitiveLogicalType::Enum), _) => DataType::Binary, (_, Some(PrimitiveConvertedType::Json)) => DataType::Binary, (_, Some(PrimitiveConvertedType::Bson)) => DataType::Binary, (_, Some(PrimitiveConvertedType::Enum)) => DataType::Binary, @@ -144,16 +147,16 @@ fn from_byte_array( } fn from_fixed_len_byte_array( - length: &i32, - logical_type: &Option, - converted_type: &Option, + length: usize, + logical_type: Option, + converted_type: Option, ) -> DataType { match (logical_type, converted_type) { - (Some(LogicalType::DECIMAL(t)), _) => { - DataType::Decimal(t.precision as usize, t.scale as usize) + (Some(PrimitiveLogicalType::Decimal(precision, scale)), _) => { + DataType::Decimal(precision, scale) } (None, Some(PrimitiveConvertedType::Decimal(precision, scale))) => { - DataType::Decimal(*precision as usize, *scale as usize) + DataType::Decimal(precision, scale) } (None, Some(PrimitiveConvertedType::Interval)) => { // There is currently no reliable way of determining which IntervalUnit @@ -161,46 +164,45 @@ fn from_fixed_len_byte_array( // would be incorrect if all 12 bytes of the interval are populated DataType::Interval(IntervalUnit::DayTime) } - _ => DataType::FixedSizeBinary(*length as usize), + _ => DataType::FixedSizeBinary(length), } } /// Maps a [`PhysicalType`] with optional metadata to a [`DataType`] -fn to_primitive_type_inner( - physical_type: &PhysicalType, - logical_type: &Option, - converted_type: &Option, -) -> DataType { - match physical_type { +fn to_primitive_type_inner(primitive_type: &PrimitiveType) -> DataType { + match primitive_type.physical_type { PhysicalType::Boolean => DataType::Boolean, - PhysicalType::Int32 => from_int32(logical_type, converted_type), - PhysicalType::Int64 => from_int64(logical_type, converted_type), + PhysicalType::Int32 => { + from_int32(primitive_type.logical_type, primitive_type.converted_type) + } + PhysicalType::Int64 => { + from_int64(primitive_type.logical_type, primitive_type.converted_type) + } PhysicalType::Int96 => DataType::Timestamp(TimeUnit::Nanosecond, None), PhysicalType::Float => DataType::Float32, PhysicalType::Double => DataType::Float64, - 
PhysicalType::ByteArray => from_byte_array(logical_type, converted_type), - PhysicalType::FixedLenByteArray(length) => { - from_fixed_len_byte_array(length, logical_type, converted_type) + PhysicalType::ByteArray => { + from_byte_array(&primitive_type.logical_type, &primitive_type.converted_type) } + PhysicalType::FixedLenByteArray(length) => from_fixed_len_byte_array( + length, + primitive_type.logical_type, + primitive_type.converted_type, + ), } } /// Entry point for converting parquet primitive type to arrow type. /// /// This function takes care of repetition. -fn to_primitive_type( - basic_info: &BasicTypeInfo, - physical_type: &PhysicalType, - logical_type: &Option, - converted_type: &Option, -) -> DataType { - let base_type = to_primitive_type_inner(physical_type, logical_type, converted_type); +fn to_primitive_type(primitive_type: &PrimitiveType) -> DataType { + let base_type = to_primitive_type_inner(primitive_type); - if basic_info.repetition() == &Repetition::Repeated { + if primitive_type.field_info.repetition == Repetition::Repeated { DataType::List(Box::new(Field::new( - basic_info.name(), + &primitive_type.field_info.name, base_type, - is_nullable(basic_info), + is_nullable(&primitive_type.field_info), ))) } else { base_type @@ -208,14 +210,14 @@ fn to_primitive_type( } fn non_repeated_group( - logical_type: &Option, + logical_type: &Option, converted_type: &Option, fields: &[ParquetType], parent_name: &str, ) -> Option { debug_assert!(!fields.is_empty()); match (logical_type, converted_type) { - (Some(LogicalType::LIST(_)), _) => to_list(fields, parent_name), + (Some(GroupLogicalType::List), _) => to_list(fields, parent_name), (None, Some(GroupConvertedType::List)) => to_list(fields, parent_name), _ => to_struct(fields), } @@ -236,18 +238,18 @@ fn to_struct(fields: &[ParquetType]) -> Option { /// /// This function takes care of logical type and repetition. fn to_group_type( - basic_info: &BasicTypeInfo, - logical_type: &Option, + field_info: &FieldInfo, + logical_type: &Option, converted_type: &Option, fields: &[ParquetType], parent_name: &str, ) -> Option { debug_assert!(!fields.is_empty()); - if basic_info.repetition() == &Repetition::Repeated { + if field_info.repetition == Repetition::Repeated { Some(DataType::List(Box::new(Field::new( - basic_info.name(), + &field_info.name, to_struct(fields)?, - is_nullable(basic_info), + is_nullable(field_info), )))) } else { non_repeated_group(logical_type, converted_type, fields, parent_name) @@ -255,8 +257,8 @@ fn to_group_type( } /// Checks whether this schema is nullable. -pub(crate) fn is_nullable(basic_info: &BasicTypeInfo) -> bool { - match basic_info.repetition() { +pub(crate) fn is_nullable(field_info: &FieldInfo) -> bool { + match field_info.repetition { Repetition::Optional => true, Repetition::Repeated => true, Repetition::Required => false, @@ -268,9 +270,9 @@ pub(crate) fn is_nullable(basic_info: &BasicTypeInfo) -> bool { /// i.e. if it is a column-less group type. fn to_field(type_: &ParquetType) -> Option { Some(Field::new( - type_.get_basic_info().name(), + &type_.get_field_info().name, to_data_type(type_)?, - is_nullable(type_.get_basic_info()), + is_nullable(type_.get_field_info()), )) } @@ -282,16 +284,7 @@ fn to_list(fields: &[ParquetType], parent_name: &str) -> Option { let item = fields.first().unwrap(); let item_type = match item { - ParquetType::PrimitiveType { - physical_type, - logical_type, - converted_type, - .. 
- } => Some(to_primitive_type_inner( - physical_type, - logical_type, - converted_type, - )), + ParquetType::PrimitiveType(primitive) => Some(to_primitive_type_inner(primitive)), ParquetType::GroupType { fields, .. } => { if fields.len() == 1 && item.name() != "array" @@ -312,17 +305,17 @@ fn to_list(fields: &[ParquetType], parent_name: &str) -> Option { // Without this step, the child incorrectly inherits the parent's optionality let (list_item_name, item_is_optional) = match item { ParquetType::GroupType { - basic_info, fields, .. - } if basic_info.name() == "list" && fields.len() == 1 => { + field_info, fields, .. + } if field_info.name == "list" && fields.len() == 1 => { let field = fields.first().unwrap(); ( - field.name(), - field.get_basic_info().repetition() != &Repetition::Required, + &field.get_field_info().name, + field.get_field_info().repetition != Repetition::Required, ) } _ => ( - item.name(), - item.get_basic_info().repetition() != &Repetition::Required, + &item.get_field_info().name, + item.get_field_info().repetition != Repetition::Required, ), }; @@ -344,19 +337,9 @@ fn to_list(fields: &[ParquetType], parent_name: &str) -> Option { /// conversion, the result is Ok(None). pub(crate) fn to_data_type(type_: &ParquetType) -> Option { match type_ { - ParquetType::PrimitiveType { - basic_info, - physical_type, - logical_type, - converted_type, - } => Some(to_primitive_type( - basic_info, - physical_type, - logical_type, - converted_type, - )), + ParquetType::PrimitiveType(primitive) => Some(to_primitive_type(primitive)), ParquetType::GroupType { - basic_info, + field_info, logical_type, converted_type, fields, @@ -365,11 +348,11 @@ pub(crate) fn to_data_type(type_: &ParquetType) -> Option { None } else { to_group_type( - basic_info, + field_info, logical_type, converted_type, fields, - basic_info.name(), + &field_info.name, ) } } diff --git a/src/io/parquet/read/schema/mod.rs b/src/io/parquet/read/schema/mod.rs index 0c7e7d4d665..17147fb03b5 100644 --- a/src/io/parquet/read/schema/mod.rs +++ b/src/io/parquet/read/schema/mod.rs @@ -28,7 +28,3 @@ pub fn infer_schema(file_metadata: &FileMetaData) -> Result { Schema { fields, metadata } })) } - -pub(crate) fn is_type_nullable(type_: &ParquetType) -> bool { - is_nullable(type_.get_basic_info()) -} diff --git a/src/io/parquet/read/statistics/primitive.rs b/src/io/parquet/read/statistics/primitive.rs index 91a630692df..9ef9ec7d44e 100644 --- a/src/io/parquet/read/statistics/primitive.rs +++ b/src/io/parquet/read/statistics/primitive.rs @@ -1,14 +1,14 @@ -use crate::datatypes::TimeUnit; -use crate::{datatypes::DataType, types::NativeType}; -use parquet2::schema::types::{ - LogicalType, ParquetType, TimeUnit as ParquetTimeUnit, TimestampType, -}; +use std::any::Any; + +use parquet2::schema::types::{PrimitiveLogicalType, PrimitiveType, TimeUnit as ParquetTimeUnit}; use parquet2::statistics::PrimitiveStatistics as ParquetPrimitiveStatistics; use parquet2::types::NativeType as ParquetNativeType; -use std::any::Any; -use super::Statistics; +use crate::datatypes::TimeUnit; use crate::error::Result; +use crate::{datatypes::DataType, types::NativeType}; + +use super::Statistics; /// Arrow-deserialized parquet Statistics of a primitive type #[derive(Debug, Clone, PartialEq)] @@ -74,35 +74,29 @@ pub(super) fn statistics_from_i32( }) } -fn timestamp(type_: &ParquetType, time_unit: TimeUnit, x: i64) -> i64 { - let logical_type = if let ParquetType::PrimitiveType { logical_type, .. 
} = type_ {
-        logical_type
-    } else {
-        unreachable!()
-    };
-
-    let unit = if let Some(LogicalType::TIMESTAMP(TimestampType { unit, .. })) = logical_type {
+fn timestamp(type_: &PrimitiveType, time_unit: TimeUnit, x: i64) -> i64 {
+    let unit = if let Some(PrimitiveLogicalType::Timestamp { unit, .. }) = &type_.logical_type {
         unit
     } else {
         return x;
     };
 
     match (unit, time_unit) {
-        (ParquetTimeUnit::MILLIS(_), TimeUnit::Second) => x / 1_000,
-        (ParquetTimeUnit::MICROS(_), TimeUnit::Second) => x / 1_000_000,
-        (ParquetTimeUnit::NANOS(_), TimeUnit::Second) => x * 1_000_000_000,
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Second) => x / 1_000,
+        (ParquetTimeUnit::Microseconds, TimeUnit::Second) => x / 1_000_000,
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Second) => x / 1_000_000_000,
 
-        (ParquetTimeUnit::MILLIS(_), TimeUnit::Millisecond) => x,
-        (ParquetTimeUnit::MICROS(_), TimeUnit::Millisecond) => x / 1_000,
-        (ParquetTimeUnit::NANOS(_), TimeUnit::Millisecond) => x / 1_000_000,
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Millisecond) => x,
+        (ParquetTimeUnit::Microseconds, TimeUnit::Millisecond) => x / 1_000,
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Millisecond) => x / 1_000_000,
 
-        (ParquetTimeUnit::MILLIS(_), TimeUnit::Microsecond) => x * 1_000,
-        (ParquetTimeUnit::MICROS(_), TimeUnit::Microsecond) => x,
-        (ParquetTimeUnit::NANOS(_), TimeUnit::Microsecond) => x / 1_000,
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Microsecond) => x * 1_000,
+        (ParquetTimeUnit::Microseconds, TimeUnit::Microsecond) => x,
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Microsecond) => x / 1_000,
 
-        (ParquetTimeUnit::MILLIS(_), TimeUnit::Nanosecond) => x * 1_000_000,
-        (ParquetTimeUnit::MICROS(_), TimeUnit::Nanosecond) => x * 1_000,
-        (ParquetTimeUnit::NANOS(_), TimeUnit::Nanosecond) => x,
+        (ParquetTimeUnit::Milliseconds, TimeUnit::Nanosecond) => x * 1_000_000,
+        (ParquetTimeUnit::Microseconds, TimeUnit::Nanosecond) => x * 1_000,
+        (ParquetTimeUnit::Nanoseconds, TimeUnit::Nanosecond) => x,
     }
 }
 
@@ -121,10 +115,10 @@ pub(super) fn statistics_from_i64(
         distinct_count: stats.distinct_count,
         min_value: stats
             .min_value
-            .map(|x| timestamp(stats.descriptor.type_(), time_unit, x)),
+            .map(|x| timestamp(&stats.primitive_type, time_unit, x)),
         max_value: stats
             .max_value
-            .map(|x| timestamp(stats.descriptor.type_(), time_unit, x)),
+            .map(|x| timestamp(&stats.primitive_type, time_unit, x)),
         }),
         Decimal(_, _) => Box::new(PrimitiveStatistics::<i128>::from((stats, data_type))),
         _ => Box::new(PrimitiveStatistics::<i64>::from((stats, data_type))),
diff --git a/src/io/parquet/write/binary/basic.rs b/src/io/parquet/write/binary/basic.rs
index 7a7c4cd805e..277a5c192a4 100644
--- a/src/io/parquet/write/binary/basic.rs
+++ b/src/io/parquet/write/binary/basic.rs
@@ -1,17 +1,18 @@
 use parquet2::{
     encoding::{delta_bitpacked, Encoding},
-    metadata::ColumnDescriptor,
+    metadata::Descriptor,
     page::DataPage,
+    schema::types::PrimitiveType,
     statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics},
-    write::WriteOptions,
 };
 
 use super::super::utils;
+use super::super::WriteOptions;
 use crate::{
     array::{Array, BinaryArray, Offset},
     bitmap::Bitmap,
     error::{ArrowError, Result},
-    io::parquet::read::is_type_nullable,
+    io::parquet::read::schema::is_nullable,
 };
 
 pub(crate) fn encode_plain<O: Offset>(
@@ -42,11 +43,11 @@ pub(crate) fn encode_plain<O: Offset>(
 pub fn array_to_page<O: Offset>(
     array: &BinaryArray<O>,
     options: WriteOptions,
-    descriptor: ColumnDescriptor,
+    descriptor: Descriptor,
     encoding: Encoding,
 ) -> Result<DataPage> {
     let validity = array.validity();
-    let 
is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let mut buffer = vec![]; utils::write_def_levels( @@ -78,7 +79,7 @@ pub fn array_to_page( } let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -86,6 +87,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -96,12 +98,12 @@ pub fn array_to_page( ) } -pub(super) fn build_statistics( +pub(crate) fn build_statistics( array: &BinaryArray, - descriptor: ColumnDescriptor, + primitive_type: PrimitiveType, ) -> ParquetStatistics { let statistics = &BinaryStatistics { - descriptor, + primitive_type, null_count: Some(array.null_count() as i64), distinct_count: None, max_value: array diff --git a/src/io/parquet/write/binary/mod.rs b/src/io/parquet/write/binary/mod.rs index 8d9e94cd0fb..e229572b14a 100644 --- a/src/io/parquet/write/binary/mod.rs +++ b/src/io/parquet/write/binary/mod.rs @@ -2,6 +2,7 @@ mod basic; mod nested; pub use basic::array_to_page; +pub(crate) use basic::build_statistics; pub(crate) use basic::encode_plain; pub(super) use basic::{encode_delta, ord_binary}; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/binary/nested.rs b/src/io/parquet/write/binary/nested.rs index 9161741c4cb..a6e65e2f7e4 100644 --- a/src/io/parquet/write/binary/nested.rs +++ b/src/io/parquet/write/binary/nested.rs @@ -1,26 +1,25 @@ -use parquet2::{ - encoding::Encoding, metadata::ColumnDescriptor, page::DataPage, write::WriteOptions, -}; +use parquet2::metadata::Descriptor; +use parquet2::{encoding::Encoding, page::DataPage}; -use super::super::{levels, utils}; +use super::super::{levels, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; +use crate::io::parquet::read::schema::is_nullable; use crate::{ array::{Array, BinaryArray, Offset}, error::Result, - io::parquet::read::is_type_nullable, }; pub fn array_to_page( array: &BinaryArray, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, nested: levels::NestedInfo, ) -> Result where OO: Offset, O: Offset, { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -34,7 +33,7 @@ where encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -42,6 +41,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/boolean/basic.rs b/src/io/parquet/write/boolean/basic.rs index f9046d6d585..643a25cd5b2 100644 --- a/src/io/parquet/write/boolean/basic.rs +++ b/src/io/parquet/write/boolean/basic.rs @@ -1,14 +1,14 @@ use parquet2::{ encoding::{hybrid_rle::bitpacked_encode, Encoding}, - metadata::ColumnDescriptor, + metadata::Descriptor, page::DataPage, statistics::{serialize_statistics, BooleanStatistics, ParquetStatistics, Statistics}, - write::WriteOptions, }; use super::super::utils; -use crate::error::Result; -use crate::{array::*, io::parquet::read::is_type_nullable}; +use 
super::super::WriteOptions; +use crate::array::*; +use crate::{error::Result, io::parquet::read::schema::is_nullable}; fn encode(iterator: impl Iterator, buffer: &mut Vec) -> Result<()> { // encode values using bitpacking @@ -41,9 +41,9 @@ pub(super) fn encode_plain( pub fn array_to_page( array: &BooleanArray, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, ) -> Result { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -69,6 +69,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, diff --git a/src/io/parquet/write/boolean/nested.rs b/src/io/parquet/write/boolean/nested.rs index 427c7a05925..40645eea3d2 100644 --- a/src/io/parquet/write/boolean/nested.rs +++ b/src/io/parquet/write/boolean/nested.rs @@ -1,25 +1,23 @@ -use parquet2::{ - encoding::Encoding, metadata::ColumnDescriptor, page::DataPage, write::WriteOptions, -}; +use parquet2::{encoding::Encoding, metadata::Descriptor, page::DataPage}; -use super::super::{levels, utils}; +use super::super::{levels, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; +use crate::io::parquet::read::schema::is_nullable; use crate::{ array::{Array, BooleanArray, Offset}, error::Result, - io::parquet::read::is_type_nullable, }; pub fn array_to_page( array: &BooleanArray, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, nested: levels::NestedInfo, ) -> Result where O: Offset, { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -41,6 +39,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/dictionary.rs b/src/io/parquet/write/dictionary.rs index 521c863aac7..7a36f2bbef1 100644 --- a/src/io/parquet/write/dictionary.rs +++ b/src/io/parquet/write/dictionary.rs @@ -1,29 +1,37 @@ use parquet2::{ encoding::{hybrid_rle::encode_u32, Encoding}, - metadata::ColumnDescriptor, + metadata::Descriptor, page::{EncodedDictPage, EncodedPage}, - write::{DynIter, WriteOptions}, + statistics::ParquetStatistics, + write::DynIter, }; +use super::binary::build_statistics as binary_build_statistics; use super::binary::encode_plain as binary_encode_plain; +use super::fixed_len_bytes::build_statistics as fixed_binary_build_statistics; use super::fixed_len_bytes::encode_plain as fixed_binary_encode_plain; +use super::primitive::build_statistics as primitive_build_statistics; use super::primitive::encode_plain as primitive_encode_plain; +use super::utf8::build_statistics as utf8_build_statistics; use super::utf8::encode_plain as utf8_encode_plain; -use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}; +use super::WriteOptions; use crate::bitmap::Bitmap; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; -use crate::io::parquet::read::is_type_nullable; use crate::io::parquet::write::utils; +use crate::{ + array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}, + io::parquet::read::schema::is_nullable, +}; fn encode_keys( array: &PrimitiveArray, - // todo: merge this to not discard values' validity validity: Option<&Bitmap>, - descriptor: 
ColumnDescriptor, + descriptor: Descriptor, + statistics: ParquetStatistics, options: WriteOptions, ) -> Result { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let mut buffer = vec![]; @@ -94,10 +102,11 @@ fn encode_keys( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, - None, + Some(statistics), descriptor, options, Encoding::RleDictionary, @@ -106,74 +115,83 @@ fn encode_keys( } macro_rules! dyn_prim { - ($from:ty, $to:ty, $array:expr, $options:expr) => {{ + ($from:ty, $to:ty, $array:expr, $options:expr, $descriptor:expr) => {{ let values = $array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; primitive_encode_plain::<$from, $to>(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + ( + EncodedDictPage::new(buffer, values.len()), + primitive_build_statistics::<$from, $to>(values, $descriptor.primitive_type.clone()), + ) }}; } pub fn array_to_pages( array: &DictionaryArray, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, encoding: Encoding, ) -> Result>> { match encoding { Encoding::PlainDictionary | Encoding::RleDictionary => { // write DictPage - let dict_page = match array.values().data_type().to_logical_type() { - DataType::Int8 => dyn_prim!(i8, i32, array, options), - DataType::Int16 => dyn_prim!(i16, i32, array, options), + let (dict_page, statistics) = match array.values().data_type().to_logical_type() { + DataType::Int8 => dyn_prim!(i8, i32, array, options, descriptor), + DataType::Int16 => dyn_prim!(i16, i32, array, options, descriptor), DataType::Int32 | DataType::Date32 | DataType::Time32(_) => { - dyn_prim!(i32, i32, array, options) + dyn_prim!(i32, i32, array, options, descriptor) } DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) => dyn_prim!(i64, i64, array, options), - DataType::UInt8 => dyn_prim!(u8, i32, array, options), - DataType::UInt16 => dyn_prim!(u16, i32, array, options), - DataType::UInt32 => dyn_prim!(u32, i32, array, options), - DataType::UInt64 => dyn_prim!(i64, i64, array, options), - DataType::Float32 => dyn_prim!(f32, f32, array, options), - DataType::Float64 => dyn_prim!(f64, f64, array, options), + | DataType::Duration(_) => dyn_prim!(i64, i64, array, options, descriptor), + DataType::UInt8 => dyn_prim!(u8, i32, array, options, descriptor), + DataType::UInt16 => dyn_prim!(u16, i32, array, options, descriptor), + DataType::UInt32 => dyn_prim!(u32, i32, array, options, descriptor), + DataType::UInt64 => dyn_prim!(i64, i64, array, options, descriptor), + DataType::Float32 => dyn_prim!(f32, f32, array, options, descriptor), + DataType::Float64 => dyn_prim!(f64, f64, array, options, descriptor), DataType::Utf8 => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - utf8_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + utf8_encode_plain::(array, false, &mut buffer); + let stats = utf8_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::LargeUtf8 => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - utf8_encode_plain::(values, false, &mut buffer); - 
EncodedDictPage::new(buffer, values.len()) + utf8_encode_plain::(array, false, &mut buffer); + let stats = utf8_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::Binary => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - binary_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + binary_encode_plain::(array, false, &mut buffer); + let stats = binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::LargeBinary => { - let values = array.values().as_any().downcast_ref().unwrap(); + let array = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; - binary_encode_plain::(values, false, &mut buffer); - EncodedDictPage::new(buffer, values.len()) + binary_encode_plain::(array, false, &mut buffer); + let stats = binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } DataType::FixedSizeBinary(_) => { let mut buffer = vec![]; let array = array.values().as_any().downcast_ref().unwrap(); fixed_binary_encode_plain(array, false, &mut buffer); - EncodedDictPage::new(buffer, array.len()) + let stats = + fixed_binary_build_statistics(array, descriptor.primitive_type.clone()); + (EncodedDictPage::new(buffer, array.len()), stats) } other => { return Err(ArrowError::NotYetImplemented(format!( @@ -185,8 +203,13 @@ pub fn array_to_pages( let dict_page = EncodedPage::Dict(dict_page); // write DataPage pointing to DictPage - let data_page = - encode_keys(array.keys(), array.values().validity(), descriptor, options)?; + let data_page = encode_keys( + array.keys(), + array.values().validity(), + descriptor, + statistics, + options, + )?; let iter = std::iter::once(Ok(dict_page)).chain(std::iter::once(Ok(data_page))); Ok(DynIter::new(Box::new(iter))) diff --git a/src/io/parquet/write/file.rs b/src/io/parquet/write/file.rs index 47f595a1717..27c62f6edf5 100644 --- a/src/io/parquet/write/file.rs +++ b/src/io/parquet/write/file.rs @@ -1,13 +1,14 @@ use std::io::Write; +use parquet2::metadata::KeyValue; use parquet2::metadata::SchemaDescriptor; use parquet2::write::RowGroupIter; -use parquet2::{metadata::KeyValue, write::WriteOptions}; +use parquet2::write::WriteOptions as FileWriteOptions; use crate::datatypes::Schema; use crate::error::{ArrowError, Result}; -use super::{schema::schema_to_metadata_key, to_parquet_schema}; +use super::{schema::schema_to_metadata_key, to_parquet_schema, WriteOptions}; /// Attaches [`Schema`] to `key_value_metadata` pub fn add_arrow_schema( @@ -26,13 +27,14 @@ pub fn add_arrow_schema( pub struct FileWriter { writer: parquet2::write::FileWriter, schema: Schema, + options: WriteOptions, } // Accessors impl FileWriter { /// The options assigned to the file - pub fn options(&self) -> &WriteOptions { - self.writer.options() + pub fn options(&self) -> WriteOptions { + self.options } /// The [`SchemaDescriptor`] assigned to this file @@ -56,8 +58,17 @@ impl FileWriter { let created_by = Some("Arrow2 - Native Rust implementation of Arrow".to_string()); Ok(Self { - writer: parquet2::write::FileWriter::new(writer, parquet_schema, options, created_by), + writer: parquet2::write::FileWriter::new( + writer, + parquet_schema, + FileWriteOptions { + version: options.version, + write_statistics: options.write_statistics, + }, + 
created_by, + ), schema, + options, }) } @@ -67,17 +78,18 @@ impl FileWriter { } /// Writes a row group to the file. - pub fn write( - &mut self, - row_group: RowGroupIter<'_, ArrowError>, - num_rows: usize, - ) -> Result<()> { - Ok(self.writer.write(row_group, num_rows)?) + pub fn write(&mut self, row_group: RowGroupIter<'_, ArrowError>) -> Result<()> { + Ok(self.writer.write(row_group)?) } /// Writes the footer of the parquet file. Returns the total size of the file. - pub fn end(self, key_value_metadata: Option>) -> Result<(u64, W)> { + pub fn end(&mut self, key_value_metadata: Option>) -> Result { let key_value_metadata = add_arrow_schema(&self.schema, key_value_metadata); Ok(self.writer.end(key_value_metadata)?) } + + /// Consumes this writer and returns the inner writer + pub fn into_inner(self) -> W { + self.writer.into_inner() + } } diff --git a/src/io/parquet/write/fixed_len_bytes.rs b/src/io/parquet/write/fixed_len_bytes.rs index e129ab66c46..59ae75134e8 100644 --- a/src/io/parquet/write/fixed_len_bytes.rs +++ b/src/io/parquet/write/fixed_len_bytes.rs @@ -1,16 +1,16 @@ use parquet2::{ encoding::Encoding, - metadata::ColumnDescriptor, + metadata::Descriptor, page::DataPage, - statistics::{deserialize_statistics, serialize_statistics, ParquetStatistics}, - write::WriteOptions, + schema::types::PrimitiveType, + statistics::{serialize_statistics, FixedLenStatistics, ParquetStatistics, Statistics}, }; -use super::{binary::ord_binary, utils}; +use super::{binary::ord_binary, utils, WriteOptions}; use crate::{ array::{Array, FixedSizeBinaryArray}, error::Result, - io::parquet::read::is_type_nullable, + io::parquet::read::schema::is_nullable, }; pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buffer: &mut Vec) { @@ -29,9 +29,9 @@ pub(crate) fn encode_plain(array: &FixedSizeBinaryArray, is_optional: bool, buff pub fn array_to_page( array: &FixedSizeBinaryArray, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, ) -> Result { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); let mut buffer = vec![]; @@ -48,7 +48,7 @@ pub fn array_to_page( encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - build_statistics(array, descriptor.clone()) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -56,6 +56,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -68,11 +69,10 @@ pub fn array_to_page( pub(super) fn build_statistics( array: &FixedSizeBinaryArray, - descriptor: ColumnDescriptor, -) -> Option { - let pq_statistics = &ParquetStatistics { - max: None, - min: None, + primitive_type: PrimitiveType, +) -> ParquetStatistics { + let statistics = &FixedLenStatistics { + primitive_type, null_count: Some(array.null_count() as i64), distinct_count: None, max_value: array @@ -85,8 +85,6 @@ pub(super) fn build_statistics( .flatten() .min_by(|x, y| ord_binary(x, y)) .map(|x| x.to_vec()), - }; - deserialize_statistics(pq_statistics, descriptor) - .map(|e| serialize_statistics(&*e)) - .ok() + } as &dyn Statistics; + serialize_statistics(statistics) } diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 7c31f27fc52..07bf6211edd 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -16,7 +16,7 @@ use crate::array::*; use 
crate::bitmap::Bitmap; use crate::datatypes::*; use crate::error::{ArrowError, Result}; -use crate::io::parquet::read::is_type_nullable; +use crate::io::parquet::read::schema::is_nullable; use crate::io::parquet::write::levels::NestedInfo; use crate::types::days_ms; use crate::types::NativeType; @@ -26,15 +26,24 @@ pub use parquet2::{ compression::Compression, encoding::Encoding, fallible_streaming_iterator, - metadata::{ColumnDescriptor, KeyValue, SchemaDescriptor}, + metadata::{Descriptor, KeyValue, SchemaDescriptor}, page::{CompressedDataPage, CompressedPage, EncodedPage}, schema::types::ParquetType, - write::{ - compress, Compressor, DynIter, DynStreamingIterator, RowGroupIter, Version, WriteOptions, - }, + write::{compress, Compressor, DynIter, DynStreamingIterator, RowGroupIter, Version}, FallibleStreamingIterator, }; +/// Currently supported options to write to parquet +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct WriteOptions { + /// Whether to write statistics + pub write_statistics: bool, + /// The page and file version to use + pub version: Version, + /// The compression to apply to every page + pub compression: Compression, +} + pub use file::FileWriter; pub use row_group::{row_group_iter, RowGroupIterator}; pub use schema::to_parquet_type; @@ -80,7 +89,7 @@ pub fn can_encode(data_type: &DataType, encoding: Encoding) -> bool { /// Returns an iterator of [`EncodedPage`]. pub fn array_to_pages( array: &dyn Array, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, encoding: Encoding, ) -> Result>> { @@ -103,7 +112,7 @@ pub fn array_to_pages( /// Converts an [`Array`] to a [`CompressedPage`] based on options, descriptor and `encoding`. pub fn array_to_page( array: &dyn Array, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, encoding: Encoding, ) -> Result { @@ -316,11 +325,11 @@ fn list_array_to_page( offsets: &[O], validity: Option<&Bitmap>, values: &dyn Array, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, ) -> Result { use DataType::*; - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let nested = NestedInfo::new(offsets, validity, is_optional); match values.data_type() { @@ -347,47 +356,19 @@ fn list_array_to_page( Utf8 => { let values = values.as_any().downcast_ref().unwrap(); - let is_optional = is_type_nullable(descriptor.type_()); - - utf8::nested_array_to_page::( - values, - options, - descriptor, - NestedInfo::new(offsets, validity, is_optional), - ) + utf8::nested_array_to_page::(values, options, descriptor, nested) } LargeUtf8 => { let values = values.as_any().downcast_ref().unwrap(); - let is_optional = is_type_nullable(descriptor.type_()); - - utf8::nested_array_to_page::( - values, - options, - descriptor, - NestedInfo::new(offsets, validity, is_optional), - ) + utf8::nested_array_to_page::(values, options, descriptor, nested) } Binary => { let values = values.as_any().downcast_ref().unwrap(); - let is_optional = is_type_nullable(descriptor.type_()); - - binary::nested_array_to_page::( - values, - options, - descriptor, - NestedInfo::new(offsets, validity, is_optional), - ) + binary::nested_array_to_page::(values, options, descriptor, nested) } LargeBinary => { let values = values.as_any().downcast_ref().unwrap(); - let is_optional = is_type_nullable(descriptor.type_()); - - binary::nested_array_to_page::( - values, - options, - descriptor, - NestedInfo::new(offsets, validity, 
is_optional), - ) + binary::nested_array_to_page::(values, options, descriptor, nested) } _ => todo!(), } @@ -395,7 +376,7 @@ fn list_array_to_page( fn nested_array_to_page( array: &dyn Array, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, ) -> Result { match array.data_type() { diff --git a/src/io/parquet/write/primitive/basic.rs b/src/io/parquet/write/primitive/basic.rs index 9b9deb16d0b..1c58804fa1a 100644 --- a/src/io/parquet/write/primitive/basic.rs +++ b/src/io/parquet/write/primitive/basic.rs @@ -1,17 +1,18 @@ use parquet2::{ encoding::Encoding, - metadata::ColumnDescriptor, + metadata::Descriptor, page::DataPage, + schema::types::PrimitiveType, statistics::{serialize_statistics, ParquetStatistics, PrimitiveStatistics, Statistics}, types::NativeType, - write::WriteOptions, }; use super::super::utils; +use super::super::WriteOptions; use crate::{ array::{Array, PrimitiveArray}, error::Result, - io::parquet::read::is_type_nullable, + io::parquet::read::schema::is_nullable, types::NativeType as ArrowNativeType, }; @@ -41,14 +42,14 @@ where pub fn array_to_page( array: &PrimitiveArray, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, ) -> Result where T: ArrowNativeType, R: NativeType, T: num_traits::AsPrimitive, { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -66,7 +67,7 @@ where encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -74,6 +75,7 @@ where utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -86,7 +88,7 @@ where pub fn build_statistics( array: &PrimitiveArray, - descriptor: ColumnDescriptor, + primitive_type: PrimitiveType, ) -> ParquetStatistics where T: ArrowNativeType, @@ -94,7 +96,7 @@ where T: num_traits::AsPrimitive, { let statistics = &PrimitiveStatistics:: { - descriptor, + primitive_type, null_count: Some(array.null_count() as i64), distinct_count: None, max_value: array diff --git a/src/io/parquet/write/primitive/mod.rs b/src/io/parquet/write/primitive/mod.rs index ddeb6541605..eec1d695d1d 100644 --- a/src/io/parquet/write/primitive/mod.rs +++ b/src/io/parquet/write/primitive/mod.rs @@ -2,5 +2,6 @@ mod basic; mod nested; pub use basic::array_to_page; +pub(crate) use basic::build_statistics; pub(crate) use basic::encode_plain; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/primitive/nested.rs b/src/io/parquet/write/primitive/nested.rs index 5be103d08b9..86732fdae97 100644 --- a/src/io/parquet/write/primitive/nested.rs +++ b/src/io/parquet/write/primitive/nested.rs @@ -1,22 +1,20 @@ -use parquet2::{ - encoding::Encoding, metadata::ColumnDescriptor, page::DataPage, types::NativeType, - write::WriteOptions, -}; +use parquet2::{encoding::Encoding, metadata::Descriptor, page::DataPage, types::NativeType}; use super::super::levels; use super::super::utils; +use super::super::WriteOptions; use super::basic::{build_statistics, encode_plain}; +use crate::io::parquet::read::schema::is_nullable; use crate::{ array::{Array, Offset, PrimitiveArray}, error::Result, - io::parquet::read::is_type_nullable, types::NativeType as ArrowNativeType, }; pub fn array_to_page( array: &PrimitiveArray, options: WriteOptions, - 
descriptor: ColumnDescriptor, + descriptor: Descriptor, nested: levels::NestedInfo, ) -> Result where @@ -25,7 +23,7 @@ where T: num_traits::AsPrimitive, O: Offset, { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -39,7 +37,7 @@ where encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -47,6 +45,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/row_group.rs b/src/io/parquet/write/row_group.rs index f6076808ac5..5c419640395 100644 --- a/src/io/parquet/write/row_group.rs +++ b/src/io/parquet/write/row_group.rs @@ -28,13 +28,15 @@ pub fn row_group_iter + 'static + Send + Sync>( .zip(columns.into_iter()) .zip(encodings.into_iter()) .map(move |((array, descriptor), encoding)| { - array_to_pages(array.as_ref(), descriptor, options, encoding).map(move |pages| { - let encoded_pages = DynIter::new(pages.map(|x| Ok(x?))); - let compressed_pages = - Compressor::new(encoded_pages, options.compression, vec![]) - .map_err(ArrowError::from); - DynStreamingIterator::new(compressed_pages) - }) + array_to_pages(array.as_ref(), descriptor.descriptor, options, encoding).map( + move |pages| { + let encoded_pages = DynIter::new(pages.map(|x| Ok(x?))); + let compressed_pages = + Compressor::new(encoded_pages, options.compression, vec![]) + .map_err(ArrowError::from); + DynStreamingIterator::new(compressed_pages) + }, + ) }), ) } @@ -78,23 +80,19 @@ impl + 'static, I: Iterator>>> RowGro impl + 'static + Send + Sync, I: Iterator>>> Iterator for RowGroupIterator { - type Item = Result<(RowGroupIter<'static, ArrowError>, usize)>; + type Item = Result>; fn next(&mut self) -> Option { let options = self.options; self.iter.next().map(|maybe_chunk| { let chunk = maybe_chunk?; - let len = chunk.len(); let encodings = self.encodings.clone(); - Ok(( - row_group_iter( - chunk, - encodings, - self.parquet_schema.columns().to_vec(), - options, - ), - len, + Ok(row_group_iter( + chunk, + encodings, + self.parquet_schema.columns().to_vec(), + options, )) }) } diff --git a/src/io/parquet/write/schema.rs b/src/io/parquet/write/schema.rs index ea05e7e19cd..7bf5e48b6cd 100644 --- a/src/io/parquet/write/schema.rs +++ b/src/io/parquet/write/schema.rs @@ -2,8 +2,8 @@ use parquet2::{ metadata::KeyValue, schema::{ types::{ - DecimalType, IntType, LogicalType, ParquetType, PhysicalType, PrimitiveConvertedType, - TimeType, TimeUnit as ParquetTimeUnit, TimestampType, + GroupLogicalType, IntegerType, ParquetType, PhysicalType, PrimitiveConvertedType, + PrimitiveLogicalType, TimeUnit as ParquetTimeUnit, }, Repetition, }, @@ -53,7 +53,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, None, - Some(LogicalType::UNKNOWN(Default::default())), + Some(PrimitiveLogicalType::Unknown), None, )?), DataType::Boolean => Ok(ParquetType::try_from_primitive( @@ -120,7 +120,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::ByteArray, repetition, Some(PrimitiveConvertedType::Utf8), - Some(LogicalType::STRING(Default::default())), + Some(PrimitiveLogicalType::String), None, )?), DataType::Date32 => 
Ok(ParquetType::try_from_primitive( @@ -128,7 +128,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Date), - Some(LogicalType::DATE(Default::default())), + Some(PrimitiveLogicalType::Date), None, )?), DataType::Int8 => Ok(ParquetType::try_from_primitive( @@ -136,10 +136,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Int8), - Some(LogicalType::INTEGER(IntType { - bit_width: 8, - is_signed: true, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::Int8)), None, )?), DataType::Int16 => Ok(ParquetType::try_from_primitive( @@ -147,10 +144,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Int16), - Some(LogicalType::INTEGER(IntType { - bit_width: 16, - is_signed: true, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::Int16)), None, )?), DataType::UInt8 => Ok(ParquetType::try_from_primitive( @@ -158,10 +152,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Uint8), - Some(LogicalType::INTEGER(IntType { - bit_width: 8, - is_signed: false, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::UInt8)), None, )?), DataType::UInt16 => Ok(ParquetType::try_from_primitive( @@ -169,10 +160,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Uint16), - Some(LogicalType::INTEGER(IntType { - bit_width: 16, - is_signed: false, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::UInt16)), None, )?), DataType::UInt32 => Ok(ParquetType::try_from_primitive( @@ -180,10 +168,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::Uint32), - Some(LogicalType::INTEGER(IntType { - bit_width: 32, - is_signed: false, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::UInt32)), None, )?), DataType::UInt64 => Ok(ParquetType::try_from_primitive( @@ -191,10 +176,7 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int64, repetition, Some(PrimitiveConvertedType::Uint64), - Some(LogicalType::INTEGER(IntType { - bit_width: 64, - is_signed: false, - })), + Some(PrimitiveLogicalType::Integer(IntegerType::UInt64)), None, )?), // no natural representation in parquet; leave it as is. @@ -212,15 +194,15 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int64, repetition, None, - Some(LogicalType::TIMESTAMP(TimestampType { - is_adjusted_to_u_t_c: matches!(zone, Some(z) if !z.as_str().is_empty()), + Some(PrimitiveLogicalType::Timestamp { + is_adjusted_to_utc: matches!(zone, Some(z) if !z.as_str().is_empty()), unit: match time_unit { TimeUnit::Second => unreachable!(), - TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()), - TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()), - TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()), + TimeUnit::Millisecond => ParquetTimeUnit::Milliseconds, + TimeUnit::Microsecond => ParquetTimeUnit::Microseconds, + TimeUnit::Nanosecond => ParquetTimeUnit::Nanoseconds, }, - })), + }), None, )?), // no natural representation in parquet; leave it as is. 
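Note: the hunks above and below are representative of the whole schema migration: parquet2's struct-style logical types, e.g. `LogicalType::INTEGER(IntType { bit_width: 8, is_signed: true })`, become plain enum variants such as `PrimitiveLogicalType::Integer(IntegerType::Int8)`. A minimal sketch of the integer mapping under the new API, restricted to the `IntegerType` variants this patch exercises (the helper `integer_logical_type` is illustrative, not part of the patch):

use parquet2::schema::types::{IntegerType, PrimitiveLogicalType};

// Hypothetical helper: the old (bit_width, is_signed) pair now selects an
// enum variant directly instead of filling an IntType struct.
fn integer_logical_type(bit_width: u8, is_signed: bool) -> Option<PrimitiveLogicalType> {
    let integer = match (bit_width, is_signed) {
        (8, true) => IntegerType::Int8,
        (16, true) => IntegerType::Int16,
        (8, false) => IntegerType::UInt8,
        (16, false) => IntegerType::UInt16,
        (32, false) => IntegerType::UInt32,
        (64, false) => IntegerType::UInt64,
        _ => return None,
    };
    Some(PrimitiveLogicalType::Integer(integer))
}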
@@ -238,10 +220,10 @@ pub fn to_parquet_type(field: &Field) -> Result { PhysicalType::Int32, repetition, Some(PrimitiveConvertedType::TimeMillis), - Some(LogicalType::TIME(TimeType { - is_adjusted_to_u_t_c: false, - unit: ParquetTimeUnit::MILLIS(Default::default()), - })), + Some(PrimitiveLogicalType::Time { + is_adjusted_to_utc: false, + unit: ParquetTimeUnit::Milliseconds, + }), None, )?), DataType::Time64(time_unit) => Ok(ParquetType::try_from_primitive( @@ -253,14 +235,14 @@ pub fn to_parquet_type(field: &Field) -> Result { TimeUnit::Nanosecond => None, _ => unreachable!(), }, - Some(LogicalType::TIME(TimeType { - is_adjusted_to_u_t_c: false, + Some(PrimitiveLogicalType::Time { + is_adjusted_to_utc: false, unit: match time_unit { - TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()), - TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()), + TimeUnit::Microsecond => ParquetTimeUnit::Microseconds, + TimeUnit::Nanosecond => ParquetTimeUnit::Nanoseconds, _ => unreachable!(), }, - })), + }), None, )?), DataType::Struct(fields) => { @@ -274,9 +256,9 @@ pub fn to_parquet_type(field: &Field) -> Result { .iter() .map(to_parquet_type) .collect::>>()?; - Ok(ParquetType::try_from_group( + Ok(ParquetType::from_group( name, repetition, None, None, fields, None, - )?) + )) } DataType::Dictionary(_, value, _) => { let dict_field = Field::new(name.as_str(), value.as_ref().clone(), field.is_nullable); @@ -284,7 +266,7 @@ pub fn to_parquet_type(field: &Field) -> Result { } DataType::FixedSizeBinary(size) => Ok(ParquetType::try_from_primitive( name, - PhysicalType::FixedLenByteArray(*size as i32), + PhysicalType::FixedLenByteArray(*size), repetition, None, None, @@ -293,27 +275,21 @@ pub fn to_parquet_type(field: &Field) -> Result { DataType::Decimal(precision, scale) => { let precision = *precision; let scale = *scale; - let logical_type = Some(LogicalType::DECIMAL(DecimalType { - scale: scale as i32, - precision: precision as i32, - })); + let logical_type = Some(PrimitiveLogicalType::Decimal(precision, scale)); let physical_type = if precision <= 9 { PhysicalType::Int32 } else if precision <= 18 { PhysicalType::Int64 } else { - let len = decimal_length_from_precision(precision) as i32; + let len = decimal_length_from_precision(precision); PhysicalType::FixedLenByteArray(len) }; Ok(ParquetType::try_from_primitive( name, physical_type, repetition, - Some(PrimitiveConvertedType::Decimal( - precision as i32, - scale as i32, - )), + Some(PrimitiveConvertedType::Decimal(precision, scale)), logical_type, None, )?) @@ -327,21 +303,21 @@ pub fn to_parquet_type(field: &Field) -> Result { None, )?), DataType::List(f) | DataType::FixedSizeList(f, _) | DataType::LargeList(f) => { - Ok(ParquetType::try_from_group( + Ok(ParquetType::from_group( name, repetition, None, - Some(LogicalType::LIST(Default::default())), - vec![ParquetType::try_from_group( + Some(GroupLogicalType::List), + vec![ParquetType::from_group( "list".to_string(), Repetition::Repeated, None, None, vec![to_parquet_type(f)?], None, - )?], + )], None, - )?) 
+ )) } other => Err(ArrowError::NotYetImplemented(format!( "Writing the data type {:?} is not yet implemented", diff --git a/src/io/parquet/write/sink.rs b/src/io/parquet/write/sink.rs index 8906be9431b..1c483c4ed8b 100644 --- a/src/io/parquet/write/sink.rs +++ b/src/io/parquet/write/sink.rs @@ -1,16 +1,14 @@ -use crate::{ - array::Array, - chunk::Chunk, - datatypes::Schema, - error::ArrowError, - io::parquet::write::{Encoding, SchemaDescriptor, WriteOptions}, -}; +use std::{collections::HashMap, pin::Pin, sync::Arc, task::Poll}; + use futures::{future::BoxFuture, AsyncWrite, FutureExt, Sink, TryFutureExt}; use parquet2::metadata::KeyValue; use parquet2::write::FileStreamer; -use std::{collections::HashMap, pin::Pin, sync::Arc, task::Poll}; +use parquet2::write::WriteOptions as ParquetWriteOptions; + +use crate::{array::Array, chunk::Chunk, datatypes::Schema, error::ArrowError}; use super::file::add_arrow_schema; +use super::{Encoding, SchemaDescriptor, WriteOptions}; /// Sink that writes array [`chunks`](Chunk) as a Parquet file. /// @@ -82,10 +80,17 @@ where encoding: Vec, options: WriteOptions, ) -> Result { - // let mut writer = FileStreamer::try_new(writer, schema.clone(), options)?; let parquet_schema = crate::io::parquet::write::to_parquet_schema(&schema)?; let created_by = Some("Arrow2 - Native Rust implementation of Arrow".to_string()); - let mut writer = FileStreamer::new(writer, parquet_schema.clone(), options, created_by); + let mut writer = FileStreamer::new( + writer, + parquet_schema.clone(), + ParquetWriteOptions { + version: options.version, + write_statistics: options.write_statistics, + }, + created_by, + ); let task = Some( async move { writer.start().await?; @@ -150,7 +155,6 @@ where fn start_send(self: Pin<&mut Self>, item: Chunk>) -> Result<(), Self::Error> { let this = self.get_mut(); if let Some(mut writer) = this.writer.take() { - let count = item.len(); let rows = crate::io::parquet::write::row_group_iter( item, this.encoding.clone(), @@ -158,7 +162,7 @@ where this.options, ); this.task = Some(Box::pin(async move { - writer.write(rows, count).await?; + writer.write(rows).await?; Ok(Some(writer)) })); Ok(()) diff --git a/src/io/parquet/write/utf8/basic.rs b/src/io/parquet/write/utf8/basic.rs index f1e8fd3d24c..3074e6fe738 100644 --- a/src/io/parquet/write/utf8/basic.rs +++ b/src/io/parquet/write/utf8/basic.rs @@ -1,17 +1,18 @@ use parquet2::{ encoding::Encoding, - metadata::ColumnDescriptor, + metadata::Descriptor, page::DataPage, + schema::types::PrimitiveType, statistics::{serialize_statistics, BinaryStatistics, ParquetStatistics, Statistics}, - write::WriteOptions, }; use super::super::binary::{encode_delta, ord_binary}; use super::super::utils; +use super::super::WriteOptions; use crate::{ array::{Array, Offset, Utf8Array}, error::{ArrowError, Result}, - io::parquet::read::is_type_nullable, + io::parquet::read::schema::is_nullable, }; pub(crate) fn encode_plain( @@ -41,11 +42,11 @@ pub(crate) fn encode_plain( pub fn array_to_page( array: &Utf8Array, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, encoding: Encoding, ) -> Result { let validity = array.validity(); - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let mut buffer = vec![]; utils::write_def_levels( @@ -77,7 +78,7 @@ pub fn array_to_page( } let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, 
descriptor.primitive_type.clone())) } else { None }; @@ -85,6 +86,7 @@ pub fn array_to_page( utils::build_plain_page( buffer, array.len(), + array.len(), array.null_count(), 0, definition_levels_byte_length, @@ -95,12 +97,12 @@ pub fn array_to_page( ) } -pub(super) fn build_statistics( +pub(crate) fn build_statistics( array: &Utf8Array, - descriptor: ColumnDescriptor, + primitive_type: PrimitiveType, ) -> ParquetStatistics { let statistics = &BinaryStatistics { - descriptor, + primitive_type, null_count: Some(array.null_count() as i64), distinct_count: None, max_value: array diff --git a/src/io/parquet/write/utf8/mod.rs b/src/io/parquet/write/utf8/mod.rs index ddeb6541605..eec1d695d1d 100644 --- a/src/io/parquet/write/utf8/mod.rs +++ b/src/io/parquet/write/utf8/mod.rs @@ -2,5 +2,6 @@ mod basic; mod nested; pub use basic::array_to_page; +pub(crate) use basic::build_statistics; pub(crate) use basic::encode_plain; pub use nested::array_to_page as nested_array_to_page; diff --git a/src/io/parquet/write/utf8/nested.rs b/src/io/parquet/write/utf8/nested.rs index cb87fabf31f..32b83cc0d7e 100644 --- a/src/io/parquet/write/utf8/nested.rs +++ b/src/io/parquet/write/utf8/nested.rs @@ -1,26 +1,24 @@ -use parquet2::{ - encoding::Encoding, metadata::ColumnDescriptor, page::DataPage, write::WriteOptions, -}; +use parquet2::{encoding::Encoding, metadata::Descriptor, page::DataPage}; -use super::super::{levels, utils}; +use super::super::{levels, utils, WriteOptions}; use super::basic::{build_statistics, encode_plain}; +use crate::io::parquet::read::schema::is_nullable; use crate::{ array::{Array, Offset, Utf8Array}, error::Result, - io::parquet::read::is_type_nullable, }; pub fn array_to_page( array: &Utf8Array, options: WriteOptions, - descriptor: ColumnDescriptor, + descriptor: Descriptor, nested: levels::NestedInfo, ) -> Result where OO: Offset, O: Offset, { - let is_optional = is_type_nullable(descriptor.type_()); + let is_optional = is_nullable(&descriptor.primitive_type.field_info); let validity = array.validity(); @@ -34,7 +32,7 @@ where encode_plain(array, is_optional, &mut buffer); let statistics = if options.write_statistics { - Some(build_statistics(array, descriptor.clone())) + Some(build_statistics(array, descriptor.primitive_type.clone())) } else { None }; @@ -42,6 +40,7 @@ where utils::build_plain_page( buffer, levels::num_values(nested.offsets()), + nested.offsets().len().saturating_sub(1), array.null_count(), repetition_levels_byte_length, definition_levels_byte_length, diff --git a/src/io/parquet/write/utils.rs b/src/io/parquet/write/utils.rs index 6857bbc533f..851034d6ae9 100644 --- a/src/io/parquet/write/utils.rs +++ b/src/io/parquet/write/utils.rs @@ -3,12 +3,12 @@ use crate::bitmap::Bitmap; use parquet2::{ compression::Compression, encoding::{hybrid_rle::encode_bool, Encoding}, - metadata::ColumnDescriptor, + metadata::Descriptor, page::{DataPage, DataPageHeader, DataPageHeaderV1, DataPageHeaderV2}, statistics::ParquetStatistics, - write::WriteOptions, }; +use super::WriteOptions; use crate::error::Result; use super::Version; @@ -60,42 +60,42 @@ pub fn write_def_levels( #[allow(clippy::too_many_arguments)] pub fn build_plain_page( buffer: Vec, - len: usize, + num_values: usize, + num_rows: usize, null_count: usize, repetition_levels_byte_length: usize, definition_levels_byte_length: usize, statistics: Option, - descriptor: ColumnDescriptor, + descriptor: Descriptor, options: WriteOptions, encoding: Encoding, ) -> Result { - match options.version { - Version::V1 => { - let header = 
DataPageHeader::V1(DataPageHeaderV1 { - num_values: len as i32, - encoding: encoding.into(), - definition_level_encoding: Encoding::Rle.into(), - repetition_level_encoding: Encoding::Rle.into(), - statistics, - }); - - Ok(DataPage::new(header, buffer, None, descriptor)) - } - Version::V2 => { - let header = DataPageHeader::V2(DataPageHeaderV2 { - num_values: len as i32, - encoding: encoding.into(), - num_nulls: null_count as i32, - num_rows: len as i32, - definition_levels_byte_length: definition_levels_byte_length as i32, - repetition_levels_byte_length: repetition_levels_byte_length as i32, - is_compressed: Some(options.compression != Compression::Uncompressed), - statistics, - }); - - Ok(DataPage::new(header, buffer, None, descriptor)) - } - } + let header = match options.version { + Version::V1 => DataPageHeader::V1(DataPageHeaderV1 { + num_values: num_values as i32, + encoding: encoding.into(), + definition_level_encoding: Encoding::Rle.into(), + repetition_level_encoding: Encoding::Rle.into(), + statistics, + }), + Version::V2 => DataPageHeader::V2(DataPageHeaderV2 { + num_values: num_values as i32, + encoding: encoding.into(), + num_nulls: null_count as i32, + num_rows: num_rows as i32, + definition_levels_byte_length: definition_levels_byte_length as i32, + repetition_levels_byte_length: repetition_levels_byte_length as i32, + is_compressed: Some(options.compression != Compression::Uncompressed), + statistics, + }), + }; + Ok(DataPage::new( + header, + buffer, + None, + descriptor, + Some(num_rows), + )) } /// Auxiliary iterator adapter to declare the size hint of an iterator. diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 5e1b59c7488..7c8daf23641 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -9,6 +9,7 @@ use arrow2::{ use crate::io::ipc::read_gzip_json; mod read; +mod read_indexes; mod write; mod write_async; @@ -22,12 +23,18 @@ pub fn read_column( let metadata = read_metadata(&mut reader)?; let schema = infer_schema(&metadata)?; + // verify that we can read indexes + let _indexes = read_columns_indexes( + &mut reader, + metadata.row_groups[0].columns(), + &schema.fields, + )?; + let column = schema .fields .iter() .enumerate() - .filter_map(|(i, f)| if f.name == column { Some(i) } else { None }) - .next() + .find_map(|(i, f)| if f.name == column { Some(i) } else { None }) .unwrap(); let mut reader = FileReader::try_new(reader, Some(&[column]), None, None, None)?; @@ -329,7 +336,7 @@ pub fn pyarrow_nullable(column: &str) -> Box { .collect::>(); Box::new(PrimitiveArray::::from(values)) } - "string_large" => { + "int32_dict" => { let keys = PrimitiveArray::::from([Some(0), Some(1), None, Some(1)]); let values = Arc::new(PrimitiveArray::::from_slice([10, 200])); Box::new(DictionaryArray::::from_data(keys, values)) @@ -413,7 +420,13 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Option> min_value: Some(0), max_value: Some(9), }), - "string_large" => return None, + "int32_dict" => Box::new(PrimitiveStatistics { + data_type: DataType::Dictionary(IntegerType::Int32, Box::new(DataType::Int32), false), + null_count: Some(0), + distinct_count: None, + min_value: Some(10), + max_value: Some(200), + }), "decimal_9" => Box::new(PrimitiveStatistics:: { distinct_count: None, null_count: Some(3), @@ -716,12 +729,11 @@ fn integration_write(schema: &Schema, batches: &[Chunk>]) -> Resu writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } - let (_size, writer) = 
writer.end(None)?;
+    writer.end(None)?;
 
-    Ok(writer.into_inner())
+    Ok(writer.into_inner().into_inner())
 }
 
 type IntegrationRead = (Schema, Vec<Chunk<Arc<dyn Array>>>);
diff --git a/tests/it/io/parquet/read_indexes.rs b/tests/it/io/parquet/read_indexes.rs
new file mode 100644
index 00000000000..ade8b268f16
--- /dev/null
+++ b/tests/it/io/parquet/read_indexes.rs
@@ -0,0 +1,223 @@
+use std::io::Cursor;
+use std::sync::Arc;
+
+use arrow2::error::ArrowError;
+use arrow2::{array::*, datatypes::*, error::Result, io::parquet::read::*, io::parquet::write::*};
+use parquet2::indexes::{compute_rows, select_pages};
+use parquet2::read::IndexedPageReader;
+
+/// Returns two sets of pages with the same number of rows, distributed unevenly across pages.
+fn pages(
+    arrays: &[&dyn Array],
+    encoding: Encoding,
+) -> Result<(Vec<EncodedPage>, Vec<EncodedPage>, Schema)> {
+    // create pages with different number of rows
+    let array11 = PrimitiveArray::<i64>::from_slice([1, 2, 3, 4]);
+    let array12 = PrimitiveArray::<i64>::from_slice([5]);
+    let array13 = PrimitiveArray::<i64>::from_slice([6]);
+
+    let schema = Schema::from(vec![
+        Field::new("a1", DataType::Int64, false),
+        Field::new(
+            "a2",
+            arrays[0].data_type().clone(),
+            arrays.iter().map(|x| x.null_count()).sum::<usize>() != 0usize,
+        ),
+    ]);
+
+    let parquet_schema = to_parquet_schema(&schema)?;
+
+    let options = WriteOptions {
+        write_statistics: true,
+        compression: Compression::Uncompressed,
+        version: Version::V1,
+    };
+
+    let pages1 = vec![
+        array_to_page(
+            &array11,
+            parquet_schema.columns()[0].descriptor.clone(),
+            options,
+            Encoding::Plain,
+        )?,
+        array_to_page(
+            &array12,
+            parquet_schema.columns()[0].descriptor.clone(),
+            options,
+            Encoding::Plain,
+        )?,
+        array_to_page(
+            &array13,
+            parquet_schema.columns()[0].descriptor.clone(),
+            options,
+            Encoding::Plain,
+        )?,
+    ];
+    let pages2 = arrays
+        .iter()
+        .flat_map(|array| {
+            array_to_pages(
+                *array,
+                parquet_schema.columns()[1].descriptor.clone(),
+                options,
+                encoding,
+            )
+            .unwrap()
+            .collect::<Result<Vec<_>>>()
+            .unwrap()
+        })
+        .collect::<Vec<_>>();
+
+    Ok((pages1, pages2, schema))
+}
+
+/// Tests reading a subset of pages, selected via the page indexes.
+fn read_with_indexes(
+    (pages1, pages2, schema): (Vec<EncodedPage>, Vec<EncodedPage>, Schema),
+    expected: Arc<dyn Array>,
+) -> Result<()> {
+    let options = WriteOptions {
+        write_statistics: true,
+        compression: Compression::Uncompressed,
+        version: Version::V1,
+    };
+
+    let to_compressed = |pages: Vec<EncodedPage>| {
+        let encoded_pages = DynIter::new(pages.into_iter().map(Ok));
+        let compressed_pages =
+            Compressor::new(encoded_pages, options.compression, vec![]).map_err(ArrowError::from);
+        Result::Ok(DynStreamingIterator::new(compressed_pages))
+    };
+
+    let row_group = DynIter::new(vec![to_compressed(pages1), to_compressed(pages2)].into_iter());
+
+    let writer = vec![];
+    let mut writer = FileWriter::try_new(writer, schema, options)?;
+
+    writer.start()?;
+    writer.write(row_group)?;
+    writer.end(None)?;
+    let data = writer.into_inner();
+
+    let mut reader = Cursor::new(data);
+
+    let metadata = read_metadata(&mut reader)?;
+
+    let schema = infer_schema(&metadata)?;
+
+    let row_group = &metadata.row_groups[0];
+
+    let pages = read_pages_locations(&mut reader, row_group.columns())?;
+
+    // say we concluded from the indexes that we only needed the "6" from the first column, so second page.
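+    // In outline: `read_pages_locations` (above) gives each column's page
+    // locations; `compute_rows` turns the per-page selection on the first
+    // column into row intervals; `select_pages` projects those intervals onto
+    // the second column's pages; `IndexedPageReader` then decodes only the
+    // selected pages.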
+ let _indexes = read_columns_indexes(&mut reader, row_group.columns(), &schema.fields)?; + let intervals = compute_rows(&[false, true, false], &pages[0], row_group.num_rows())?; + + // based on the intervals from c1, we compute which pages from the second column are required: + let pages = select_pages(&intervals, &pages[1], row_group.num_rows())?; + + // and read them: + let c1 = &metadata.row_groups[0].columns()[1]; + + let pages = IndexedPageReader::new(reader, c1, pages, vec![], vec![]); + let pages = BasicDecompressor::new(pages, vec![]); + + let arrays = column_iter_to_arrays( + vec![pages], + vec![&c1.descriptor().descriptor.primitive_type], + schema.fields[1].clone(), + row_group.num_rows() as usize, + )?; + + let arrays = arrays.collect::>>()?; + + assert_eq!(arrays, vec![expected]); + Ok(()) +} + +#[test] +fn indexed_required_utf8() -> Result<()> { + let array21 = Utf8Array::::from_slice(["a", "b", "c"]); + let array22 = Utf8Array::::from_slice(["d", "e", "f"]); + let expected = Arc::new(Utf8Array::::from_slice(["e"])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_required_i32() -> Result<()> { + let array21 = Int32Array::from_slice([1, 2, 3]); + let array22 = Int32Array::from_slice([4, 5, 6]); + let expected = Arc::new(Int32Array::from_slice([5])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_optional_i32() -> Result<()> { + let array21 = Int32Array::from([Some(1), Some(2), None]); + let array22 = Int32Array::from([None, Some(5), Some(6)]); + let expected = Arc::new(Int32Array::from_slice([5])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_optional_utf8() -> Result<()> { + let array21 = Utf8Array::::from([Some("a"), Some("b"), None]); + let array22 = Utf8Array::::from([None, Some("e"), Some("f")]); + let expected = Arc::new(Utf8Array::::from_slice(["e"])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_required_fixed_len() -> Result<()> { + let array21 = FixedSizeBinaryArray::from_slice([[127], [128], [129]]); + let array22 = FixedSizeBinaryArray::from_slice([[130], [131], [132]]); + let expected = Arc::new(FixedSizeBinaryArray::from_slice([[131]])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_optional_fixed_len() -> Result<()> { + let array21 = FixedSizeBinaryArray::from([Some([127]), Some([128]), None]); + let array22 = FixedSizeBinaryArray::from([None, Some([131]), Some([132])]); + let expected = Arc::new(FixedSizeBinaryArray::from_slice([[131]])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_required_boolean() -> Result<()> { + let array21 = BooleanArray::from_slice([true, false, true]); + let array22 = BooleanArray::from_slice([false, false, true]); + let expected = Arc::new(BooleanArray::from_slice([false])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_optional_boolean() -> Result<()> { + let array21 = BooleanArray::from([Some(true), Some(false), None]); + let array22 = BooleanArray::from([None, Some(false), Some(true)]); + let expected = Arc::new(BooleanArray::from_slice([false])) as Arc; + + read_with_indexes(pages(&[&array21, &array22], Encoding::Plain)?, expected) +} + +#[test] +fn indexed_dict() -> 
Result<()> { + let indices = PrimitiveArray::from_values((0..6u64).map(|x| x % 2)); + let values = PrimitiveArray::from_slice([4i32, 6i32]); + let array = DictionaryArray::from_data(indices, std::sync::Arc::new(values)); + + let indices = PrimitiveArray::from_slice(&[0u64]); + let values = PrimitiveArray::from_slice([4i32, 6i32]); + let expected = DictionaryArray::from_data(indices, std::sync::Arc::new(values)); + + let expected = Arc::new(expected) as Arc; + + read_with_indexes(pages(&[&array], Encoding::RleDictionary)?, expected) +} diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs index c9141f4d515..06f3a706f02 100644 --- a/tests/it/io/parquet/write.rs +++ b/tests/it/io/parquet/write.rs @@ -49,12 +49,11 @@ fn round_trip( writer.start()?; for group in row_groups { - let (group, len) = group?; - writer.write(group, len)?; + writer.write(group?)?; } - let (_size, writer) = writer.end(None)?; + writer.end(None)?; - let data = writer.into_inner(); + let data = writer.into_inner().into_inner(); let (result, stats) = read_column(&mut Cursor::new(data), 0, "a1")?; assert_eq!(array.as_ref(), result.as_ref()); @@ -354,7 +353,7 @@ fn utf8_optional_v2_delta() -> Result<()> { #[test] fn i32_optional_v2_dict() -> Result<()> { round_trip( - "string_large", + "int32_dict", true, false, Version::V2, @@ -366,7 +365,7 @@ fn i32_optional_v2_dict() -> Result<()> { #[test] fn i32_optional_v2_dict_compressed() -> Result<()> { round_trip( - "string_large", + "int32_dict", true, false, Version::V2, diff --git a/tests/it/io/parquet/write_async.rs b/tests/it/io/parquet/write_async.rs index 5f9d09515e5..86ff5434df6 100644 --- a/tests/it/io/parquet/write_async.rs +++ b/tests/it/io/parquet/write_async.rs @@ -7,14 +7,10 @@ use arrow2::{ error::Result, io::parquet::{ read::{infer_schema, read_columns_many_async, read_metadata_async, RowGroupDeserializer}, - write::Encoding, + write::{Compression, Encoding, Version, WriteOptions}, }, }; use futures::{future::BoxFuture, io::Cursor, SinkExt}; -use parquet2::{ - compression::Compression, - write::{Version, WriteOptions}, -}; use super::FileSink;
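Taken together, the user-facing write path after this migration: `WriteOptions` is arrow2's own struct (now carrying `compression`), `FileWriter::write` no longer takes a row count, and `end` takes `&mut self`, with `into_inner` recovering the underlying writer. A minimal sketch under those assumptions (the schema, values, and the `write_example` name are illustrative, not from the patch):

use std::sync::Arc;

use arrow2::array::{Array, Int32Array};
use arrow2::chunk::Chunk;
use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::error::Result;
use arrow2::io::parquet::write::{
    Compression, Encoding, FileWriter, RowGroupIterator, Version, WriteOptions,
};

fn write_example() -> Result<Vec<u8>> {
    let schema = Schema::from(vec![Field::new("c1", DataType::Int32, false)]);
    let chunk = Chunk::new(vec![Arc::new(Int32Array::from_slice([1, 2, 3])) as Arc<dyn Array>]);

    // WriteOptions now lives in arrow2 and includes the compression.
    let options = WriteOptions {
        write_statistics: true,
        compression: Compression::Uncompressed,
        version: Version::V2,
    };

    let row_groups = RowGroupIterator::try_new(
        vec![Ok(chunk)].into_iter(),
        &schema,
        options,
        vec![Encoding::Plain],
    )?;

    let mut writer = FileWriter::try_new(vec![], schema, options)?;
    writer.start()?;
    for group in row_groups {
        // `write` no longer receives the number of rows; the pages carry it.
        writer.write(group?)?;
    }
    // `end` borrows the writer; the inner `Vec<u8>` is recovered afterwards.
    writer.end(None)?;
    Ok(writer.into_inner())
}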