In [3]:
!pip install pyarrow



Collecting pyarrow
  Downloading pyarrow-9.0.0-cp37-cp37m-win_amd64.whl (19.5 MB)
     --------------------------------------- 19.5/19.5 MB 19.3 MB/s eta 0:00:00
Installing collected packages: pyarrow
Successfully installed pyarrow-9.0.0


In [29]:
import pyarrow as pa
import pandas as pd
import numpy as np
import json
import pyarrow.parquet as pq

In [9]:
# Sample weather readings: one row per day, indexed by a DatetimeIndex named 'date'.
df = pd.DataFrame(
    data={
        'temp': [12.1, 11, 13, 10, 10],
        'rain': [9.2, 10.0, 2.2, 0.2, 0.4],
    },
    index=pd.DatetimeIndex(
        ['2020-10-12', '2020-10-13', '2020-10-14', '2020-10-15', '2020-10-16'],
        name='date',
    ),
)

In [10]:
# Show the frame as this cell's output.
df

Unnamed: 0_level_0,temp,rain
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-12,12.1,9.2
2020-10-13,11.0,10.0
2020-10-14,13.0,2.2
2020-10-15,10.0,0.2
2020-10-16,10.0,0.4


In [40]:
# Application-level metadata that is later serialised to JSON and attached to
# the Arrow table's schema.
custom_meta_content = dict(
    user='Wáng Fān',
    coord='55.9533° N, 3.1883° W',
    time='2020-10-17T03:59:59+0000',  # ISO-8601
)
# Tabular view of the same dict: the three scalar values become columns and are
# repeated for every label in the index.
# NOTE(review): `c` is not referenced by any other visible cell — confirm it is still needed.
c = pd.DataFrame(data=custom_meta_content, index=["user", "coord", "time"])

In [19]:
# Key under which the custom metadata is stored in (and later read back from)
# the table's schema metadata; it is .encode()d to bytes at each use.
custom_meta_key = 'weatherapp.iot'

In [20]:
# Convert the pandas frame into an Arrow table.
table = pa.Table.from_pandas(df)

In [21]:
# Inspect the schema-level metadata currently attached to the table.
print(table.schema.metadata)

{b'pandas': b'{"index_columns": ["date"], "column_indexes": [{"name": null, "field_name": null, "pandas_type": "unicode", "numpy_type": "object", "metadata": {"encoding": "UTF-8"}}], "columns": [{"name": "temp", "field_name": "temp", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "rain", "field_name": "rain", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "date", "field_name": "date", "pandas_type": "datetime", "numpy_type": "datetime64[ns]", "metadata": null}], "creator": {"library": "pyarrow", "version": "9.0.0"}, "pandas_version": "1.3.4"}'}


In [22]:
# Second sample metadata payload with the same keys as custom_meta_content.
# The timestamp previously had a three-digit year ('200-…'), which is not a
# valid ISO-8601 calendar date; it now carries the intended four-digit year.
custom_meta_content2 = {
    'user': 'abv',
    'coord': '55.9533° N, 3.1883° W',
    'time': '2020-10-17T03:59:59+0000'  # ISO-8601
}

In [41]:
# Serialise the custom metadata to JSON; the schema metadata printed above
# uses bytes for both keys and values, so key and payload are encoded below.
custom_meta_json = json.dumps(custom_meta_content)
# schema.metadata is None when the table carries no metadata; fall back to an
# empty mapping so the merge below still works.
existing_meta = table.schema.metadata or {}
custom_meta_key_bytes = custom_meta_key.encode()
combined_meta = {
    custom_meta_key_bytes: custom_meta_json.encode(),
    # Skip any earlier copy of our own key: if this cell is re-run after the
    # table has already been given the custom entry, the stale value must not
    # override the freshly serialised one.
    **{
        key: value
        for key, value in existing_meta.items()
        if key != custom_meta_key_bytes
    },
}

In [42]:
# replace_schema_metadata returns a table rather than being used for a side
# effect here; rebind `table` so later cells see the one built with combined_meta.
table = table.replace_schema_metadata(combined_meta)

In [43]:
# Inspect the schema metadata again after the replacement.
print(table.schema.metadata)

{b'weatherapp.iot': b'{"user": "W\\u00e1ng F\\u0101n", "coord": "55.9533\\u00b0 N, 3.1883\\u00b0 W", "time": "2020-10-17T03:59:59+0000"}', b'pandas': b'{"index_columns": ["date"], "column_indexes": [{"name": null, "field_name": null, "pandas_type": "unicode", "numpy_type": "object", "metadata": {"encoding": "UTF-8"}}], "columns": [{"name": "temp", "field_name": "temp", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "rain", "field_name": "rain", "pandas_type": "float64", "numpy_type": "float64", "metadata": null}, {"name": "date", "field_name": "date", "pandas_type": "datetime", "numpy_type": "datetime64[ns]", "metadata": null}], "creator": {"library": "pyarrow", "version": "9.0.0"}, "pandas_version": "1.3.4"}'}


In [30]:
# Write the table to 'example.parquet' (relative to the working directory)
# using GZIP compression.
pq.write_table(table, 'example.parquet', compression='GZIP')

In [31]:
# Read the file written above back into a new Arrow table.
restored_table = pq.read_table('example.parquet')

In [48]:
# Show the restored table (schema and column values) as this cell's output.
restored_table

pyarrow.Table
temp: double
rain: double
date: timestamp[us]
----
temp: [[12.1,11,13,10,10]]
rain: [[9.2,10,2.2,0.2,0.4]]
date: [[2020-10-12 00:00:00.000000,2020-10-13 00:00:00.000000,2020-10-14 00:00:00.000000,2020-10-15 00:00:00.000000,2020-10-16 00:00:00.000000]]

In [53]:
# Convert the restored Arrow table back to pandas and append a demo column 'x'
# in a single chained expression; the literal has one value per row (five rows).
restored_df = restored_table.to_pandas().assign(x=[1, 2, 3, 4, 5])
restored_df

Unnamed: 0_level_0,temp,rain,x
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-10-12,12.1,9.2,1
2020-10-13,11.0,10.0,2
2020-10-14,13.0,2.2,3
2020-10-15,10.0,0.2,4
2020-10-16,10.0,0.4,5


In [57]:
# Look up the custom metadata payload in the restored table's schema metadata,
# using the bytes-encoded form of custom_meta_key.
restored_meta_json = restored_table.schema.metadata[custom_meta_key.encode()]


In [58]:
# Parse the JSON payload back into a Python object.
restored_meta = json.loads(restored_meta_json)

In [59]:
# Show the round-tripped metadata as this cell's output.
restored_meta

{'user': 'Wáng Fān',
 'coord': '55.9533° N, 3.1883° W',
 'time': '2020-10-17T03:59:59+0000'}

<function str.encode(encoding='utf-8', errors='strict')>