diff --git a/README.md b/README.md
index 93f6057..ab42f02 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ DataPack, contributed by the Mind Network Team, is to enable data transformation
### Dependencies
-* Python > 3.8
+* Python >= 3.8
* pip
* mindlakesdk
* arseeding
@@ -30,6 +30,7 @@ from minddatapack import DataPack
### More examples
* [use case of arweave in jupyter](/examples/use_case_arweave.ipynb)
+* [use case of IPFS in jupyter](/examples/use_case_ipfs.ipynb)
## code
```
@@ -37,6 +38,7 @@ mind-datapack-python
|-- minddatapack # source code
| |-- __init__.py
| |-- arweaveconnector.py
+| |-- ipfsconnector.py
| |-- localfileconnector.py
| |-- mindlakeconnector.py
| └-- utils.py
@@ -59,6 +61,8 @@ Full doc: [https://mind-network.gitbook.io/mind-lake-sdk](https://mind-network.g
* v1.0
* Initial Release
+* v1.0.1
+ * Add IPFS support
## License
diff --git a/examples/use_case_ipfs.ipynb b/examples/use_case_ipfs.ipynb
new file mode 100644
index 0000000..c427c26
--- /dev/null
+++ b/examples/use_case_ipfs.ipynb
@@ -0,0 +1,629 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 1: configuration"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Input your walletPrivateKey here:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "check env.walletPrivateKey: 2a776\n",
+ "check env.mindLakeAppKey: Kyunq\n",
+ "check env.MINDLAKE_GATEWAY: https://sdk.mindnetwork.xyz/node\n"
+ ]
+ }
+ ],
+ "source": [
+ "# make sure you configure mindlakesdk, please check env.py in the same folder\n",
+ "# please familiarize yourself with these configurations, and you can read https://github.com/mind-network/mind-lake-sdk-python \n",
+ "import env\n",
+ "\n",
+ "print(\"check env.walletPrivateKey:\", env.walletPrivateKey[0:5])\n",
+ "print(\"check env.mindLakeAppKey:\", env.mindLakeAppKey[0:5])\n",
+ "print(\"check env.MINDLAKE_GATEWAY:\", env.MINDLAKE_GATEWAY)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Install dependencies and source code"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# please \"pip install mindlakesdk\" if not installed\n",
+ "import mindlakesdk\n",
+ "\n",
+ "# please \"pip install minddatapack\" if not installed, or git clone the source code\n",
+ "from minddatapack import DataPack\n",
+ "\n",
+ "# please \"pip install pandas\" if not installed, it is used to display structure content in the notebook\n",
+ "import pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# check if example_data.csv file exists in the same folder, this demo will start to load this csv file.\n",
+ "\n",
+ "# check IPFS service is running, if not, please start it by \"ipfs daemon\" in command line or start it in the IPFS desktop app"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 2. Local CSV -> MindLake\n",
+ "- Load data from a local CSV file without the metadata file, by defining each column manually.\n",
+ "- You can also load from a local backup into MindLake once you are familiar, then no need to define each column. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " wallet_address | \n",
+ " register_date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 123 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 124 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 125 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid wallet_address register_date\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "fn_local_csv = './example_data.csv'\n",
+ "\n",
+ "# preview local csv\n",
+ "df = pandas.read_csv(fn_local_csv)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 123 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 124 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 125 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack1 = DataPack(env.walletPrivateKey)\n",
+ "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n",
+ " [\n",
+ " DataPack.Column('uid', DataPack.DataType.int4, False),\n",
+ " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n",
+ " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n",
+ " ])\n",
+ "assert result, result.message\n",
+ "df = pandas.DataFrame(dataPack1.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into a table in MindLake"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "dropTable result: Success\n",
+ "a_new_table has been saved to MindLake.\n"
+ ]
+ }
+ ],
+ "source": [
+ "mindlake = mindlakesdk.connect(env.walletPrivateKey, env.mindLakeAppKey, env.MINDLAKE_GATEWAY)\n",
+ "assert mindlake, mindlake.message\n",
+ "\n",
+ "# drop the table if exists\n",
+ "result = mindlake.datalake.dropTable('a_new_table')\n",
+ "print(\"dropTable result:\", result.message)\n",
+ "\n",
+ "# upload from local csv into mindlake\n",
+ "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n",
+ "assert result, result.message\n",
+ "print(\"a_new_table has been saved to MindLake.\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "you can also check this new downloaded table in https://scan.mindnetwork.xyz/account/myData"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 3. MindLake -> IPFS\n",
+ "- Load data from a table in MindLake and save to IPFS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 123 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 124 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 125 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack2 = DataPack(env.walletPrivateKey)\n",
+ "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n",
+ "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n",
+ "assert result, result.message\n",
+ "df = pandas.DataFrame(dataPack2.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into IPFS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "test_table_encrypted.csv has been saved to IPFS.\n",
+ "\n",
+ "The IPFS ID is: QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM \n",
+ "\n",
+ "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM\n",
+ "\n",
+ "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM/test_table_encrypted.csv\n",
+ "\n",
+ "The content is:\n",
+ "uid,wallet_address,register_date\n",
+ "123,\\x4810c8a24f2f22243d778860d9ee5180d5375d6cbd22561e568c23bce63f7516d1d2887dcb1993de0133e1d1e002c06b38b56b25d23acac51e24e740f1c6c5e163,\\xf9977c69c6a3d716e615b344f4f66953a4b3d764892a5960460aba34a6c7645943\n",
+ "124,\\xa7fca7f580180de2e92df9ddf9608c2442dcfa947ba23e3be37bf4764f981670016df19ca4fb3e7295479de368c45122f2cca35d619a402ac28d437cfd0188233f,\\xf06543016d08a89717d34fea65b66b4b8878ae8f411598c3ce3cbde0795a58d2c7\n",
+ "125,\\xd26a30809f578388dfc97a0262190094c086060a380e7b83a5f2e9bd9cd680a6e272c900921ca7c5824391e4f6bc6854e622f59c0d891152e861ff7ba62f542c40,\\xfa94a16e90f53456b9d053537fed04802514b15a3506055acbbd519e67de1d6e1c\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# make sure you have started IPFS service on your local machine\n",
+ "result = dataPack2.saveToIPFS('test_table_encrypted.csv')\n",
+ "assert result, result.message\n",
+ "print(\"test_table_encrypted.csv has been saved to IPFS.\\n\")\n",
+ "ipfsHash = result.data\n",
+ "print(\"The IPFS ID is:\", ipfsHash, '\\n')\n",
+ "ipfs_local_url = \"http://127.0.0.1:8080/ipfs/%s\"%ipfsHash\n",
+ "ipfs_local_data_url = ipfs_local_url + '/test_table_encrypted.csv'\n",
+ "print(f\"You can check on local IPFS http service: {ipfs_local_url}\\n\")\n",
+ "print(f\"And the content on IPFS: {ipfs_local_data_url}\\n\")\n",
+ "import requests\n",
+ "content = requests.get(ipfs_local_data_url).text\n",
+ "print(\"The content is:\")\n",
+ "print(content)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Step 4. IPFS -> Local File\n",
+ "- Load data from IPFS and save to local file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "You can see file on IPFS are encrypted, but datapack can decrypted it locally\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 123 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 124 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 125 | \n",
+ " 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 | \n",
+ " 2023-07-15 02:25:32.392441 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 0 1 2\n",
+ "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n",
+ "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataPack3 = DataPack(env.walletPrivateKey)\n",
+ "result = dataPack3.loadFromIPFS(ipfsHash)\n",
+ "assert result, result.message\n",
+ "print(\"You can see file on IPFS are encrypted, but datapack can decrypted it locally\")\n",
+ "df = pandas.DataFrame(dataPack3.data)\n",
+ "df"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- Save the data into a local file with encryption"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "test_localtable_encrypted.csv has been saved to local file.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " uid | \n",
+ " wallet_address | \n",
+ " register_date | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 123 | \n",
+ " \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... | \n",
+ " \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 124 | \n",
+ " \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... | \n",
+ " \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 125 | \n",
+ " \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... | \n",
+ " \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " uid wallet_address \\\n",
+ "0 123 \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... \n",
+ "1 124 \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... \n",
+ "2 125 \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... \n",
+ "\n",
+ " register_date \n",
+ "0 \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... \n",
+ "1 \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... \n",
+ "2 \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n",
+ "assert result, result.message\n",
+ "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n",
+ "df = pandas.read_csv('test_localtable_encrypted.csv')\n",
+ "df"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "datapack-dev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.17"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/minddatapack/__init__.py b/minddatapack/__init__.py
index e9f7938..7d3414a 100644
--- a/minddatapack/__init__.py
+++ b/minddatapack/__init__.py
@@ -3,9 +3,11 @@
import mindlakesdk
from mindlakesdk.utils import ResultType, DataType
from web3 import Web3
+import importlib.metadata
import minddatapack.arweaveconnector
import minddatapack.mindlakeconnector
import minddatapack.localfileconnector
+import minddatapack.ipfsconnector
from minddatapack.utils import Column
class DataPack:
@@ -18,8 +20,8 @@ def __init__(self, walletPrivateKey: str):
self.data = None
self.columnName = None
self.fileName = None
- self.filePath = None
self.primaryKey = None
+ self.version = importlib.metadata.version('minddatapack')
self.__walletPrivateKey = walletPrivateKey
web3 = Web3(Web3.HTTPProvider(mindlakesdk.settings.WEB3API))
self.__walletAccount = web3.eth.account.from_key(walletPrivateKey)
@@ -47,3 +49,9 @@ def saveToArweave(self, fileName: str, tokenName: str, arWalletFile: str = None)
def loadFromArweave(self, id: str, arGateway: str = 'https://arseed.web3infra.dev/'):
return minddatapack.arweaveconnector.loadFromArweave(self, id, arGateway)
+
+ def saveToIPFS(self, fileName: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None):
+ return minddatapack.ipfsconnector.saveToIPFS(self, fileName, apiEndpoint, apiKey, apiSecret)
+
+ def loadFromIPFS(self, ipfsCID: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None):
+ return minddatapack.ipfsconnector.loadFromIPFS(self, ipfsCID, apiEndpoint, apiKey, apiSecret)
diff --git a/minddatapack/arweaveconnector.py b/minddatapack/arweaveconnector.py
index 1d713e9..8012f4d 100644
--- a/minddatapack/arweaveconnector.py
+++ b/minddatapack/arweaveconnector.py
@@ -2,6 +2,7 @@
import os
import logging
import requests
+from datetime import datetime
from mindlakesdk.utils import ResultType
import minddatapack.utils
@@ -36,8 +37,8 @@ def saveToArweave(dataPack, fileName: str, tokenName: str, arWalletFile: str, et
os.remove(metaFileName)
def loadFromArweave(dataPack, id: str, arGateway: str):
- metaFileName = None
- dataFileName = None
+ cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv'
+ cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT
try:
if arGateway[-1] != '/':
arGateway += '/'
@@ -46,26 +47,23 @@ def loadFromArweave(dataPack, id: str, arGateway: str):
if metaResponse and metaResponse.status_code == 200:
txMeta = json.loads(metaResponse.text)
metadataJsonStr = txMeta['tags'][1]['value']
- metadata = json.loads(metadataJsonStr)
- dataFileName = metadata['FileName']
- metaFileName = dataFileName + minddatapack.utils.METADATA_EXT
- with open(metaFileName, 'wb') as file:
+ with open(cacheMetaFileName, 'wb') as file:
file.write(metadataJsonStr.encode('utf-8'))
dataUrl = arGateway + id
dataResponse = requests.get(dataUrl)
if dataResponse and dataResponse.status_code == 200:
- with open(dataFileName, 'wb') as file:
+ with open(cacheDataFileName, 'wb') as file:
file.write(dataResponse.content)
- return dataPack.loadFromLocalFile(dataFileName)
+ return dataPack.loadFromLocalFile(cacheDataFileName)
else:
return ResultType(60001, "Network error", None)
except Exception as e:
logging.debug("Exception:", str(e))
return ResultType(60014, "Fail to connect to Arweave", None)
finally:
- if dataFileName and os.path.exists(dataFileName):
- os.remove(dataFileName)
- if metaFileName and os.path.exists(metaFileName):
- os.remove(metaFileName)
+ if os.path.exists(cacheDataFileName):
+ os.remove(cacheDataFileName)
+ if os.path.exists(cacheMetaFileName):
+ os.remove(cacheMetaFileName)
diff --git a/minddatapack/ipfsconnector.py b/minddatapack/ipfsconnector.py
new file mode 100644
index 0000000..69db82b
--- /dev/null
+++ b/minddatapack/ipfsconnector.py
@@ -0,0 +1,85 @@
+from datetime import datetime
+import tarfile
+import requests
+import json
+import logging
+import os
+from mindlakesdk.utils import ResultType
+import minddatapack.utils
+
+
+def saveToIPFS(dataPack, fileName: str, apiEndpoint: str, apiKey: str, apiSecret: str) -> ResultType:
+ result = dataPack.saveToLocalFile(fileName)
+ if not result:
+ return result
+ metaFileName = fileName + minddatapack.utils.METADATA_EXT
+ try:
+ csvFile = open(fileName, 'rb')
+ metaFile = open(metaFileName, 'rb')
+ files = {}
+ files[fileName] = csvFile
+ files[metaFileName] = metaFile
+ if apiKey and apiSecret:
+ response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(apiKey,apiSecret))
+ else:
+ response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files)
+ if response and response.status_code == 200:
+ folderJson = response.text.splitlines()[-1]
+ ipfsHash = json.loads(folderJson)['Hash']
+ return ResultType(0, "Success", ipfsHash)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to IPFS", None)
+ finally:
+ if csvFile:
+ csvFile.close()
+ if metaFile:
+ metaFile.close()
+ if os.path.exists(fileName):
+ os.remove(fileName)
+ if os.path.exists(metaFileName):
+ os.remove(metaFileName)
+
+def loadFromIPFS(dataPack, ipfsCID: str, apiEndpoint: str, apiKey: str, apiSecret: str):
+ cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz'
+ metaFileName = None
+ dataFileName = None
+ try:
+ if apiKey and apiSecret:
+ response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(apiKey,apiSecret))
+ else:
+ response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6')
+ if response and response.status_code == 200:
+ with open(cacheTarFileName, 'wb') as file:
+ file.write(response.content)
+ with tarfile.open(cacheTarFileName, "r:gz") as tar:
+ members = tar.getmembers()
+ if len(members) != 3:
+ return ResultType(60015, "Invalid DataPack data", None)
+ for member in members:
+ nameSplit = member.name.split('/')
+ if len(nameSplit) == 2:
+ if member.name.endswith(minddatapack.utils.METADATA_EXT):
+ metaFileName = member.name
+ elif member.name.endswith('.csv'):
+ dataFileName = member.name
+ tar.extract(member)
+ if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT:
+ return ResultType(60015, "Invalid DataPack data", None)
+ return dataPack.loadFromLocalFile(dataFileName)
+ else:
+ return ResultType(60001, "Network error", None)
+ except Exception as e:
+ logging.debug("Exception:", str(e))
+ return ResultType(60014, "Fail to connect to IPFS", None)
+ finally:
+ if os.path.exists(cacheTarFileName):
+ os.remove(cacheTarFileName)
+ if dataFileName and os.path.exists(dataFileName):
+ os.remove(dataFileName)
+ if metaFileName and os.path.exists(metaFileName):
+ os.remove(metaFileName)
+ if os.path.exists(ipfsCID):
+ os.rmdir(ipfsCID)
diff --git a/minddatapack/localfileconnector.py b/minddatapack/localfileconnector.py
index 4b2c1bc..1337df8 100644
--- a/minddatapack/localfileconnector.py
+++ b/minddatapack/localfileconnector.py
@@ -56,21 +56,20 @@ def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list,
with open(filePath, 'w') as file:
writer = csv.writer(file)
writer.writerow(dataPack.columnName)
- if ignoreEncrypt:
- for row in dataPack.data:
- writer.writerow(row)
- else:
- for row in dataPack.data:
- rowEncrypted = []
- for index, cell in enumerate(row):
- if columns[index].encrypt:
- encryptResult = __encrypt(cell, columns[index])
- if not encryptResult:
- return encryptResult
- rowEncrypted.append(encryptResult.data)
+ for row in dataPack.data:
+ rowEncoded = []
+ for index, cell in enumerate(row):
+ if not ignoreEncrypt and columns[index].encrypt:
+ encryptResult = __encrypt(cell, columns[index])
+ if not encryptResult:
+ return encryptResult
+ rowEncoded.append(encryptResult.data)
+ else:
+ if columns[index].type == DataType.timestamp:
+ rowEncoded.append(cell.strftime('%Y-%m-%d %H:%M:%S.%f'))
else:
- rowEncrypted.append(cell)
- writer.writerow(rowEncrypted)
+ rowEncoded.append(str(cell))
+ writer.writerow(rowEncoded)
sha256_hash = SHA256.new()
with open(filePath, 'rb') as file:
@@ -78,16 +77,17 @@ def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list,
sha256_hash.update(chunk)
sha256_hash_hex = sha256_hash.hexdigest()
- metadata = __buildMetadata(dataPack.fileName, ignoreEncrypt, sha256_hash_hex, columns, walletAccount)
+ metadata = __buildMetadata(dataPack.fileName, ignoreEncrypt, sha256_hash_hex, columns, walletAccount, dataPack.version)
with open(filePath+minddatapack.utils.METADATA_EXT, 'w') as file:
json.dump(metadata, file)
return ResultType(0, None)
-def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns: list, walletAccount) -> dict:
+def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns: list, walletAccount, version: str) -> dict:
metadata = {}
metadata['FileName'] = fileName
metadata['IgnoreEncrypt'] = ignoreEncrypt
metadata['FileHash'] = fileHash
+ metadata['Version'] = version
metadata['Column'] = []
for column in columns:
columnMeta = {}
@@ -128,25 +128,25 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount):
dataPack.columnName = next(reader)
dataPack.data = []
for row in reader:
- rowDecrypted = []
+ rowDecoded = []
for index, cell in enumerate(row):
if not ignoreEncrypt and columns[index].encrypt:
decryptResult = __decrypt(cell, columns[index])
if not decryptResult:
return decryptResult
- rowDecrypted.append(decryptResult.data)
+ rowDecoded.append(decryptResult.data)
else:
if columns[index].type == DataType.int4 or columns[index].type == DataType.int8:
- rowDecrypted.append(int(cell))
+ rowDecoded.append(int(cell))
elif columns[index].type == DataType.float4 or columns[index].type == DataType.float8:
- rowDecrypted.append(float(cell))
+ rowDecoded.append(float(cell))
elif columns[index].type == DataType.decimal:
- rowDecrypted.append(Decimal(cell))
+ rowDecoded.append(Decimal(cell))
elif columns[index].type == DataType.timestamp:
- rowDecrypted.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
+ rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f'))
else:
- rowDecrypted.append(cell)
- dataPack.data.append(rowDecrypted)
+ rowDecoded.append(cell)
+ dataPack.data.append(rowDecoded)
dataPack.existData = True
return ResultType(0, "Success"), columns
diff --git a/minddatapack/utils.py b/minddatapack/utils.py
index c900661..f09d688 100644
--- a/minddatapack/utils.py
+++ b/minddatapack/utils.py
@@ -3,6 +3,7 @@
from mindlakesdk.datalake import DataLake
METADATA_EXT = '.meta.json'
+CACHE_PREFIX = 'datapack_cache_'
class Column(DataLake.Column):
def __init__(self, columnName: str, dataType: DataType, encrypt: bool, dataKey: bytes = None):
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..bc70519
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,30 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "minddatapack"
+version = "v1.0.1"
+authors = [
+ { name="Mind Labs", email="biz@mindnetwork.xyz" },
+]
+description = "A Python SDK to migrate data between Mind Lake and other storages"
+readme = "README.md"
+requires-python = ">=3.8"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+]
+keywords = ["web3", "encryption", "datalake"]
+dependencies = [
+ "mindlakesdk",
+ "arseeding"
+]
+
+[project.urls]
+"Homepage" = "https://github.com/mind-network/mind-datapack-python"
+"Bug Tracker" = "https://github.com/mind-network/mind-datapack-python/issues"
+
+[tool.hatch.build]
+exclude = ["/examples", "/tests", "/tutorial"]
\ No newline at end of file