diff --git a/README.md b/README.md index 93f6057..ab42f02 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ DataPack, contributed by the Mind Network Team, is to enable data transformation ### Dependencies -* Python > 3.8 +* Python >= 3.8 * pip * mindlakesdk * arseeding @@ -30,6 +30,7 @@ from minddatapack import DataPack ### More examples * [use case of arweave in jupyter](/examples/use_case_arweave.ipynb) +* [use case of IPFS in jupyter](/examples/use_case_ipfs.ipynb) ## code ``` @@ -37,6 +38,7 @@ mind-datapack-python |-- minddatapack # source code | |-- __init__.py | |-- arweaveconnector.py +| |-- ipfsconnector.py | |-- localfileconnector.py | |-- mindlakeconnector.py | └-- utils.py @@ -59,6 +61,8 @@ Full doc: [https://mind-network.gitbook.io/mind-lake-sdk](https://mind-network.g * v1.0 * Initial Release +* v1.0.1 + * Add IPFS support ## License diff --git a/examples/use_case_ipfs.ipynb b/examples/use_case_ipfs.ipynb new file mode 100644 index 0000000..c427c26 --- /dev/null +++ b/examples/use_case_ipfs.ipynb @@ -0,0 +1,629 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step 1: configuration" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input your walletPrivateKey here:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "check env.walletPrivateKey: 2a776\n", + "check env.mindLakeAppKey: Kyunq\n", + "check env.MINDLAKE_GATEWAY: https://sdk.mindnetwork.xyz/node\n" + ] + } + ], + "source": [ + "# make sure you configure mindlakesdk, please check env.py in the same folder\n", + "# please get familiar with these configurations, and you can read https://github.com/mind-network/mind-lake-sdk-python \n", + "import env\n", + "\n", + "print(\"check env.walletPrivateKey:\", env.walletPrivateKey[0:5])\n", + "print(\"check env.mindLakeAppKey:\", 
env.mindLakeAppKey[0:5])\n", + "print(\"check env.MINDLAKE_GATEWAY:\", env.MINDLAKE_GATEWAY)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install depedancy and source code" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# please \"pip install mindlakesdk\" if not installed\n", + "import mindlakesdk\n", + "\n", + "# please \"pip install minddatapack\" if not installed, or git clone the source code\n", + "from minddatapack import DataPack\n", + "\n", + "# please \"pip install pandas\" if not installed, it is used to display structure content in the notebook\n", + "import pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# check if example_data.csv file exists in the same folder, this demo will start to load this csv file.\n", + "\n", + "# check IPFS service is running, if not, please start it by \"ipfs daemon\" in command line or start it in the IPFS desktop app" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2. Local CSV -> MindLake\n", + "- Load data from a local CSV file without the metadata file, by defining each column manually.\n", + "- You can also load from a local backup into MindLake once you are familar, then no need to define each column. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidwallet_addressregister_date
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " uid wallet_address register_date\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fn_local_csv = './example_data.csv'\n", + "\n", + "# preview local csv\n", + "df = pandas.read_csv(fn_local_csv)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack1 = DataPack(env.walletPrivateKey)\n", + "result = dataPack1.loadFromCSVFileByDefineColumn(fn_local_csv, \n", + " [\n", + " DataPack.Column('uid', DataPack.DataType.int4, False),\n", + " DataPack.Column('wallet_address', DataPack.DataType.text, True),\n", + " DataPack.Column('register_date', DataPack.DataType.timestamp, True)\n", + " ])\n", + "assert result, result.message\n", + "df = pandas.DataFrame(dataPack1.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into a table in MindLake" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dropTable result: Success\n", + "a_new_table has been saved to MindLake.\n" + ] + } + ], + "source": [ + "mindlake = mindlakesdk.connect(env.walletPrivateKey, env.mindLakeAppKey, env.MINDLAKE_GATEWAY)\n", + "assert mindlake, mindlake.message\n", + "\n", + "# drop the table if exists\n", + "result = mindlake.datalake.dropTable('a_new_table')\n", + "print(\"dropTable result:\", result.message)\n", + "\n", + "# upload from local csv into mindlake\n", + "result = dataPack1.saveToMindLake('a_new_table', mindlake)\n", + "assert result, result.message\n", + "print(\"a_new_table has been saved to MindLake.\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "you can also check this new downloaded table in https://scan.mindnetwork.xyz/account/myData" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "## Step 3. MindLake -> IPFS\n", + "- Load data from a table in MindLake and save to IPFS" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack2 = DataPack(env.walletPrivateKey)\n", + "# make sure if you complete Step 1, so \"a_new_table\" exists in mindlake \n", + "result = dataPack2.loadFromMindByQuery('select * from \"a_new_table\"', mindlake)\n", + "assert result, result.message\n", + "df = pandas.DataFrame(dataPack2.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into IPFS" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_table_encrypted.csv has been saved to IPFS.\n", + "\n", + "The IPFS ID is: QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM \n", + "\n", + "You can check on local IPFS http service: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM\n", + "\n", + "And the content on IPFS: http://127.0.0.1:8080/ipfs/QmYxCi1BBhbch496SDVP58VesPuTnraPgu57S84b1XiteM/test_table_encrypted.csv\n", + "\n", + "The content is:\n", + "uid,wallet_address,register_date\n", + "123,\\x4810c8a24f2f22243d778860d9ee5180d5375d6cbd22561e568c23bce63f7516d1d2887dcb1993de0133e1d1e002c06b38b56b25d23acac51e24e740f1c6c5e163,\\xf9977c69c6a3d716e615b344f4f66953a4b3d764892a5960460aba34a6c7645943\n", + "124,\\xa7fca7f580180de2e92df9ddf9608c2442dcfa947ba23e3be37bf4764f981670016df19ca4fb3e7295479de368c45122f2cca35d619a402ac28d437cfd0188233f,\\xf06543016d08a89717d34fea65b66b4b8878ae8f411598c3ce3cbde0795a58d2c7\n", + 
"125,\\xd26a30809f578388dfc97a0262190094c086060a380e7b83a5f2e9bd9cd680a6e272c900921ca7c5824391e4f6bc6854e622f59c0d891152e861ff7ba62f542c40,\\xfa94a16e90f53456b9d053537fed04802514b15a3506055acbbd519e67de1d6e1c\n", + "\n" + ] + } + ], + "source": [ + "# make sure you have started IPFS service on your local machine\n", + "result = dataPack2.saveToIPFS('test_table_encrypted.csv')\n", + "assert result, result.message\n", + "print(\"test_table_encrypted.csv has been saved to IPFS.\\n\")\n", + "ipfsHash = result.data\n", + "print(\"The IPFS ID is:\", ipfsHash, '\\n')\n", + "ipfs_local_url = \"http://127.0.0.1:8080/ipfs/%s\"%ipfsHash\n", + "ipfs_local_data_url = ipfs_local_url + '/test_table_encrypted.csv'\n", + "print(f\"You can check on local IPFS http service: {ipfs_local_url}\\n\")\n", + "print(f\"And the content on IPFS: {ipfs_local_data_url}\\n\")\n", + "import requests\n", + "content = requests.get(ipfs_local_data_url).text\n", + "print(\"The content is:\")\n", + "print(content)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4. IPFS -> Local File\n", + "- Load data from IPFS and save to local file" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You can see file on IPFS are encrypted, but datapack can decrypted it locally\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
01230x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
11240x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
21250x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB72023-07-15 02:25:32.392441
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 123 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "1 124 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441\n", + "2 125 0x79Be957bf7e3003aFd0e78f04Bacbc93D3ef2fB7 2023-07-15 02:25:32.392441" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataPack3 = DataPack(env.walletPrivateKey)\n", + "result = dataPack3.loadFromIPFS(ipfsHash)\n", + "assert result, result.message\n", + "print(\"You can see file on IPFS are encrypted, but datapack can decrypted it locally\")\n", + "df = pandas.DataFrame(dataPack3.data)\n", + "df" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- Save the data into a local file with encryption" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "test_localtable_encrypted.csv has been saved to local file.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
uidwallet_addressregister_date
0123\\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15...\\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd...
1124\\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4...\\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3...
2125\\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641...\\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0...
\n", + "
" + ], + "text/plain": [ + " uid wallet_address \\\n", + "0 123 \\x1295126db9fb5f12c77ca17a7b2230509f05ee225d15... \n", + "1 124 \\x3c9791f3a72caf6f8f4ed8d699c9efd63eccb60961b4... \n", + "2 125 \\xe2137c5d87fbaa2fe39f2fea8731992ea7a194b90641... \n", + "\n", + " register_date \n", + "0 \\x3f7a44eb7cdd6c1ff9433b4b911a0a400af3aafdc6dd... \n", + "1 \\x83cd814f9a8f2a1e1684a62e64d87f488517526441b3... \n", + "2 \\x550d9b3fc3d1edd7866990620a91ac5164c9fe9e30e0... " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = dataPack3.saveToLocalFile('test_localtable_encrypted.csv', False)\n", + "assert result, result.message\n", + "print(\"test_localtable_encrypted.csv has been saved to local file.\")\n", + "df = pandas.read_csv('test_localtable_encrypted.csv')\n", + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "datapack-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/minddatapack/__init__.py b/minddatapack/__init__.py index e9f7938..7d3414a 100644 --- a/minddatapack/__init__.py +++ b/minddatapack/__init__.py @@ -3,9 +3,11 @@ import mindlakesdk from mindlakesdk.utils import ResultType, DataType from web3 import Web3 +import importlib.metadata import minddatapack.arweaveconnector import minddatapack.mindlakeconnector import minddatapack.localfileconnector +import minddatapack.ipfsconnector from minddatapack.utils import Column class DataPack: @@ -18,8 +20,8 @@ def __init__(self, walletPrivateKey: str): self.data = None self.columnName = None self.fileName = None - self.filePath = None self.primaryKey = None + self.version = 
importlib.metadata.version('minddatapack') self.__walletPrivateKey = walletPrivateKey web3 = Web3(Web3.HTTPProvider(mindlakesdk.settings.WEB3API)) self.__walletAccount = web3.eth.account.from_key(walletPrivateKey) @@ -47,3 +49,9 @@ def saveToArweave(self, fileName: str, tokenName: str, arWalletFile: str = None) def loadFromArweave(self, id: str, arGateway: str = 'https://arseed.web3infra.dev/'): return minddatapack.arweaveconnector.loadFromArweave(self, id, arGateway) + + def saveToIPFS(self, fileName: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None): + return minddatapack.ipfsconnector.saveToIPFS(self, fileName, apiEndpoint, apiKey, apiSecret) + + def loadFromIPFS(self, ipfsCID: str, apiEndpoint: str = 'http://localhost:5001', apiKey: str = None, apiSecret: str = None): + return minddatapack.ipfsconnector.loadFromIPFS(self, ipfsCID, apiEndpoint, apiKey, apiSecret) diff --git a/minddatapack/arweaveconnector.py b/minddatapack/arweaveconnector.py index 1d713e9..8012f4d 100644 --- a/minddatapack/arweaveconnector.py +++ b/minddatapack/arweaveconnector.py @@ -2,6 +2,7 @@ import os import logging import requests +from datetime import datetime from mindlakesdk.utils import ResultType import minddatapack.utils @@ -36,8 +37,8 @@ def saveToArweave(dataPack, fileName: str, tokenName: str, arWalletFile: str, et os.remove(metaFileName) def loadFromArweave(dataPack, id: str, arGateway: str): - metaFileName = None - dataFileName = None + cacheDataFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.csv' + cacheMetaFileName = cacheDataFileName + minddatapack.utils.METADATA_EXT try: if arGateway[-1] != '/': arGateway += '/' @@ -46,26 +47,23 @@ def loadFromArweave(dataPack, id: str, arGateway: str): if metaResponse and metaResponse.status_code == 200: txMeta = json.loads(metaResponse.text) metadataJsonStr = txMeta['tags'][1]['value'] - metadata = json.loads(metadataJsonStr) - dataFileName = 
metadata['FileName'] - metaFileName = dataFileName + minddatapack.utils.METADATA_EXT - with open(metaFileName, 'wb') as file: + with open(cacheMetaFileName, 'wb') as file: file.write(metadataJsonStr.encode('utf-8')) dataUrl = arGateway + id dataResponse = requests.get(dataUrl) if dataResponse and dataResponse.status_code == 200: - with open(dataFileName, 'wb') as file: + with open(cacheDataFileName, 'wb') as file: file.write(dataResponse.content) - return dataPack.loadFromLocalFile(dataFileName) + return dataPack.loadFromLocalFile(cacheDataFileName) else: return ResultType(60001, "Network error", None) except Exception as e: logging.debug("Exception:", str(e)) return ResultType(60014, "Fail to connect to Arweave", None) finally: - if dataFileName and os.path.exists(dataFileName): - os.remove(dataFileName) - if metaFileName and os.path.exists(metaFileName): - os.remove(metaFileName) + if os.path.exists(cacheDataFileName): + os.remove(cacheDataFileName) + if os.path.exists(cacheMetaFileName): + os.remove(cacheMetaFileName) diff --git a/minddatapack/ipfsconnector.py b/minddatapack/ipfsconnector.py new file mode 100644 index 0000000..69db82b --- /dev/null +++ b/minddatapack/ipfsconnector.py @@ -0,0 +1,85 @@ +from datetime import datetime +import tarfile +import requests +import json +import logging +import os +from mindlakesdk.utils import ResultType +import minddatapack.utils + + +def saveToIPFS(dataPack, fileName: str, apiEndpoint: str, apiKey: str, apiSecret: str) -> ResultType: + result = dataPack.saveToLocalFile(fileName) + if not result: + return result + metaFileName = fileName + minddatapack.utils.METADATA_EXT + try: + csvFile = open(fileName, 'rb') + metaFile = open(metaFileName, 'rb') + files = {} + files[fileName] = csvFile + files[metaFileName] = metaFile + if apiKey and apiSecret: + response = requests.post(apiEndpoint + '/api/v0/add?pin=true&wrap-with-directory=true', files=files, auth=(apiKey,apiSecret)) + else: + response = requests.post(apiEndpoint + 
'/api/v0/add?pin=true&wrap-with-directory=true', files=files) + if response and response.status_code == 200: + folderJson = response.text.splitlines()[-1] + ipfsHash = json.loads(folderJson)['Hash'] + return ResultType(0, "Success", ipfsHash) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if csvFile: + csvFile.close() + if metaFile: + metaFile.close() + if os.path.exists(fileName): + os.remove(fileName) + if os.path.exists(metaFileName): + os.remove(metaFileName) + +def loadFromIPFS(dataPack, ipfsCID: str, apiEndpoint: str, apiKey: str, apiSecret: str): + cacheTarFileName = minddatapack.utils.CACHE_PREFIX + datetime.now().strftime("%Y%m%d%H%M%S%f") + '.tar.gz' + metaFileName = None + dataFileName = None + try: + if apiKey and apiSecret: + response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6', auth=(apiKey,apiSecret)) + else: + response = requests.post(apiEndpoint + f'/api/v0/get?arg={ipfsCID}&archive=true&compress=true&compression-level=6') + if response and response.status_code == 200: + with open(cacheTarFileName, 'wb') as file: + file.write(response.content) + with tarfile.open(cacheTarFileName, "r:gz") as tar: + members = tar.getmembers() + if len(members) != 3: + return ResultType(60015, "Invalid DataPack data", None) + for member in members: + nameSplit = member.name.split('/') + if len(nameSplit) == 2: + if member.name.endswith(minddatapack.utils.METADATA_EXT): + metaFileName = member.name + elif member.name.endswith('.csv'): + dataFileName = member.name + tar.extract(member) + if metaFileName != dataFileName + minddatapack.utils.METADATA_EXT: + return ResultType(60015, "Invalid DataPack data", None) + return dataPack.loadFromLocalFile(dataFileName) + else: + return ResultType(60001, "Network error", None) + except Exception as e: + 
logging.debug("Exception:", str(e)) + return ResultType(60014, "Fail to connect to IPFS", None) + finally: + if os.path.exists(cacheTarFileName): + os.remove(cacheTarFileName) + if dataFileName and os.path.exists(dataFileName): + os.remove(dataFileName) + if metaFileName and os.path.exists(metaFileName): + os.remove(metaFileName) + if os.path.exists(ipfsCID): + os.rmdir(ipfsCID) diff --git a/minddatapack/localfileconnector.py b/minddatapack/localfileconnector.py index 4b2c1bc..1337df8 100644 --- a/minddatapack/localfileconnector.py +++ b/minddatapack/localfileconnector.py @@ -56,21 +56,20 @@ def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list, with open(filePath, 'w') as file: writer = csv.writer(file) writer.writerow(dataPack.columnName) - if ignoreEncrypt: - for row in dataPack.data: - writer.writerow(row) - else: - for row in dataPack.data: - rowEncrypted = [] - for index, cell in enumerate(row): - if columns[index].encrypt: - encryptResult = __encrypt(cell, columns[index]) - if not encryptResult: - return encryptResult - rowEncrypted.append(encryptResult.data) + for row in dataPack.data: + rowEncoded = [] + for index, cell in enumerate(row): + if not ignoreEncrypt and columns[index].encrypt: + encryptResult = __encrypt(cell, columns[index]) + if not encryptResult: + return encryptResult + rowEncoded.append(encryptResult.data) + else: + if columns[index].type == DataType.timestamp: + rowEncoded.append(cell.strftime('%Y-%m-%d %H:%M:%S.%f')) else: - rowEncrypted.append(cell) - writer.writerow(rowEncrypted) + rowEncoded.append(str(cell)) + writer.writerow(rowEncoded) sha256_hash = SHA256.new() with open(filePath, 'rb') as file: @@ -78,16 +77,17 @@ def saveToLocalFile(dataPack, filePath: str, ignoreEncrypt: bool, columns: list, sha256_hash.update(chunk) sha256_hash_hex = sha256_hash.hexdigest() - metadata = __buildMetadata(dataPack.fileName, ignoreEncrypt, sha256_hash_hex, columns, walletAccount) + metadata = 
__buildMetadata(dataPack.fileName, ignoreEncrypt, sha256_hash_hex, columns, walletAccount, dataPack.version) with open(filePath+minddatapack.utils.METADATA_EXT, 'w') as file: json.dump(metadata, file) return ResultType(0, None) -def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns: list, walletAccount) -> dict: +def __buildMetadata(fileName: str, ignoreEncrypt: bool, fileHash: str, columns: list, walletAccount, version: str) -> dict: metadata = {} metadata['FileName'] = fileName metadata['IgnoreEncrypt'] = ignoreEncrypt metadata['FileHash'] = fileHash + metadata['Version'] = version metadata['Column'] = [] for column in columns: columnMeta = {} @@ -128,25 +128,25 @@ def loadFromLocalFile(dataPack, filePath: str, walletAccount): dataPack.columnName = next(reader) dataPack.data = [] for row in reader: - rowDecrypted = [] + rowDecoded = [] for index, cell in enumerate(row): if not ignoreEncrypt and columns[index].encrypt: decryptResult = __decrypt(cell, columns[index]) if not decryptResult: return decryptResult - rowDecrypted.append(decryptResult.data) + rowDecoded.append(decryptResult.data) else: if columns[index].type == DataType.int4 or columns[index].type == DataType.int8: - rowDecrypted.append(int(cell)) + rowDecoded.append(int(cell)) elif columns[index].type == DataType.float4 or columns[index].type == DataType.float8: - rowDecrypted.append(float(cell)) + rowDecoded.append(float(cell)) elif columns[index].type == DataType.decimal: - rowDecrypted.append(Decimal(cell)) + rowDecoded.append(Decimal(cell)) elif columns[index].type == DataType.timestamp: - rowDecrypted.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) + rowDecoded.append(datetime.datetime.strptime(cell, '%Y-%m-%d %H:%M:%S.%f')) else: - rowDecrypted.append(cell) - dataPack.data.append(rowDecrypted) + rowDecoded.append(cell) + dataPack.data.append(rowDecoded) dataPack.existData = True return ResultType(0, "Success"), columns diff --git a/minddatapack/utils.py 
b/minddatapack/utils.py index c900661..f09d688 100644 --- a/minddatapack/utils.py +++ b/minddatapack/utils.py @@ -3,6 +3,7 @@ from mindlakesdk.datalake import DataLake METADATA_EXT = '.meta.json' +CACHE_PREFIX = 'datapack_cache_' class Column(DataLake.Column): def __init__(self, columnName: str, dataType: DataType, encrypt: bool, dataKey: bytes = None): diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..bc70519 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,32 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "minddatapack" +version = "1.0.1" +authors = [ + { name="Mind Labs", email="biz@mindnetwork.xyz" }, +] +description = "A Python SDK to migrate data between Mind Lake and other storages" +readme = "README.md" +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +keywords = ["web3", "encryption", "datalake"] +dependencies = [ + "mindlakesdk", + "arseeding", + "requests", + "web3" +] + +[project.urls] +"Homepage" = "https://github.com/mind-network/mind-datapack-python" +"Bug Tracker" = "https://github.com/mind-network/mind-datapack-python/issues" + +[tool.hatch.build] +exclude = ["/examples", "/tests", "/tutorial"] \ No newline at end of file