In [1]:
import json
import os

import pandas as pd
import s3fs

In [2]:
# Connect to the S3-compatible endpoint (MinIO here). Credentials and endpoint
# can be overridden through environment variables so real secrets never have to
# be committed with the notebook; the fallbacks are the original
# local-development values, so the notebook still runs unchanged without them.
fs = s3fs.S3FileSystem(
    anon=False,
    key=os.environ.get("MINIO_ACCESS_KEY", "minio"),
    secret=os.environ.get("MINIO_SECRET_KEY", "minio123"),
    client_kwargs={
        "endpoint_url": os.environ.get("MINIO_ENDPOINT_URL", "http://localhost:9000"),
    },
)

In [3]:
# Show the object keys stored under the "test" bucket (names only, no metadata)
fs.ls("test", detail=False)

['test/people.csv', 'test/people.json', 'test/people.parquet']

In [4]:
# Load a CSV object from the bucket into a DataFrame
with fs.open("test/people.csv", mode="rb") as csv_file:
    my_df = pd.read_csv(filepath_or_buffer=csv_file)
my_df

Unnamed: 0,name,age
0,Michael,31
1,Andy,30
2,Justin,19


In [5]:
# Append one row to the DataFrame at the next integer label.
# The age is given as an int (not the string "18") so the `age` column keeps a
# single numeric type instead of degrading to a mixed int/str object column.
# Note: this mutates `my_df` in place, so re-running the cell appends another row.
my_df.loc[len(my_df)] = ["John", 18]
my_df

Unnamed: 0,name,age
0,Michael,31
1,Andy,30
2,Justin,19
3,John,18


In [6]:
# Persist the DataFrame to MinIO as CSV (without the index column) ...
with fs.open("test/people2.csv", mode="w") as csv_out:
    my_df.to_csv(path_or_buf=csv_out, index=False)

# ... then load the freshly written object again to check the round trip
with fs.open("test/people2.csv", mode="rb") as csv_in:
    my_df = pd.read_csv(filepath_or_buffer=csv_in)
my_df

Unnamed: 0,name,age
0,Michael,31
1,Andy,30
2,Justin,19
3,John,18


In [7]:
# Load a Parquet object from the bucket (pandas needs pyarrow installed for this)
with fs.open("test/people.parquet", mode="rb") as parquet_file:
    my_df = pd.read_parquet(path=parquet_file)
my_df

Unnamed: 0,name,age
0,Michael,31
1,Andy,30
2,Justin,19


In [8]:
# Load a gzip-compressed CSV; the filesystem layer decompresses the stream
# (compression="gzip"), so pandas receives plain CSV bytes
with fs.open("mycsvbucket/sampledata/TotalPopulation.csv.gz", mode="rb", compression="gzip") as gz_file:
    my_df = pd.read_csv(filepath_or_buffer=gz_file)
my_df

Unnamed: 0,LocID,Location,VarID,Variant,Time,MidPeriod,PopMale,PopFemale,PopTotal,PopDensity
0,4,Afghanistan,2,Medium,1950,1950.5,4099.243,3652.874,7752.117,11.874
1,4,Afghanistan,2,Medium,1951,1951.5,4134.756,3705.395,7840.151,12.009
2,4,Afghanistan,2,Medium,1952,1952.5,4174.450,3761.546,7935.996,12.156
3,4,Afghanistan,2,Medium,1953,1953.5,4218.336,3821.348,8039.684,12.315
4,4,Afghanistan,2,Medium,1954,1954.5,4266.484,3884.832,8151.316,12.486
...,...,...,...,...,...,...,...,...,...,...
280927,716,Zimbabwe,207,Lower 95 PI,2080,2080.5,10576.533,11255.983,21836.893,56.448
280928,716,Zimbabwe,207,Lower 95 PI,2085,2085.5,10293.349,11050.875,21355.988,55.205
280929,716,Zimbabwe,207,Lower 95 PI,2090,2090.5,9920.336,10767.709,20689.956,53.483
280930,716,Zimbabwe,207,Lower 95 PI,2095,2095.5,9503.711,10412.184,19892.080,51.421


In [9]:
# Load a JSON object from the bucket and parse it into Python objects
with fs.open("test/people.json", mode="r") as json_file:
    my_json = json.loads(json_file.read())
my_json

[{'name': 'Michael', 'age': 31},
 {'name': 'Andy', 'age': 30},
 {'name': 'Justin', 'age': 19}]