In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# Imports
Import the required classes and functions.

In [2]:
from mleko.data.sources import KaggleDataSource
from mleko.data.converters import CsvToArrowConverter
from mleko.pipeline import Pipeline
from mleko.pipeline.steps import IngestStep, ConvertStep

# Constants
Define configuration variables.

In [3]:
# Kaggle dataset coordinates: the owner and dataset slugs are kept separate
# because the data source takes them individually, while the combined
# "<owner>/<dataset>" name is reused to build local directory paths.
OWNER_SLUG = "mlg-ulb"
DATASET_SLUG = "creditcardfraud"
DATASET_NAME = f"{OWNER_SLUG}/{DATASET_SLUG}"

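# Pipeline Setup
Configure the Kaggle data source and the CSV-to-Arrow converter, then chain them into a two-step ingest-and-convert pipeline.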

In [10]:
# Data source for the Kaggle dataset; files are placed under the dataset's
# "raw" directory.
# NOTE: per the recorded cell output, constructing the data source looks up
# Kaggle API credentials from the KAGGLE_USERNAME / KAGGLE_KEY environment
# variables, so both must be set before this cell runs.
kaggle_data_source = KaggleDataSource(
    f"data/{DATASET_NAME}/raw",
    owner_slug=OWNER_SLUG,
    dataset_slug=DATASET_SLUG,
)

# Converter that writes its output under the dataset's "converted" directory.
csv_to_arrow_converter = CsvToArrowConverter(
    output_directory=f"data/{DATASET_NAME}/converted",
    downcast_float=True,
)

# The steps are linked by name: the ingest step publishes "raw_data", which
# the convert step consumes to produce "converted_data".
pipeline = Pipeline(
    steps=[
        IngestStep(kaggle_data_source, outputs=["raw_data"]),
        ConvertStep(
            csv_to_arrow_converter,
            inputs=["raw_data"],
            outputs=["converted_data"],
        ),
    ]
)

[2023-05-19 09:05:14] [INFO] Attempting to fetch Kaggle API credentials from environment variables 'KAGGLE_USERNAME' and 'KAGGLE_KEY'. (kaggle_data_source.py:77)
[2023-05-19 09:05:14] [INFO] Kaggle credentials successfully fetched. (kaggle_data_source.py:94)


# Run Pipeline
Execute all pipeline steps and display the data stored under the `converted_data` output.

In [12]:
# Run every step in order and show the final step's output; leaving the
# expression as the last line lets the notebook render it as a table.
pipeline.run().data["converted_data"]

[2023-05-19 09:05:23] [INFO] Executing step 1: IngestStep (pipeline.py:74)
[2023-05-19 09:05:24] [INFO] Local dataset is up to date with Kaggle, skipping download. (kaggle_data_source.py:271)
[2023-05-19 09:05:24] [INFO] Finished step 1 (pipeline.py:76)
[2023-05-19 09:05:24] [INFO] Executing step 2: ConvertStep (pipeline.py:74)
[2023-05-19 09:05:24] [INFO] Cache Hit (LRUCache) CsvToArrowConverter.convert: Using cached output. (cache.py:104)
[2023-05-19 09:05:24] [INFO] Finished step 2 (pipeline.py:76)


#,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.3598071336738,-0.0727811733098497,2.53634673796914,1.37815522427443,-0.338320769942518,0.462387777762292,0.239598554061257,0.0986979012610507,0.363786969611213,0.0907941719789316,-0.551599533260813,-0.617800855762348,-0.991389847235408,-0.311169353699879,1.46817697209427,-0.470400525259478,0.207971241929242,0.0257905801985591,0.403992960255733,0.251412098239705,-0.018306777944153,0.277837575558899,-0.110473910188767,0.0669280749146731,0.128539358273528,-0.189114843888824,0.133558376740387,-0.0210530534538215,149.62,0
1,0.0,1.19185711131486,0.26615071205963,0.16648011335321,0.448154078460911,0.0600176492822243,-0.0823608088155687,-0.0788029833323113,0.0851016549148104,-0.255425128109186,-0.166974414004614,1.61272666105479,1.06523531137287,0.48909501589608,-0.143772296441519,0.635558093258208,0.463917041022171,-0.114804663102346,-0.183361270123994,-0.145783041325259,-0.0690831352230203,-0.225775248033138,-0.638671952771851,0.101288021253234,-0.339846475529127,0.167170404418143,0.125894532368176,-0.00898309914322813,0.0147241691924927,2.69,0
2,1.0,-1.35835406159823,-1.34016307473609,1.77320934263119,0.379779593034328,-0.503198133318193,1.80049938079263,0.791460956450422,0.247675786588991,-1.51465432260583,0.207642865216696,0.624501459424895,0.066083685268831,0.717292731410831,-0.165945922763554,2.34586494901581,-2.89008319444231,1.10996937869599,-0.121359313195888,-2.26185709530414,0.524979725224404,0.247998153469754,0.771679401917229,0.909412262347719,-0.689280956490685,-0.327641833735251,-0.139096571514147,-0.0553527940384261,-0.0597518405929204,378.66,0
3,1.0,-0.966271711572087,-0.185226008082898,1.79299333957872,-0.863291275036453,-0.0103088796030823,1.24720316752486,0.23760893977178,0.377435874652262,-1.38702406270197,-0.0549519224713749,-0.226487263835401,0.178228225877303,0.507756869957169,-0.28792374549456,-0.631418117709045,-1.0596472454325,-0.684092786345479,1.96577500349538,-1.2326219700892,-0.208037781160366,-0.108300452035545,0.00527359678253453,-0.190320518742841,-1.17557533186321,0.647376034602038,-0.221928844458407,0.0627228487293033,0.0614576285006353,123.5,0
4,2.0,-1.15823309349523,0.877736754848451,1.548717846511,0.403033933955121,-0.407193377311653,0.0959214624684256,0.592940745385545,-0.270532677192282,0.817739308235294,0.753074431976354,-0.822842877946363,0.53819555014995,1.3458515932154,-1.11966983471731,0.175121130008994,-0.451449182813529,-0.237033239362776,-0.0381947870352842,0.803486924960175,0.408542360392758,-0.00943069713232919,0.79827849458971,-0.137458079619063,0.141266983824769,-0.206009587619756,0.502292224181569,0.219422229513348,0.215153147499206,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.8811178854323,10.0717849710003,-9.83478345739033,-2.06665568459413,-5.36447278097585,-2.6068373309456,-4.91821543115252,7.305334020798,1.9144282734458,4.35617041320691,-1.59310526246153,2.71194079105717,-0.68925560917964,4.62694202525016,-0.92445871482528,1.10764060095394,1.99169110706243,0.510632329130648,-0.682919680356916,1.4758291346555,0.213454108437359,0.111863735978609,1.01447989719391,-0.509348453168509,1.4368069070214,0.250034279569581,0.943651171507532,0.823730961486502,0.77,0
284803,172787.0,-0.732788670658956,-0.0550804899173326,2.03502974528243,-0.73858858438744,0.868229398914682,1.05841527222565,0.0243296959175797,0.294868698501783,0.584800017281683,-0.975926063322508,-0.150188847103276,0.915801914403555,1.21475584849424,-0.675142955809576,1.16493090944588,-0.711757349978881,-0.025692855268572,-1.22117885840624,-1.54555608554509,0.0596158998872689,0.214205341747019,0.924383584903381,0.0124630383316793,-1.01622566867336,-0.606623985854728,-0.395255065710324,0.0684724700405593,-0.0535273892010011,24.79,0
284804,172788.0,1.91956500980048,-0.301253845990644,-3.24963981406834,-0.55782812475002,2.63051512011547,3.03126009781428,-0.296826527116156,0.708417184967134,0.432454047632915,-0.484781755751029,0.411613736794327,0.0631188625621446,-0.183698687930443,-0.510601843577723,1.32928351250595,0.140715981685477,0.313501786950651,0.395652479334165,-0.577251842501113,0.00139597028995166,0.232045035925397,0.57822900992263,-0.0375008550221367,0.640133881346421,0.265745453243744,-0.0873705959041059,0.00445477213829229,-0.0265608285615222,67.88,0
284805,172788.0,-0.240440049680947,0.530482513118839,0.702510230095103,0.689799168040973,-0.377961134444982,0.62370772214768,-0.68617998628885,0.679145459790659,0.392086712465972,-0.399125651432835,-1.93384881505713,-0.962886142890271,-1.04208165591191,0.449624443166001,1.96256312066577,-0.60857712704613,0.509928460110321,1.11398059049908,2.89784877334313,0.127433515805355,0.265244916386865,0.800048741498139,-0.163297944406659,0.123205243742508,-0.569158864158597,0.546668462188323,0.108820734744839,0.104532821478796,10.0,0
