In [2]:
pip install plaid-python

Note: you may need to restart the kernel to use updated packages.


In [16]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Installation des bibliothèques nécessaires
!pip install pandas numpy scikit-learn matplotlib seaborn nltk xgboost joblib plaid-python python-dotenv

Collecting pandas
  Using cached pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting numpy
  Downloading numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.1-cp313-cp313-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Collecting joblib
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downl

In [1]:
# Standard library
import os
import json
import re
import datetime

# Environment handling
from dotenv import load_dotenv, dotenv_values

# Data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Text processing
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Machine learning
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# Plaid
from plaid.api import plaid_api
from plaid.model.plaid_error import PlaidError
from plaid.model.products import Products
from plaid.model.link_token_create_request import LinkTokenCreateRequest
from plaid.model.country_code import CountryCode
from plaid.model.link_token_get_request import LinkTokenGetRequest
from plaid.model.accounts_get_request import AccountsGetRequest
from plaid.model.transactions_get_request import TransactionsGetRequest
from plaid.model.transactions_get_request_options import TransactionsGetRequestOptions
from plaid.configuration import Configuration
from plaid.api_client import ApiClient
from plaid.model.item_public_token_exchange_request import ItemPublicTokenExchangeRequest
from plaid.model.sandbox_public_token_create_request import SandboxPublicTokenCreateRequest

## Load environment variables

In [None]:
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Suffix appended to the variable names below; it selects which set of Plaid
# settings is read (here PLAID_SECRET_sand and PLAID_ENV_sand).
env = 'sand'

PLAID_CLIENT_ID = os.getenv("PLAID_CLIENT_ID")
PLAID_SECRET = os.getenv("PLAID_SECRET_" + env)
PLAID_ENV = os.getenv("PLAID_ENV_" + env)

# os.getenv returns None for undefined variables. Fail here with an explicit
# message instead of letting a None host/credential reach the Plaid client.
_required_settings = {
    "PLAID_CLIENT_ID": PLAID_CLIENT_ID,
    "PLAID_SECRET_" + env: PLAID_SECRET,
    "PLAID_ENV_" + env: PLAID_ENV,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

## Retrieve fake data from the sandbox

### 1 - Generate access token


In [None]:
# Initialize Plaid client configuration
# PLAID_ENV is passed as the API host; PLAID_CLIENT_ID / PLAID_SECRET are the
# credentials sent with every request made through `client`.

configuration = Configuration(
    host=PLAID_ENV,
    api_key={
        'clientId': PLAID_CLIENT_ID,
        'secret': PLAID_SECRET,
    }
)

# `client` is the handle used by every Plaid call in the cells below.
api_client = ApiClient(configuration)
client = plaid_api.PlaidApi(api_client)

# Ask Plaid for a link token covering the 'auth' and 'transactions' products,
# with French country code / language and a fixed client-side user id.
request = LinkTokenCreateRequest(
    products=[Products('auth'), Products('transactions')],
    client_name="mycompany",
    country_codes=[CountryCode('FR')],
    language='fr',
    user={'client_user_id': 'user_12345'}
)

# NOTE(review): `link_token` is not referenced again in the cells visible
# here — confirm whether this call is still needed.
response = client.link_token_create(request)
link_token = response['link_token']

# Generate public token
# The sandbox endpoint creates the public token for institution 'ins_117650'
# with the 'transactions' product only.
pt_request = SandboxPublicTokenCreateRequest(
    institution_id='ins_117650',
    initial_products=[Products('transactions')]
)
pt_response = client.sandbox_public_token_create(pt_request)

# The generated public_token can now be
# exchanged for an access_token
exchange_request = ItemPublicTokenExchangeRequest(
    public_token=pt_response['public_token']
)
exchange_response = client.item_public_token_exchange(exchange_request)
# `access_token` is the value the account and transaction requests below pass
# to Plaid. It is read by attribute here, while other responses in this
# notebook are read by subscript.
access_token = exchange_response.access_token

### 2 - Retrieve data using access token

#### Account data

In [13]:
# Retrieve account data using access token
request = AccountsGetRequest(
    access_token=access_token
)

try:
    accounts_response = client.accounts_get(request)
    accounts = accounts_response['accounts']
    # Convert each account model to a plain dictionary so it can be dumped as JSON
    accounts_list = [account.to_dict() for account in accounts]
    print(json.dumps(accounts_list, indent=2))
except Exception as e:
    print("Error retrieving accounts:", e)
    # Some exceptions carry the raw response payload in a `body` attribute;
    # pretty-print it when it is valid JSON.
    if hasattr(e, 'body'):
        try:
            error_response = json.loads(e.body)
            print(json.dumps(error_response, indent=2))
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a body
            # that is None or not text, which would otherwise raise from
            # inside this handler and hide the original error.
            print("Error decoding JSON response")


[
  {
    "account_id": "5KLkwRoEVLH8E3GyB9QyT3EzlM7mMoU5rK1pE",
    "balances": {
      "available": 100.0,
      "current": 110.0,
      "limit": null,
      "iso_currency_code": "GBP",
      "unofficial_currency_code": null
    },
    "mask": "0000",
    "name": "Plaid Current Account",
    "official_name": "Plaid Standard Current Account",
    "type": "depository",
    "subtype": "checking",
    "holder_category": "personal"
  },
  {
    "account_id": "Jg1JG6Eeb1s35zJw7bWwCvezb6gB6DtB86D5K",
    "balances": {
      "available": 200.0,
      "current": 210.0,
      "limit": null,
      "iso_currency_code": "GBP",
      "unofficial_currency_code": null
    },
    "mask": "1111",
    "name": "Plaid Saving",
    "official_name": "Plaid Standard Interest Saving",
    "type": "depository",
    "subtype": "savings",
    "holder_category": "personal"
  },
  {
    "account_id": "kZJvePy5mJhX18a4VnR4Cnpgo1jb1eCL56l8r",
    "balances": {
      "available": null,
      "current": 410.0,
      

#### Transaction data

In [None]:
def serialize_response(obj):
    """Recursively convert ``obj`` into a structure ``json.dumps`` can encode.

    ``datetime.date`` and ``datetime.datetime`` values are replaced by their
    ISO 8601 string (``isoformat()``). Lists, tuples and dictionaries are
    rebuilt with every element / value converted the same way; dictionary
    keys are left untouched. Any other value is returned unchanged.

    Args:
        obj: Arbitrary value, typically the result of a model's ``to_dict()``.

    Returns:
        A new object of the same container type (list, tuple or dict) with
        date-like leaves turned into strings, or ``obj`` itself when it is
        not a date or one of those containers. Tuple subclasses come back as
        plain tuples.
    """
    if isinstance(obj, (datetime.date, datetime.datetime)):
        return obj.isoformat()
    if isinstance(obj, list):
        return [serialize_response(item) for item in obj]
    if isinstance(obj, tuple):
        # Tuples are valid JSON arrays for json.dumps, but dates nested inside
        # them would stay unserializable unless we recurse here as well.
        return tuple(serialize_response(item) for item in obj)
    if isinstance(obj, dict):
        return {key: serialize_response(value) for key, value in obj.items()}
    return obj

# Query window: the last 30 days. `now` is read once so both bounds are
# derived from the same instant.
_now = datetime.datetime.now()
start_date = _now - datetime.timedelta(days=30)
end_date = _now

request = TransactionsGetRequest(
    access_token=access_token,
    start_date=start_date.date(),
    end_date=end_date.date()
)

try:
    transactions_response = client.transactions_get(request)
    transactions = list(transactions_response['transactions'])
    total_transactions = transactions_response['total_transactions']

    # /transactions/get is paginated: keep requesting with an increasing
    # offset until every transaction reported by the API has been fetched.
    while len(transactions) < total_transactions:
        request = TransactionsGetRequest(
            access_token=access_token,
            start_date=start_date.date(),
            end_date=end_date.date(),
            options=TransactionsGetRequestOptions(offset=len(transactions))
        )
        transactions_response = client.transactions_get(request)
        page = transactions_response['transactions']
        if not page:
            # Defensive stop: an empty page would otherwise loop forever.
            break
        transactions.extend(page)

    # Convert to dictionary and handle serialization
    transactions_list = [serialize_response(transaction.to_dict()) for transaction in transactions]

    print(json.dumps(transactions_list, indent=2))
except Exception as e:
    print("Error retrieving transactions:", e)
    # Some exceptions carry the raw response payload in a `body` attribute;
    # pretty-print it when it is valid JSON.
    if hasattr(e, 'body'):
        try:
            error_response = json.loads(e.body)
            print(json.dumps(error_response, indent=2))
        except (TypeError, ValueError):
            # ValueError covers json.JSONDecodeError; TypeError covers a body
            # that is None or not text, which would otherwise raise from
            # inside this handler and hide the original error.
            print("Error decoding JSON response")


[
  {
    "account_id": "5KLkwRoEVLH8E3GyB9QyT3EzlM7mMoU5rK1pE",
    "account_owner": null,
    "amount": 280.0,
    "authorized_date": "2025-04-09",
    "authorized_datetime": null,
    "category": [
      "Payment",
      "Loan"
    ],
    "category_id": "16003000",
    "check_number": null,
    "counterparties": [
      {
        "name": "Loans 2 Go",
        "type": "merchant",
        "website": null,
        "logo_url": null,
        "confidence_level": "VERY_HIGH",
        "entity_id": null,
        "phone_number": null
      }
    ],
    "date": "2025-04-09",
    "datetime": null,
    "iso_currency_code": "GBP",
    "location": {
      "address": null,
      "city": null,
      "region": null,
      "postal_code": null,
      "country": null,
      "lat": null,
      "lon": null,
      "store_number": null
    },
    "logo_url": null,
    "merchant_entity_id": null,
    "merchant_name": "Loans 2 Go",
    "name": "Loans 2 Go",
    "payment_channel": "in store",
    "payment_meta

Real data from dev environment

Create ML model to categorize transactions

Compare with Plaid categorization