In [2]:
import pandas as pd
import sys
import io
import json
import re

def read_csv_fast(file_path, keyword):
    """Return the CSV rows that contain *keyword* as a whole word/phrase.

    The file is read in chunks, restricted to the columns DocNo, Debit,
    Credit, Balance and TransactionsInDetail.  For each row, the text of
    all of those columns is joined with single spaces, lower-cased, and
    searched for the lower-cased keyword delimited by regex word
    boundaries (``\\b``).

    Missing cells contribute an empty string to the searched text, so the
    textual placeholder for a missing value ("nan") can never produce a
    match on its own.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        keyword: Word or phrase to look for; matching is case-insensitive.

    Returns:
        A list with one dict per matching row, in file order.  Each dict
        has the five column names as keys (in the order listed above);
        DocNo and TransactionsInDetail are ``str``, the amount columns are
        ``float``, and missing cells are ``None``.
    """
    usecols = ['DocNo', 'Debit', 'Credit', 'Balance', 'TransactionsInDetail']
    dtype = {
        'DocNo': str,
        'Debit': float,
        'Credit': float,
        'Balance': float,
        'TransactionsInDetail': str
    }

    chunksize = 10000
    keyword_lower = keyword.lower()
    results = []

    # Compiled once; reused for every row of every chunk.
    keyword_pattern = re.compile(r'\b' + re.escape(keyword_lower) + r'\b')

    for chunk in pd.read_csv(file_path, usecols=usecols, dtype=dtype, chunksize=chunksize):
        # read_csv keeps the file's column order, which may differ from usecols.
        columns = list(chunk.columns)

        # Iterate plain tuples instead of building a Series per row
        # (DataFrame.apply(axis=1) / iterrows), which is far slower.
        value_rows = chunk.itertuples(index=False, name=None)
        present_rows = chunk.notna().itertuples(index=False, name=None)

        for values, present in zip(value_rows, present_rows):
            cells = [value if ok else None for value, ok in zip(values, present)]

            # Missing cells become '' rather than the string "nan", so a
            # keyword like "nan" only matches genuine text.
            text = ' '.join('' if cell is None else str(cell) for cell in cells).lower()
            if keyword_pattern.search(text) is None:
                continue

            record = dict(zip(columns, cells))
            results.append({
                col: None if record[col] is None else dtype[col](record[col])
                for col in usecols
            })

    return results

# Search parameters for this run: the CSV to scan and the phrase to find.
file_path = 'data.csv'
keyword = 'Thao Minh'
results = read_csv_fast(file_path, keyword)

# Only serialize when the result has the expected shape (a list whose
# items are all dicts); otherwise report the problem instead.
if not (isinstance(results, list) and all(isinstance(item, dict) for item in results)):
    print("Error: The output is not a valid list of dictionaries.")
else:
    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    json_data = json.dumps(results, ensure_ascii=False, indent=4)
    print(json_data)

[
    {
        "DocNo": "5218.15261 10/09/2024",
        "Debit": null,
        "Credit": 200000.0,
        "Balance": null,
        "TransactionsInDetail": "716320.100924.065241.DO THAO MINH transfer-100924-06:52:42 716320"
    },
    {
        "DocNo": "5161.25955 10/09/2024",
        "Debit": null,
        "Credit": 10000.0,
        "Balance": null,
        "TransactionsInDetail": "PARTNER.DIRECT_DEBITS_VCB.MSE.6648 0321035.20240910.66480321035-0941000679_L E THAO MINH ung ho dong bao sau thien tai"
    },
    {
        "DocNo": "5214.54499",
        "Debit": null,
        "Credit": 20000.0,
        "Balance": null,
        "TransactionsInDetail": "872100.100924.133015.THACH THAO MINH THU chuyen tien"
    },
    {
        "DocNo": "5387.24797 10/09/2024",
        "Debit": null,
        "Credit": 100000.0,
        "Balance": null,
        "TransactionsInDetail": "020097042209101338532024IP4U389738.24797. 133854.NGUYEN THI THANH THAO minh xin gop 1 phan nho cho ba con. Mong moi nguoi