In [1]:
import { parse } from 'csv-parse/sync';
import { readFileSync } from 'fs';
import { analysisGraph } from 'agentic-data-analysis';
import type { GraphState, Data } from 'agentic-data-analysis';

/**
 * Loads a CSV file from disk, parses it into records, and runs the analysis
 * graph over those records.
 *
 * The file is read synchronously as UTF-8 and parsed with the `columns`,
 * `skip_empty_lines`, and `cast` options enabled.
 *
 * @param filepath - Path of the CSV file to analyze.
 * @returns The state produced by `analysisGraph.invoke`.
 * @throws Rethrows any error raised while reading, parsing, or invoking the
 *   graph, after logging it to the console.
 */
async function analyzeCSVData(filepath: string): Promise<GraphState> {
  try {
    const rawCsv = readFileSync(filepath, 'utf-8');

    // The parser output is asserted (not validated) to match the graph's
    // `Data` input type.
    const rows = parse(rawCsv, {
      columns: true,
      skip_empty_lines: true,
      cast: true,
    }) as Data;

    // Awaiting inside the try block keeps graph failures on the logging path.
    return await analysisGraph.invoke({ data: rows });
  } catch (error) {
    console.error('Error analyzing CSV data:', error);
    throw error;
  }
}

In [2]:
// Run the analysis over the healthcare dataset and print a readable report.
const filepath = './healthcare_dataset.csv';
const analysis = await analyzeCSVData(filepath);

// Dataset-level summary (prints `undefined` when no metadata is present).
console.log('\nDataset Summary:');
console.log(analysis.metadata?.summary);

// Per-field descriptions; nothing is printed below the heading when the
// metadata has no `fields`.
console.log('\nField Analysis:');
const fieldEntries = Object.entries(analysis.metadata?.fields ?? {});
for (const [fieldName, fieldMeta] of fieldEntries) {
  console.log(`\n${fieldName}:`);
  console.log(fieldMeta.description);
}

// Data quality issues reported in the metadata.
console.log('\nData Quality Issues:');
console.log(analysis.metadata?.dataQualityIssues);


Dataset Summary:
### Dataset Summary

#### 1. Dataset Purpose and Content
This dataset appears to represent a comprehensive collection of healthcare-related records, likely from a hospital or a network of healthcare facilities. It captures various aspects of patient information, medical conditions, and healthcare services. The dataset's primary purpose is to provide a detailed view of patient demographics, medical conditions, healthcare interactions, and financial transactions. Key entities described include individuals (patients), healthcare providers (doctors and hospitals), and insurance providers.

- **Key Fields and Relationships:**
  - **Demographic Information:** Includes "Name," "Age," and "Gender," providing basic personal details.
  - **Medical Information:** Captures "Blood Type," "Medical Condition," "Medication," and "Test Results," detailing health status and treatments.
  - **Healthcare Interactions:** "Date of Admission," "Discharge Date," "Doctor," "Hospital," "Admiss

In [3]:
// Dump the complete metadata object as a single-line JSON string.
const serializedMetadata = JSON.stringify(analysis.metadata);
console.log(serializedMetadata);

{"fields":{"Name":{"type":"string","description":"The \"Name\" field represents the full names of individuals, formatted as a string. It contains 55,500 entries with no missing values, ensuring complete data coverage. There are 49,992 unique values, indicating that most names are distinct, though some repetition exists, possibly due to common names or data entry errors.\n\nKey characteristics include:\n- The names exhibit inconsistent capitalization, as seen in sample values like \"Bobby JacksOn\" and \"LesLie TErRy.\" This suggests a lack of standardization in data entry, which may require cleaning for uniformity.\n- The high number of unique values relative to total entries suggests a diverse dataset, likely representing a wide range of individuals.\n\nData quality concerns include the inconsistent use of capitalization, which could affect sorting and searching operations. Standardizing the format (e.g., capitalizing the first letter of each name) would enhance data usability.\n\nPot