In [4]:
import pandas as pd
from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


In [5]:
def serialize_csv_for_llm(file_path, max_rows=100):
    """Read a CSV file and render its leading rows as plain text for an LLM prompt.

    The returned text has this layout::

        Columns: <col> | <col> | ...
        <blank line>
        Row 1: <cell> | <cell> | ...
        Row 2: <cell> | <cell> | ...

    Args:
        file_path: Location of the CSV file; handed to ``pandas.read_csv``.
        max_rows: How many leading rows to include (forwarded to
            ``DataFrame.head``). Defaults to 100.

    Returns:
        str: The serialized table. If the file does not exist, the string
        ``"Error: The file was not found at <file_path>"`` is returned instead
        of raising, so callers should check for that prefix before using the
        result as table data.
    """
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        return f"Error: The file was not found at {file_path}"

    sample = df.head(max_rows)

    # Header line; str() keeps the join safe even for non-string column labels.
    columns_line = " | ".join(str(col) for col in sample.columns)

    # itertuples() yields each cell with its own column's dtype. iterrows()
    # builds one Series per row, which upcasts mixed int/float rows to float
    # and would turn an integer such as 1 into "1.0" in the prompt.
    row_lines = []
    records = sample.itertuples(index=False, name=None)
    for number, record in enumerate(records, start=1):
        cells = " | ".join(str(cell) for cell in record)
        row_lines.append(f"Row {number}: {cells}\n")

    return f"Columns: {columns_line}\n\n" + "".join(row_lines)

In [6]:
# Prompt text for the model. It contains two placeholders, {instruction} and
# {table_text}; both are supplied when the chain is invoked. The wording asks
# the model to echo the table back in the same "Columns:" / "Row N:" layout
# that serialize_csv_for_llm produces, with no extra commentary.
template = """
You are an expert data transformer. Your task is to modify the provided table data based on a specific instruction.

Instruction: {instruction}

Here is the table data in a simplified text format:
{table_text}

Please apply the instruction to the table. 
- Preserve the exact output format (column headers and row prefixes).
- Do not add any commentary, explanations, or code.
- Ensure all original rows and columns are present in the output.

Your final output should be only the transformed table data.
"""
# Build a LangChain PromptTemplate from the raw string above.
prompt_template = PromptTemplate.from_template(template)

In [7]:
# Local model handle: the "mistral" model through Ollama, temperature 0.3.
llm = Ollama(
    model="mistral",
    temperature=0.3,
)

# Pipeline: fill the prompt template -> call the model -> parse output as a string.
chain = prompt_template.pipe(llm, StrOutputParser())

  llm = Ollama(model="mistral", temperature=0.3)


In [8]:
def run_transformation(csv_path=None,
                       output_file_path="transformed_output_langchain.csv",
                       instruction=None):
    """Serialize a CSV, ask the model to transform it, and save the reply.

    Args:
        csv_path: CSV file to read. When omitted, the notebook's original
            dataset location is used (an absolute path on the author's
            machine), so pass your own path when running elsewhere.
        output_file_path: File that receives the model's reply, written as
            UTF-8 text.
        instruction: Natural-language transformation to apply. When omitted,
            the original "FirstInitial. LastName" rename of the 'Name' column
            is used.

    Raises:
        FileNotFoundError: If the serializer reports that ``csv_path`` does
            not exist. Nothing is sent to the model and no file is written.

    Note:
        The reply is written exactly as returned by the chain. It is the
        "Columns:" / "Row N:" text layout requested in the prompt, not parsed
        comma-separated data, even when the output name ends in ``.csv``.
    """
    if csv_path is None:
        csv_path = r"C:\Users\sanka\OneDrive\Desktop\Tabulax\DATASETS\Tabulax datasets\ALL CSV\Govs-a.csv"
    if instruction is None:
        instruction = "In the 'Name' column, convert each full name to the format 'FirstInitial. LastName' (e.g., 'John Smith' becomes 'J. Smith'). Preserve all other columns as they are."

    # Step 1: Serialize the CSV
    table_text = serialize_csv_for_llm(csv_path)

    # The serializer reports a missing file by returning an "Error: ..." string
    # rather than raising. Stop here so that message is never sent to the
    # model as if it were table data.
    if table_text.startswith("Error:"):
        raise FileNotFoundError(table_text)

    # Step 2: Run the chain
    print("Sending request to the model...")
    response = chain.invoke({
        "table_text": table_text,
        "instruction": instruction,
    })
    print("Received response.")

    # Step 3: Save the raw reply to file
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(response)

    print(f"\n✅ Transformed output saved to: {output_file_path}")

# Execute the transformation when this cell runs; the function prints its
# progress messages and the output file location to the cell output.
run_transformation()


Sending request to the model...
Received response.

✅ Transformed output saved to: transformed_output_langchain.csv
