In [2]:
import csv
import os
from io import StringIO

import pandas as pd
import psycopg2

# Open the PostgreSQL connection used by every later cell.
# The connection string is read from the DATABASE_URL environment variable so
# that credentials never live in the notebook or its version history, e.g.
#   export DATABASE_URL="postgresql://user:password@host/dbname?sslmode=require"
# NOTE(review): a password was previously hardcoded in this cell; treat it as
# leaked and rotate it.
database_url = os.environ.get("DATABASE_URL")
if not database_url:
    raise RuntimeError(
        "DATABASE_URL is not set; export the PostgreSQL connection string "
        "before running this notebook."
    )
conn = psycopg2.connect(database_url)


In [3]:
# Create the destination table if it does not exist yet, so this cell is safe
# to re-run. `id` is an auto-incrementing surrogate key; `question` and
# `answer` are free text.
CREATE_TABLE_SQL = """
    CREATE TABLE IF NOT EXISTS medical_qa (
        id SERIAL PRIMARY KEY,
        question TEXT,
        answer TEXT
    );
"""

# `with conn` commits when the block succeeds and rolls back if it raises, so a
# failed statement does not leave the connection in an aborted transaction.
# It does NOT close the connection, which later cells still need. The cursor
# context manager closes the cursor on both paths.
with conn, conn.cursor() as cur:
    cur.execute(CREATE_TABLE_SQL)


In [4]:
# Load the raw Q&A export. The python engine is used together with
# on_bad_lines="skip", which silently drops rows it cannot parse, so the
# loaded row count can be lower than the number of lines in the file.
qa_raw = pd.read_csv(
    "clean_medical_qa.csv",
    quoting=csv.QUOTE_MINIMAL,  # pandas' default quoting mode, stated explicitly
    on_bad_lines="skip",
    engine="python",
)


def _clean_text(column: pd.Series) -> pd.Series:
    """Collapse embedded line breaks to a single space and trim the ends.

    The nullable "string" dtype is used instead of ``astype(str)`` so that
    missing values stay missing rather than becoming the literal text "nan".
    """
    return (
        column.astype("string")
        .str.replace(r'[\r\n]+', ' ', regex=True)
        .str.strip()
    )


# Keep only the first two columns and give them the table's column names.
# .copy() makes the frame independent of qa_raw, so the assignments below do
# not trigger SettingWithCopyWarning.
df = qa_raw.iloc[:, :2].copy()
df.columns = ["question", "answer"]

df["question"] = _clean_text(df["question"])
df["answer"] = _clean_text(df["answer"])


In [5]:
# Serialize the frame to an in-memory CSV and bulk-load it with COPY.
#
# Both sides use standard CSV: embedded quotes are doubled ("") and no
# separate escape character is set. Mixing pandas' quote doubling with
# COPY's ESCAPE '\' silently corrupts data: COPY reads "" as close-quote then
# open-quote and drops the quote, and backslashes in unquoted fields are not
# unescaped. With COPY's defaults (ESCAPE equal to QUOTE) the round trip is
# lossless.
#
# Notes:
# - Unquoted empty fields (missing values and empty strings) load as NULL,
#   which is COPY's default for CSV format.
# - Re-running this cell appends the rows again; nothing de-duplicates them.
buffer = StringIO()
df.to_csv(
    buffer,
    index=False,
    header=False,
    sep=',',
    quotechar='"',
    doublequote=True,
    lineterminator='\n',
    quoting=csv.QUOTE_MINIMAL,
)
buffer.seek(0)

cur = conn.cursor()
try:
    cur.copy_expert(
        """
        COPY medical_qa (question, answer)
        FROM STDIN
        WITH (FORMAT csv, DELIMITER ',', QUOTE '"')
        """,
        buffer,
    )
    conn.commit()
    print("✅ Data loaded successfully")
except Exception as e:
    # Undo the partial load so the table is left exactly as it was.
    conn.rollback()
    print("❌ Error:", e)
finally:
    # This is the last step of the notebook: release the cursor and connection.
    cur.close()
    conn.close()


✅ Data loaded successfully
