In [1]:
# Make sure paths are correct for the imports

import os
import sys

# Resolve the directory layout from the current working directory:
# os.path.abspath("") expands to the absolute path of the cwd.
notebook_dir = os.path.abspath("")
parent_dir = os.path.dirname(notebook_dir)
grandparent_dir = os.path.dirname(parent_dir)

# Make the directory two levels up importable. The membership check keeps the
# cell idempotent: re-running it no longer appends a duplicate sys.path entry.
if grandparent_dir not in sys.path:
    sys.path.append(grandparent_dir)

In [2]:
"""Load support tickets data from CSV into PostgreSQL database."""
import os
import csv
import psycopg2
from psycopg2 import sql
from dotenv import load_dotenv


# Load settings from the ".env" file located in parent_dir (one level above the
# directory this notebook was started from) so later os.getenv lookups see them.
load_dotenv(os.path.join(parent_dir, ".env"))

True

In [3]:
# Connection settings read from the environment, each with a local-development
# default. The dict is unpacked directly into psycopg2.connect(**DB_CONFIG).
DB_CONFIG = {
    'host': os.getenv('PG_HOST', 'localhost'),
    # Every other setting uses the PG_ prefix, so PG_PORT is honoured first;
    # DB_PORT is kept as a fallback so existing .env files keep working.
    'port': os.getenv('PG_PORT', os.getenv('DB_PORT', '5432')),
    'database': os.getenv('PG_DATABASE', 'your_database'),
    'user': os.getenv('PG_USERNAME', 'postgres'),
    'password': os.getenv('PG_PASSWORD', '')
}


In [4]:
# CSV file path
# This is a relative path, so it is resolved against the current working
# directory at the moment the file is opened.
CSV_FILE_PATH = '../data/synthetic_issue_dataset_with_customer_descriptions.csv'

In [5]:
def create_table(cursor):
    """Create the support_tickets table if it doesn't exist.

    Args:
        cursor: An open database cursor; the DDL statement is executed through
            ``cursor.execute``. No commit is issued here, so the caller is
            responsible for committing the transaction.

    Notes:
        ``IF NOT EXISTS`` makes the statement safe to re-run (for example when
        the notebook cell is executed a second time); without it the second
        run fails because the table is already present.
        The ``VECTOR(1536)`` column type is not a built-in PostgreSQL type --
        presumably it comes from the pgvector extension, which must already be
        enabled in the target database (TODO confirm for your deployment).
    """
    create_table_sql = """
    CREATE TABLE IF NOT EXISTS support_tickets (
        -- Primary identifiers
        ticket_id VARCHAR(20) PRIMARY KEY,
        servicenow_incident_id VARCHAR(20),
        servicenow_problem_id VARCHAR(20),

        -- Timestamps
        created TIMESTAMP NOT NULL,
        updated TIMESTAMP,
        resolved TIMESTAMP,

        -- Issue classification
        issue_summary TEXT NOT NULL,
        category VARCHAR(50),
        subcategory VARCHAR(50),
        priority VARCHAR(20),
        severity INTEGER CHECK (severity BETWEEN 1 AND 5),
        status VARCHAR(50),
        resolution_notes TEXT,

        -- Service impact
        affected_service VARCHAR(50),
        customer_impact VARCHAR(20),
        assigned_team VARCHAR(50),
        region VARCHAR(50),

        -- Testing details
        test_case_id VARCHAR(20),
        test_description TEXT,
        execution_date DATE,
        environment VARCHAR(20),
        pass_fail VARCHAR(10),
        error_logs TEXT,

        -- Release information
        feature VARCHAR(50),
        build_version VARCHAR(20),
        release_version VARCHAR(20),
        release_date DATE,
        new_features TEXT,
        bug_fixes TEXT,
        known_issues TEXT,
        deprecations TEXT,
        impacted_services VARCHAR(50),

        -- Customer interaction
        contact_channel VARCHAR(20),
        contact_volume INTEGER,
        top_contact_reason TEXT,
        customer_sentiment VARCHAR(20),
        escalation_rate NUMERIC(4,2),

        -- Bug tracking
        bug_id VARCHAR(20),
        reported_date DATE,
        steps_to_reproduce TEXT,
        linked_tickets VARCHAR(20),
        resolution_version VARCHAR(20),
        customer_description TEXT,
        customer_description_vector VECTOR(1536)
    );
    """
    cursor.execute(create_table_sql)
    print("Table created successfully or already exists.")



In [6]:
# Single source of truth for the column order: it drives both the INSERT
# statement and the per-row value extraction, so the two cannot drift apart.
_TICKET_COLUMNS = (
    'ticket_id', 'created', 'updated', 'resolved', 'issue_summary', 'category',
    'subcategory', 'priority', 'severity', 'status', 'resolution_notes',
    'affected_service', 'customer_impact', 'assigned_team', 'region',
    'test_case_id', 'test_description', 'execution_date', 'environment',
    'pass_fail', 'error_logs', 'feature', 'build_version', 'release_version',
    'release_date', 'new_features', 'bug_fixes', 'known_issues', 'deprecations',
    'impacted_services', 'contact_channel', 'contact_volume', 'top_contact_reason',
    'customer_sentiment', 'escalation_rate', 'bug_id', 'reported_date',
    'steps_to_reproduce', 'linked_tickets', 'resolution_version',
    'servicenow_incident_id', 'servicenow_problem_id', 'customer_description'
)


def _row_to_values(row):
    """Return the INSERT parameters for one CSV row, in _TICKET_COLUMNS order."""
    values = []
    for column in _TICKET_COLUMNS:
        # csv.DictReader stores None for fields missing from a short row, and a
        # column absent from the header yields None from .get(); treat both
        # like an empty string instead of crashing on None.strip().
        text = (row.get(column) or '').strip()
        # Blank cells become SQL NULL.
        values.append(text or None)
    return values


def load_csv_to_db(csv_file_path):
    """Load data from CSV file into PostgreSQL database.

    Connects with ``DB_CONFIG``, ensures the table exists via
    ``create_table``, inserts every CSV row (rows whose ``ticket_id`` already
    exists are skipped by ``ON CONFLICT DO NOTHING``), commits, then creates
    the indexes via ``create_indexes`` and commits again.

    Args:
        csv_file_path: Path of the CSV file to read. The file is opened as
            UTF-8 and must have a header row; values are looked up by the
            column names in ``_TICKET_COLUMNS``.

    Returns:
        None. Progress and errors are reported with ``print``.

    Error handling:
        Any exception is caught, printed as ``Error: ...`` and the open
        transaction is rolled back; the exception is not re-raised. The cursor
        and connection are closed in all cases once they have been opened.
    """
    # Pre-bind both names so the except/finally clauses are safe even when
    # psycopg2.connect() itself raises.
    conn = None
    cursor = None
    try:
        # Connect to PostgreSQL
        conn = psycopg2.connect(**DB_CONFIG)
        cursor = conn.cursor()

        # Create table
        create_table(cursor)

        # Prepare insert statement. Only the hard-coded column names above are
        # interpolated; every value goes through %s parameter binding.
        insert_sql = (
            "INSERT INTO support_tickets ({columns}) VALUES ({placeholders}) "
            "ON CONFLICT (ticket_id) DO NOTHING;"
        ).format(
            columns=", ".join(_TICKET_COLUMNS),
            placeholders=", ".join(["%s"] * len(_TICKET_COLUMNS)),
        )

        rows_inserted = 0
        rows_skipped = 0

        # newline='' is what the csv module asks for, so newlines embedded in
        # quoted fields are passed through to the parser untouched.
        with open(csv_file_path, 'r', encoding='utf-8', newline='') as csv_file:
            for row in csv.DictReader(csv_file):
                cursor.execute(insert_sql, _row_to_values(row))
                # rowcount is 0 when ON CONFLICT DO NOTHING skipped the row, so
                # only rows that were really written are counted as inserted.
                if cursor.rowcount == 1:
                    rows_inserted += 1
                else:
                    rows_skipped += 1

        # Commit the transaction
        conn.commit()
        print(f"Successfully inserted {rows_inserted} rows into the database.")
        if rows_skipped:
            print(f"Skipped {rows_skipped} rows whose ticket_id already exists.")

        # Create indexes
        create_indexes(cursor)
        conn.commit()

    except Exception as e:
        print(f"Error: {e}")
        if conn is not None:
            conn.rollback()
    finally:
        # Release database resources on success and on failure alike.
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()
            print("Database connection closed.")


In [7]:
def create_indexes(cursor):
    """Ensure the lookup indexes on support_tickets are present.

    Runs one ``CREATE INDEX IF NOT EXISTS`` statement per indexed column
    through ``cursor.execute`` and then prints a confirmation. No commit is
    issued here.

    Args:
        cursor: Database cursor used to execute the statements.
    """
    # (index-name suffix, indexed column) pairs, in creation order.
    index_specs = (
        ("created", "created"),
        ("status", "status"),
        ("priority", "priority"),
        ("category", "category"),
        ("affected_service", "affected_service"),
        ("bug_id", "bug_id"),
        ("servicenow_incident", "servicenow_incident_id"),
        ("servicenow_problem", "servicenow_problem_id"),
    )

    for suffix, column in index_specs:
        cursor.execute(
            f"CREATE INDEX IF NOT EXISTS idx_tickets_{suffix} ON support_tickets({column});"
        )

    print("Indexes created successfully.")


In [8]:
# Entry point: run the full load (table creation, row inserts, index creation)
# against CSV_FILE_PATH when this code executes as the main module.
if __name__ == "__main__":
    load_csv_to_db(CSV_FILE_PATH)

Table created successfully or already exists.
Successfully inserted 500 rows into the database.
Indexes created successfully.
Database connection closed.
