In [11]:
import json
import os
import mysql.connector
import pandas as pd
import re

# --- Database Connection Configuration ---
# Keyword arguments passed to mysql.connector.connect().
# Every value can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook itself; the literals
# below are only the fallbacks used when the variable is not set.
DB_CONFIG = {
    'host': os.environ.get('PHONEPE_DB_HOST', 'localhost'),
    'user': os.environ.get('PHONEPE_DB_USER', 'root'),
    'password': os.environ.get('PHONEPE_DB_PASSWORD', 'sandeep'),
    'database': os.environ.get('PHONEPE_DB_NAME', 'phonepe_pulse_db'),
}


In [12]:
# Base directory of the data files (not referenced by the cells shown here).
# Set the PULSE_DATA_BASE_PATH environment variable to point at another
# location; the fallback is the original author's local folder.
PULSE_DATA_BASE_PATH = os.environ.get(
    'PULSE_DATA_BASE_PATH',
    r'C:\Users\hanum\OneDrive\Desktop\Labmentix\week6\data',
)

def connect_db():
    """Open a MySQL connection from the module-level ``DB_CONFIG``.

    Returns:
        The object returned by ``mysql.connector.connect``, or ``None`` when
        a ``mysql.connector.Error`` is raised (the error is printed, not
        re-raised). A success message is printed only if the connection
        reports itself as connected.
    """
    try:
        db_conn = mysql.connector.connect(**DB_CONFIG)
        is_live = db_conn.is_connected()
        if is_live:
            print("Successfully connected to MySQL database!")
    except mysql.connector.Error as db_error:
        print(f"Error connecting to MySQL: {db_error}")
        db_conn = None
    return db_conn

In [13]:

def execute_sql_query(query):
    """Run one SQL statement and return its result set as a DataFrame.

    A fresh connection is opened from the module-level ``DB_CONFIG`` for
    every call and closed again before returning. No ``commit()`` is issued
    here, so this helper is intended for read-only queries.

    Args:
        query: SQL text handed to ``cursor.execute``.

    Returns:
        A ``pandas.DataFrame`` built from the fetched rows (a dictionary
        cursor is used, so columns are keyed by result-set column name).
        An empty DataFrame is returned when the statement yields no rows,
        produces no result set, the connection is not live, or an error
        occurs. Errors are printed rather than raised.
    """
    connection = None
    cursor = None  # initialised up front so cleanup never needs a locals() lookup
    try:
        connection = mysql.connector.connect(**DB_CONFIG)
        if connection.is_connected():
            cursor = connection.cursor(dictionary=True)  # rows come back as dicts
            cursor.execute(query)

            # Statements without a result set (e.g. DDL/DML) make fetchall()
            # raise, which used to be reported as a query error; only fetch
            # when the cursor says rows are available.
            if cursor.with_rows:
                results = cursor.fetchall()
                if results:
                    return pd.DataFrame(results)
    except mysql.connector.Error as err:
        print(f"Error executing query: {err}\nQuery: {query.strip()[:100]}...") # Show first 100 chars of query
    except Exception as e:
        # Deliberate best-effort: report and fall through to the empty result.
        print(f"An unexpected error occurred: {e}\nQuery: {query.strip()[:100]}...")
    finally:
        # Release the cursor even if the connection has already dropped.
        if cursor is not None:
            try:
                cursor.close()
            except mysql.connector.Error as close_err:
                print(f"Error closing cursor: {close_err}")
        if connection is not None and connection.is_connected():
            connection.close()
    return pd.DataFrame()  # no rows, no result set, or an error occurred


In [14]:
def _split_sql_statements(sql_script):
    """Split SQL text on ';' and drop comments, leaving quoted text intact."""
    statements = []
    current = []
    open_quote = None  # the quote character of the literal we are inside, if any
    pos = 0
    length = len(sql_script)

    while pos < length:
        ch = sql_script[pos]

        if open_quote is not None:
            current.append(ch)
            if ch == '\\' and open_quote != '`' and pos + 1 < length:
                # A backslash inside a string literal escapes the next
                # character, so an escaped quote must not end the literal.
                current.append(sql_script[pos + 1])
                pos += 2
                continue
            if ch == open_quote:
                # A doubled quote ('' or ``) simply closes and reopens.
                open_quote = None
            pos += 1
            continue

        pair = sql_script[pos:pos + 2]
        if ch in ("'", '"', '`'):
            open_quote = ch
            current.append(ch)
            pos += 1
        elif pair == '--':
            # Single-line comment: skip up to (not including) the newline.
            newline_at = sql_script.find('\n', pos)
            pos = length if newline_at == -1 else newline_at
        elif pair == '/*':
            # Multi-line comment: skip through the closing marker; an
            # unterminated comment swallows the rest of the script.
            close_at = sql_script.find('*/', pos + 2)
            pos = length if close_at == -1 else close_at + 2
        elif ch == ';':
            statements.append(''.join(current))
            current = []
            pos += 1
        else:
            current.append(ch)
            pos += 1

    statements.append(''.join(current))  # trailing statement without a ';'
    return [stmt.strip() for stmt in statements if stmt.strip()]


def load_and_execute_queries_from_file(sql_filepath):
    """Execute every statement in a SQL script and print each result.

    The file is read as UTF-8, comments (``--`` to end of line and
    ``/* ... */``) are removed, and the text is split into statements on
    ``;``. Comment markers and semicolons that appear inside quoted
    literals or backtick identifiers are treated as ordinary text, so such
    statements are no longer cut apart.

    Args:
        sql_filepath: Path of the SQL script to run.

    Returns:
        None. Progress and results are printed. If the file does not exist
        an error message is printed and nothing is executed.
    """
    if not os.path.exists(sql_filepath):
        print(f"Error: SQL file not found at '{sql_filepath}'")
        return

    with open(sql_filepath, 'r', encoding='utf-8') as f:
        sql_script = f.read()

    queries = _split_sql_statements(sql_script)

    print(f"Found {len(queries)} queries in '{sql_filepath}'. Executing them one by one...\n")

    for i, query in enumerate(queries):
        print(f"--- Executing Query {i+1} ---")
        print(f"Query:\n{query.strip()}\n")

        df_result = execute_sql_query(query)

        # execute_sql_query returns an empty frame both for "no rows" and
        # for failures, so the two cases cannot be told apart here.
        if not df_result.empty:
            print("Query Result:")
            print(df_result)
        else:
            print("No results or an error occurred for this query.")
        print("-" * 50 + "\n") # Separator for readability

In [39]:
# --- Main execution block ---
# Runs every statement found in the SQL script and prints each result.
# The path is relative, so it is resolved against the current working directory.
if __name__ == "__main__":
    sql_file = "phonepe_analysis_queries.sql" # SQL script to execute
    load_and_execute_queries_from_file(sql_file)
    # Printed unconditionally: the loader returns normally even when the
    # file is missing or an individual query fails.
    print("All queries executed.")

Found 19 queries in 'phonepe_analysis_queries.sql'. Executing them one by one...

--- Executing Query 1 ---
Query:
SELECT
    transaction_type,
    SUM(transaction_count) AS total_transactions,
    SUM(transaction_amount) AS total_transaction_amount
FROM Aggregated_transaction
GROUP BY transaction_type
ORDER BY total_transaction_amount DESC

Query Result:
           transaction_type total_transactions total_transaction_amount
0     Peer-to-peer payments        85032446653       266527358971212.40
1         Merchant payments       130238755487        65339877074733.18
2  Recharge & bill payments        19596755603        13338759360277.80
3                    Others          262050188          174280661976.44
4        Financial Services          154208943          142018767186.49
--------------------------------------------------

--- Executing Query 2 ---
Query:
SELECT
    year,
    quarter,
    SUM(transaction_count) AS quarterly_transactions,
    SUM(transaction_amount) AS quarterly_