In [1]:
import os
import sqlite3
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine, inspect, MetaData, text

In [2]:
# Load the three CSV extracts into DataFrames in a single unpacking
# assignment; the files are read in the order listed.
member_info, loyality_info, booking_info = (
    pd.read_csv('./data/member_info_final.csv'),
    pd.read_csv('./data/loyalty_info_final.csv'),
    pd.read_csv('./data/bookings_info_final.csv'),
)

In [3]:
# Report (rows, columns) of each loaded DataFrame, one per output line.
print(
    f"member_info: {member_info.shape}",
    f"booking_info: {booking_info.shape}",
    f"loyality_info: {loyality_info.shape}",
    sep="\n",
)

member_info: (47074, 24)
booking_info: (47074, 35)
loyality_info: (47074, 10)


In [4]:
def run_query(sql_query: str, db_uri: str = 'sqlite:///./data/airline.sqlite'):
    """Execute a SQL statement and return all result rows as mappings.

    Args:
        sql_query: SQL text to run; it is passed through ``text()`` unchanged.
        db_uri: SQLAlchemy database URL to connect to. Defaults to the local
            airline SQLite file.

    Returns:
        The list produced by ``result.mappings().all()``: one mapping per
        row, keyed by column name.
    """
    # A fresh engine is created on every call, so it is disposed explicitly
    # below; otherwise any pooled connections stay open until the engine is
    # garbage-collected.
    engine = create_engine(db_uri)
    try:
        with engine.connect() as conn:
            result = conn.execute(text(sql_query))
            # .all() fetches every row while the connection is still open,
            # so the returned list remains usable after cleanup.
            return result.mappings().all()
    finally:
        engine.dispose()

In [5]:
def csv_to_sqlite(csv_file_path, sqlite_db_path, table_name):
    """Load a CSV file into a SQLite table, replacing the table if it exists.

    Args:
        csv_file_path: Path of the CSV file to read.
        sqlite_db_path: Path of the SQLite database file to write to.
        table_name: Name of the destination table.

    Raises:
        FileNotFoundError: If ``csv_file_path`` does not exist.
    """
    # Fail early with a clear message instead of a pandas parser error.
    if not os.path.exists(csv_file_path):
        raise FileNotFoundError(f"The file {csv_file_path} does not exist.")

    df = pd.read_csv(csv_file_path)
    # Strip double-quote characters from the column names (literal match,
    # not a regular expression).
    df.columns = df.columns.str.replace('"', '', regex=False)

    conn = sqlite3.connect(sqlite_db_path)
    try:
        # if_exists='replace' drops and recreates the table, so re-running
        # this function does not append duplicate rows.
        df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        # Close the connection even when the write fails, so the handle on
        # the database file is always released.
        conn.close()

    print(f"Data from {csv_file_path} has been successfully written to {sqlite_db_path} in the table {table_name}.")

In [6]:
# Write each CSV extract into its own table of the shared SQLite file.
for source_csv, target_table in (
    ('./data/bookings_info_final.csv', 'booking_info'),
    ('./data/loyalty_info_final.csv', 'loyality_info'),
    ('./data/member_info_final.csv', 'member_info'),
):
    csv_to_sqlite(source_csv, './data/airline.sqlite', target_table)

Data from ./data/bookings_info_final.csv has been successfully written to ./data/airline.sqlite in the table booking_info.
Data from ./data/loyalty_info_final.csv has been successfully written to ./data/airline.sqlite in the table loyality_info.
Data from ./data/member_info_final.csv has been successfully written to ./data/airline.sqlite in the table member_info.


In [7]:
# Read every row of each table back out of SQLite through run_query.
member_info_result = run_query(
    'SELECT * from member_info',
    db_uri='sqlite:///./data/airline.sqlite',
)
booking_info_result = run_query(
    'SELECT * from booking_info;',
    db_uri='sqlite:///./data/airline.sqlite',
)
loyality_info_result = run_query(
    'SELECT * from loyality_info;',
    db_uri='sqlite:///./data/airline.sqlite',
)

In [8]:
# Print (row count,column count) for each table read back from SQLite.
# The column count comes from the first row; an empty result reports 0
# columns instead of raising IndexError on rows[0].
for table_label, rows in (
    ("member_info", member_info_result),
    ("booking_info", booking_info_result),
    ("loyality_info", loyality_info_result),
):
    column_count = len(rows[0]) if rows else 0
    print(f"{table_label}: ({len(rows)},{column_count})")

member_info: (47074,24)
booking_info: (47074,35)
loyality_info: (47074,10)


In [9]:
import re
from langchain_community.utilities import SQLDatabase
    
# Path of the SQLite database file, relative to the working directory.
db_path = "./data/airline.sqlite"
# SQLDatabase handle built from that path; get_schema() reads its table info.
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")   

def get_schema():
    """Return the table info of the module-level ``db`` with comments removed.

    The text from ``db.get_table_info()`` is cleaned in three steps: every
    ``/* ... */`` block is deleted, leading and trailing whitespace is
    stripped, and runs of blank lines are collapsed to a single blank line.

    Returns:
        The cleaned schema text as one string.
    """
    schema = db.get_table_info()

    # Delete everything between /* and */; DOTALL lets a block span lines.
    schema_cleaned = re.sub(r'/\*.*?\*/', '', schema, flags=re.DOTALL)

    # Trim whitespace left at both ends of the text.
    schema_cleaned = schema_cleaned.strip()

    # Collapse any run of blank or whitespace-only lines into one blank line.
    # No per-statement post-processing is needed after this: splitting on
    # '\nCREATE', re-prefixing "CREATE" and joining with '\n' would rebuild
    # exactly the same string.
    return re.sub(r'\n\s*\n+', '\n\n', schema_cleaned)

In [10]:
# Print the cleaned schema text returned by get_schema() for inspection.
print(get_schema())

CREATE TABLE booking_info (
	"PNR" TEXT, 
	"Origin" TEXT, 
	"Destination" TEXT, 
	scheduled_departure_date TEXT, 
	num_passengers INTEGER, 
	trip_type TEXT, 
	purchase_lead INTEGER, 
	length_of_stay INTEGER, 
	flight_hour INTEGER, 
	flight_day TEXT, 
	booking_origin TEXT, 
	wants_extra_baggage INTEGER, 
	wants_preferred_seat INTEGER, 
	wants_in_flight_meals INTEGER, 
	flight_duration REAL, 
	flight_number TEXT, 
	arrival_delay_minutes INTEGER, 
	arrival_delay_group TEXT, 
	cabin_name TEXT, 
	number_of_legs INTEGER, 
	haul_type TEXT, 
	departure_gate TEXT, 
	arrival_gate TEXT, 
	international_domestic_indicator TEXT, 
	amount REAL, 
	payment_method TEXT, 
	operating_airline TEXT, 
	wants_priority_checkin INTEGER, 
	wants_priority_boarding INTEGER, 
	travel_insurance_flag INTEGER, 
	travel_insurance_amount REAL, 
	booking_class TEXT, 
	fare_type TEXT, 
	"PNR_status" TEXT, 
	"PNR_status_code" TEXT
)

CREATE TABLE loyality_info (
	"PNR" TEXT, 
	customer_email TEXT, 
	account_number INTEGER