In [14]:
import pandas as pd
import os
from sqlalchemy import create_engine
import time
import logging

# Make sure the log directory exists first: the file handler created by
# logging.basicConfig opens the log file right away and raises
# FileNotFoundError if "logs/" is missing.
os.makedirs("logs", exist_ok=True)

# Append DEBUG-and-above records, timestamped, to the ingestion log file.
logging.basicConfig(
    filename="logs/SQLite_ingestion.log",
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filemode="a"
)

# SQLAlchemy engine for the SQLite database file inventory.db
# (relative path, so it is resolved against the current working directory).
engine = create_engine('sqlite:///inventory.db')

def ingest_db(df, table_name, engine):
    '''Write a DataFrame to a database table, replacing any existing table.

    Parameters
    ----------
    df : DataFrame whose rows are written.
    table_name : name of the destination table.
    engine : engine or connection passed to ``DataFrame.to_sql`` as ``con``.

    If the table already exists it is replaced (``if_exists='replace'``),
    and the DataFrame index is not stored as a column (``index=False``).
    '''
    write_options = {'if_exists': 'replace', 'index': False}
    df.to_sql(table_name, con=engine, **write_options)

def load_raw_data():
    '''Load every supported file in the ``data`` directory into the database.

    Each ``.csv``, ``.xlsx``/``.xls`` and ``.json`` entry listed in ``data``
    is read into a DataFrame and written through ``ingest_db`` to a table
    named after the file with its extension removed. Entries with any other
    extension are skipped. One log record is written per ingested file, and
    the total elapsed time (in minutes) is logged at the end.

    Takes no arguments and returns ``None``.
    '''
    # Extension -> pandas reader used to parse files of that type.
    readers = {
        '.csv': pd.read_csv,
        '.xlsx': pd.read_excel,
        '.xls': pd.read_excel,
        '.json': pd.read_json,
    }
    start = time.time()

    for file in os.listdir('data'):
        # splitext removes exactly the extension, whatever its length; a
        # fixed-width slice would cut one letter too many off ".xls" names.
        table_name, extension = os.path.splitext(file)
        reader = readers.get(extension)
        if reader is None:
            continue  # skip unsupported file types

        df = reader(os.path.join('data', file))
        logging.info(f'Ingesting {file} into DB...')
        ingest_db(df, table_name, engine)

    end = time.time()
    total_time = (end - start)/60
    logging.info('-----------Ingestion Complete-----------')
    logging.info(f'Total Time Taken: {total_time} minutes')

# Run the ingestion only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    load_raw_data()

In [2]:
import pandas as pd
import sqlite3

In [3]:
# Open a connection to the local SQLite database file
conn = sqlite3.connect(database='inventory.db')

In [4]:
# List the name of every table stored in the database
table_query = "SELECT name FROM sqlite_master WHERE type='table'"
tables = pd.read_sql_query(table_query, conn)
tables

Unnamed: 0,name
0,Blinkit Grocery Data
1,BlinkIT


In [8]:

for table in tables["name"]:
    # Quote the table name as an SQL identifier (double quotes, with any
    # embedded double quote doubled) so names containing spaces or quote
    # characters are handled correctly.
    quoted_table = '"' + table.replace('"', '""') + '"'

    print('-'*50, table, '-'*50)
    row_count = pd.read_sql(f'select count(*) as count from {quoted_table}', conn)['count'].values[0]
    print('Count of records:', row_count)
    # Display the first 3 rows of each table
    display(pd.read_sql(f'select * from {quoted_table} limit 3', conn))

-------------------------------------------------- Blinkit Grocery Data --------------------------------------------------
Count of records: 8523


Unnamed: 0,Item_Fat_Content,Item_Identifier,Item_Type,Outlet_Establishment_Year,Outlet_Identifier,Outlet_Location_Type,Outlet_Size,Outlet_Type,Item_Visibility,Item_Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2016,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0


-------------------------------------------------- BlinkIT --------------------------------------------------
Count of records: 8523


Unnamed: 0,Item_Fat_Content,Item_Identifier,Item_Type,Outlet_Establishment_Year,Outlet_Identifier,Outlet_Location_Type,Outlet_Size,Outlet_Type,Item_Visibility,Item_Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2016,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0


In [11]:
# Load the whole "Blinkit Grocery Data" table. The name contains spaces, so
# it is written as a double-quoted SQL identifier.
data = pd.read_sql('select * from "Blinkit Grocery Data"', conn)
data.head(4)

Unnamed: 0,Item_Fat_Content,Item_Identifier,Item_Type,Outlet_Establishment_Year,Outlet_Identifier,Outlet_Location_Type,Outlet_Size,Outlet_Type,Item_Visibility,Item_Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2016,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0
3,Regular,FDL50,Canned,2014,OUT013,Tier 3,High,Supermarket Type1,0.042278,12.15,126.5046,5.0
