In [1]:
import logging
import os
import time
import warnings
from urllib.parse import quote_plus

# Silence warnings before the third-party imports so that warnings emitted
# while those packages are imported are suppressed as well.
warnings.filterwarnings('ignore')

import pandas as pd
import pymysql
from sqlalchemy import create_engine

# ----------- Logging Setup -----------
# Create the log directory first: basicConfig opens the log file right away
# and raises FileNotFoundError when "logs/" does not exist yet.
os.makedirs("logs", exist_ok=True)

# Append DEBUG-and-above records to the ingestion log, one timestamped line each.
logging.basicConfig(
    filename="logs/mysql_ingestion.log",
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filemode="a"
)

# ----------- MySQL Connection Setup -----------
# Connection settings are read from environment variables so real credentials
# do not have to be written into the notebook. The literals are only fallback
# placeholders used when the corresponding variable is not set.
DB_USER = os.environ.get('DB_USER', 'root')          # MySQL username
DB_PASSWORD = os.environ.get('DB_PASSWORD', 'xyz')   # MySQL password
DB_HOST = os.environ.get('DB_HOST', 'localhost')
DB_PORT = os.environ.get('DB_PORT', '3306')          # kept as a string, like the original literal
DB_NAME = os.environ.get('DB_NAME', 'xyz')           # target database name

# ----------- Auto Create Database if not exists -----------
# Connect to the server without selecting a database and create DB_NAME if it
# is missing. Any failure is logged and re-raised so the notebook stops here.
try:
    conn = pymysql.connect(
        host=DB_HOST,
        user=DB_USER,
        password=DB_PASSWORD,
        port=int(DB_PORT)
    )
    try:
        with conn.cursor() as cursor:
            # Identifiers cannot be sent as bound parameters, so the name is
            # backtick-quoted (embedded backticks doubled). This also allows
            # names containing hyphens or spaces.
            quoted_db_name = "`{}`".format(DB_NAME.replace("`", "``"))
            cursor.execute(f"CREATE DATABASE IF NOT EXISTS {quoted_db_name}")
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    logging.info(f"Database '{DB_NAME}' checked/created successfully.")
except Exception as e:
    logging.error(f"Error creating database: {e}")
    raise

# ----------- SQLAlchemy Engine Setup -----------
# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# username or password cannot be mistaken for URL delimiters.
engine = create_engine(
    f'mysql+pymysql://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)

# ----------- Function to Ingest Data -----------
def ingest_db(df, table_name, engine):
    """Write ``df`` to the SQL table ``table_name`` and log the success.

    The frame is written with ``DataFrame.to_sql`` using ``if_exists='replace'``
    and ``index=False``, i.e. an existing table of that name is replaced and
    the index is not stored as a column.

    Args:
        df: Object exposing ``to_sql`` (a pandas DataFrame in this notebook).
        table_name: Name of the destination table.
        engine: Connection/engine handed to ``to_sql`` as ``con``.
    """
    write_options = {"con": engine, "if_exists": "replace", "index": False}
    df.to_sql(table_name, **write_options)

    success_message = f"Successfully ingested {table_name} into database."
    logging.info(success_message)

# ----------- Load All Files and Ingest -----------
def load_raw_data(data_dir='data'):
    """Read every supported file in ``data_dir`` and ingest it into the database.

    Files are dispatched on their extension, matched case-insensitively:
    ``.csv`` is read with ``pd.read_csv``, ``.xlsx``/``.xls`` with
    ``pd.read_excel`` and ``.json`` with ``pd.read_json``. Each frame is passed
    to ``ingest_db`` with the module-level ``engine`` and a table name equal to
    the file name without its extension. Entries with any other extension, and
    entries that are not regular files, are skipped with a warning.

    An exception raised while reading or ingesting one file is logged and the
    loop continues with the next file. The total elapsed time is logged in
    minutes at the end.

    Args:
        data_dir: Directory to scan for input files. Defaults to ``'data'``.
    """
    start = time.time()
    logging.info("------ Starting Ingestion ------")

    # Extension -> pandas reader used to parse that kind of file.
    readers = {
        '.csv': pd.read_csv,
        '.xlsx': pd.read_excel,
        '.xls': pd.read_excel,
        '.json': pd.read_json,
    }

    # Sorted so files are processed (and logged) in a reproducible order.
    for file in sorted(os.listdir(data_dir)):
        file_path = os.path.join(data_dir, file)

        try:
            # splitext strips exactly the extension whatever its length;
            # slicing a fixed five characters cut a letter off ".xls" stems.
            table_name, extension = os.path.splitext(file)
            reader = readers.get(extension.lower())
            if reader is None or not os.path.isfile(file_path):
                logging.warning(f"Skipped unsupported file: {file}")
                continue

            df = reader(file_path)

            # Clean column names for MySQL (max length 64). str() keeps
            # non-string labels (e.g. integers) from failing on .strip().
            df.columns = [
                str(col).strip().replace('"', '').replace(";", "")[:64]
                for col in df.columns
            ]

            logging.info(f"Ingesting file: {file}")
            ingest_db(df, table_name, engine)

        except Exception as e:
            # Best effort: record the failure and move on to the next file.
            logging.error(f"Error processing file {file}: {e}")

    end = time.time()
    total_time = (end - start) / 60
    logging.info("------ Ingestion Complete ------")
    logging.info(f"Total Time Taken: {total_time:.2f} minutes")

# ----------- Run the Script -----------
if __name__ == "__main__":
    # Start the ingestion only when this code runs as the entry-point module.
    load_raw_data()

In [2]:
# Import the MySQL connector library
import mysql.connector

In [3]:
# Connect to server

# Open a server-level connection (no default database is selected).
conn = mysql.connector.connect(
    host="127.0.0.1",
    port=3306,
    user="root",
    # Prefer the DB_PASSWORD environment variable so the secret does not have
    # to live in the notebook; the literal is only a fallback.
    password=os.environ.get("DB_PASSWORD", "2526"))

In [4]:
# Show all columns whenever a frame is rendered.
pd.set_option('display.max_columns', None)

# Load the whole table; its name contains spaces, hence the backticks.
query = "SELECT * FROM `blinkit_db`.`Blinkit Grocery Data`"
df = pd.read_sql(sql=query, con=conn)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Item_Fat_Content,Item_Identifier,Item_Type,Outlet_Establishment_Year,Outlet_Identifier,Outlet_Location_Type,Outlet_Size,Outlet_Type,Item_Visibility,Item_Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2016,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0
3,Regular,FDL50,Canned,2014,OUT013,Tier 3,High,Supermarket Type1,0.042278,12.15,126.5046,5.0
4,Low Fat,DRI25,Soft Drinks,2015,OUT045,Tier 2,Small,Supermarket Type1,0.03397,19.6,55.1614,5.0


In [5]:
# Show all columns whenever a frame is rendered.
pd.set_option('display.max_columns', None)

# Load the whole `blinkit` table from the blinkit_db schema.
query = "SELECT * FROM blinkit_db.blinkit"
df = pd.read_sql(sql=query, con=conn)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,Item_Fat_Content,Item_Identifier,Item_Type,Outlet_Establishment_Year,Outlet_Identifier,Outlet_Location_Type,Outlet_Size,Outlet_Type,Item_Visibility,Item_Weight,Sales,Rating
0,Regular,FDX32,Fruits and Vegetables,2012,OUT049,Tier 1,Medium,Supermarket Type1,0.100014,15.1,145.4786,5.0
1,Low Fat,NCB42,Health and Hygiene,2022,OUT018,Tier 3,Medium,Supermarket Type2,0.008596,11.8,115.3492,5.0
2,Regular,FDR28,Frozen Foods,2016,OUT046,Tier 1,Small,Supermarket Type1,0.025896,13.85,165.021,5.0
3,Regular,FDL50,Canned,2014,OUT013,Tier 3,High,Supermarket Type1,0.042278,12.15,126.5046,5.0
4,Low Fat,DRI25,Soft Drinks,2015,OUT045,Tier 2,Small,Supermarket Type1,0.03397,19.6,55.1614,5.0


In [6]:
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy connection
# The password is taken from the DB_PASSWORD environment variable when set
# (the literal is only a fallback) and percent-encoded so special characters
# cannot break the URL.
_db_password = quote_plus(os.environ.get("DB_PASSWORD", "2526"))
engine = create_engine(f"mysql+pymysql://root:{_db_password}@localhost:3306/blinkit_db")

# Fetch all tables
tables_df = pd.read_sql("SHOW TABLES", engine)
print(tables_df)

   Tables_in_blinkit_db
0               blinkit
1  blinkit grocery data
2  blinkit_grocery_data
