In [6]:
!pip install pandas 
import pandas as pd
import sqlite3
import os


def load_csv_data(csv_path):
    """Read the CSV source at ``csv_path`` into a pandas DataFrame.

    The argument is handed to ``pd.read_csv`` unchanged, with all parser
    options left at their pandas defaults.
    """
    csv_frame = pd.read_csv(csv_path)
    return csv_frame

def load_excel_data(excel_path):
    """Read the Excel source at ``excel_path`` into a pandas DataFrame.

    The argument is handed to ``pd.read_excel`` unchanged, with all reader
    options left at their pandas defaults.
    """
    excel_frame = pd.read_excel(excel_path)
    return excel_frame

def load_json_data(json_path):
    """Read the JSON source at ``json_path`` into a pandas DataFrame.

    The argument is handed to ``pd.read_json`` unchanged, with all reader
    options left at their pandas defaults.
    """
    json_frame = pd.read_json(json_path)
    return json_frame


def combine_dataframes(dfs):
    """Stack the given DataFrames vertically into a single DataFrame.

    The original row labels are discarded; the result gets a fresh
    0..n-1 index (``ignore_index=True``).
    """
    stacked = pd.concat(objs=dfs, ignore_index=True)
    return stacked



def clean_data(df):
    """Return a cleaned copy of ``df`` with gaps filled and unnamed rows dropped.

    Missing values are replaced per column:

    * ``Revenue``          -> median of the non-missing ``Revenue`` values
    * ``Product Category`` -> the string ``'Unknown'``
    * ``Date``             -> the current timestamp at call time

    Rows whose ``Product Name`` is missing are then removed.

    The input DataFrame is not modified; a new DataFrame is returned.

    Raises:
        KeyError: if ``df`` has no ``Revenue`` or ``Product Name`` column.
    """
    replacements = {
        'Revenue': df['Revenue'].median(),
        'Product Category': 'Unknown',
        'Date': pd.Timestamp.now(),
    }
    # Use the non-inplace fillna so the caller's DataFrame is left untouched;
    # mutating the argument while also returning a new frame is a side effect
    # callers would not expect.
    filled = df.fillna(replacements)
    return filled.dropna(subset=['Product Name'])



def filter_and_sort(df, category=None, min_revenue=0):
    """Select rows by category and minimum revenue, ordered by ``Date``.

    Args:
        df: DataFrame with ``Revenue`` and ``Date`` columns (and a
            ``Product Category`` column when ``category`` is given).
        category: when truthy, keep only rows whose ``Product Category``
            equals this value; when falsy, no category filter is applied.
        min_revenue: keep only rows with ``Revenue >= min_revenue``.

    Returns:
        The matching rows sorted ascending by ``Date``; original index
        labels are preserved.
    """
    # Narrow by category first (only when one was requested) ...
    subset = df if not category else df.loc[df['Product Category'] == category]
    # ... then apply the revenue floor to whatever is left.
    subset = subset.loc[subset['Revenue'] >= min_revenue]
    return subset.sort_values('Date')



def store_to_sql(df, db_path='sales.db'):
    """Write ``df`` to the ``sales`` table of a SQLite database.

    Any existing ``sales`` table is replaced, and the DataFrame index is
    not written as a column.

    Args:
        df: DataFrame to persist.
        db_path: path of the SQLite database file to open.
    """
    conn = sqlite3.connect(db_path)
    try:
        df.to_sql('sales', conn, if_exists='replace', index=False)
    finally:
        # Close even when the write fails so the database handle is not
        # leaked (and the file is not left locked).
        conn.close()

def load_from_sql(db_path='sales.db'):
    """Load every row of the ``sales`` table from a SQLite database.

    Args:
        db_path: path of the SQLite database file to open.

    Returns:
        A DataFrame holding the result of ``SELECT * FROM sales``.
    """
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql('SELECT * FROM sales', conn)
    finally:
        # Close even when the query fails (e.g. the table does not exist)
        # so the database handle is not leaked.
        conn.close()



def export_data(df, output_folder='exported'):
    """Write ``df`` to ``output_folder`` as CSV, Excel and JSON files.

    The folder is created if it does not exist. Three files are produced,
    all named ``cleaned_sales`` with the extensions ``.csv``, ``.xlsx`` and
    ``.json``. The index is omitted from the CSV and Excel output; the JSON
    file is written with one record per line.
    """
    os.makedirs(output_folder, exist_ok=True)
    # Shared path prefix; each writer below only appends its extension.
    stem = f'{output_folder}/cleaned_sales'
    df.to_csv(f'{stem}.csv', index=False)
    df.to_excel(f'{stem}.xlsx', index=False)
    df.to_json(f'{stem}.json', orient='records', lines=True)



def read_csv_in_chunks(csv_path, chunk_size=1000):
    """Stream a CSV in chunks and print the total number of rows read.

    Args:
        csv_path: CSV source passed to ``pd.read_csv``.
        chunk_size: number of rows requested per chunk.

    Nothing is returned; the row count is only printed.
    """
    reader = pd.read_csv(csv_path, chunksize=chunk_size)
    # Only the size of each chunk is needed, so fold the lengths directly
    # instead of keeping any chunk around.
    row_count = sum(len(part) for part in reader)
    print(f"Total rows processed in chunks: {row_count}")



def process_all_data(csv_path, excel_path, json_path, *,
                     category='Electronics', min_revenue=100):
    """Run the full pipeline: load, combine, clean, filter, store, export.

    Steps, in order:

    1. Load the CSV, Excel and JSON inputs.
    2. Concatenate them into one DataFrame.
    3. Clean the combined data with ``clean_data``.
    4. Keep rows matching ``category`` with revenue of at least
       ``min_revenue``, sorted by date (``filter_and_sort``).
    5. Store the result in SQLite via ``store_to_sql`` and read it back
       with ``load_from_sql`` (both using their default database path).
    6. Export the re-loaded data with ``export_data`` (default folder).

    Args:
        csv_path: path of the CSV input.
        excel_path: path of the Excel input.
        json_path: path of the JSON input.
        category: product category passed to ``filter_and_sort``.
            Defaults to ``'Electronics'``, the value that was previously
            hard-coded.
        min_revenue: revenue floor passed to ``filter_and_sort``.
            Defaults to ``100``, the value that was previously hard-coded.
    """
    frames = [
        load_csv_data(csv_path),
        load_excel_data(excel_path),
        load_json_data(json_path),
    ]

    combined_df = combine_dataframes(frames)
    cleaned_df = clean_data(combined_df)
    filtered_sorted_df = filter_and_sort(
        cleaned_df, category=category, min_revenue=min_revenue
    )

    # Round-trip through SQLite so the exported files reflect exactly what
    # was persisted in the database.
    store_to_sql(filtered_sorted_df)
    sql_df = load_from_sql()

    export_data(sql_df)


