In [None]:
import pandas as pd
import json
import os
import requests
import sqlite3

#Load a CSV or JSON file and return it as a (DataFrame, file type) tuple.
def load_data(file_path):
    """Load a CSV or JSON file into a DataFrame.

    The format is chosen from the file extension, compared case-insensitively.

    Args:
        file_path: Path to a ``.csv`` or ``.json`` file.

    Returns:
        A ``(DataFrame, file_type)`` tuple, where ``file_type`` is ``'csv'``
        or ``'json'``.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.json``.
    """
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.csv':
        return pd.read_csv(file_path), 'csv'
    if file_extension == '.json':
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which differs between machines.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        return pd.DataFrame(data), 'json'
    raise ValueError("Unsupported file type. Please provide a CSV or JSON file.")

#Fetch a URL: a JSON response is parsed directly; any other response is saved locally and then loaded.
def fetch_remote_file(url, save_path, timeout=30):
    """Fetch *url* and return its content as a DataFrame.

    A response whose Content-Type mentions JSON is parsed in memory and
    nothing is written to disk. Any other response body is written to
    *save_path* and then loaded from there.

    Args:
        url: Address of the file or API endpoint.
        save_path: Where a non-JSON response body is stored.
        timeout: Timeout in seconds, passed to ``requests.get`` so that an
            unresponsive server cannot block the call indefinitely.

    Returns:
        A ``(DataFrame, file_type)`` tuple, where ``file_type`` is ``'csv'``
        or ``'json'``.

    Raises:
        Exception: If the response status code is not 200.
        ValueError: If the downloaded content cannot be identified as CSV or
            JSON from *save_path*, the URL, or the Content-Type header.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch file. Status code: {response.status_code}")

    content_type = response.headers.get('Content-Type', '').lower()
    if 'json' in content_type:
        # If the content is JSON, handle it as an API response
        return pd.DataFrame(response.json()), 'json'

    # Assume it's a file download (CSV or other)
    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"File downloaded and saved as {save_path}")

    if os.path.splitext(save_path)[1].lower() in ('.csv', '.json'):
        return load_data(save_path)

    # save_path carries no usable extension, so load_data cannot pick a
    # parser from it. Infer the format from the URL (ignoring any query
    # string or fragment) or from the Content-Type header instead.
    url_path = url.split('#', 1)[0].split('?', 1)[0]
    url_extension = os.path.splitext(url_path)[1].lower()
    if url_extension == '.json':
        with open(save_path, 'r', encoding='utf-8') as file:
            return pd.DataFrame(json.load(file)), 'json'
    if url_extension == '.csv' or 'csv' in content_type:
        return pd.read_csv(save_path), 'csv'

    # Still unidentified: let load_data raise its "unsupported file type" error.
    return load_data(save_path)

#Return a summary (record count, column count, column names) of the given DataFrame.
def summarize_data(df):
    """Describe the size and column labels of *df*.

    Returns:
        A dict with the keys ``"Number of records"`` (row count),
        ``"Number of columns"`` (column count) and ``"Columns"`` (the column
        labels as a list).
    """
    record_count, column_count = df.shape
    column_names = [*df.columns]
    return {
        "Number of records": record_count,
        "Number of columns": column_count,
        "Columns": column_names,
    }

#Ask the user if they want to delete a column and delete it if confirmed.
def delete_column(df):
    """Interactively offer to remove one column from *df*.

    Lists the available columns, asks for confirmation and, on an answer of
    ``yes``, asks for a column name. A matching column is dropped from *df*
    in place; an unknown name is only reported.

    Returns:
        The same DataFrame object that was passed in.
    """
    print("Available columns: ", list(df.columns))
    answer = input("Would you like to delete a column? (yes/no): ").strip().lower()

    # Anything other than an explicit "yes" leaves the data untouched.
    if answer != 'yes':
        print("No columns were deleted.")
        return df

    target = input("Enter the name of the column you want to delete: ").strip()
    if target not in df.columns:
        print(f"Column '{target}' does not exist in the data.")
        return df

    df.drop(columns=[target], inplace=True)
    print(f"Column '{target}' has been deleted.")
    return df

#Prompt for a column name and a default value, then add that column to the DataFrame.
def add_column(df):
    """Interactively add a constant-valued column to *df*.

    Prompts for a column name and a default value, then assigns that value
    (kept as the entered text) to the named column of *df*.

    Returns:
        The same DataFrame object that was passed in.
    """
    new_name = input("Enter the name of the column you want to add: ").strip()
    fill_prompt = f"Enter the default value for column '{new_name}': "
    fill_value = input(fill_prompt).strip()

    # Assigning a scalar gives every row the same value.
    df[new_name] = fill_value
    print(f"Column '{new_name}' has been added.")
    return df

def _print_summary(title, summary):
    """Print the record count, column count and column names of *summary* under *title*."""
    # Column labels are not always strings (a JSON list of lists yields
    # integer labels), so stringify them before joining.
    columns = ', '.join(str(column) for column in summary['Columns'])
    print(f"{title}\n"
          f"Number of records: {summary['Number of records']}\n"
          f"Number of columns: {summary['Number of columns']}\n"
          f"Columns: {columns}")


def _write_output(df, output_format):
    """Write *df* as 'csv', 'json' or 'sql'; return ``(output_path, table_name)``.

    ``table_name`` is ``None`` unless the output is an SQLite database.
    """
    if output_format == 'json':
        output_path = 'output.json'
        df.to_json(output_path, orient='records', indent=4)
        return output_path, None

    if output_format == 'csv':
        output_path = 'output.csv'
        df.to_csv(output_path, index=False)
        return output_path, None

    output_path = 'output.db'
    table_name = 'data_table'
    conn = sqlite3.connect(output_path)
    try:
        df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        # Close the connection even when the write fails.
        conn.close()
    return output_path, table_name


def data_conversion_pipeline():
    """Interactively load a CSV/JSON data set, edit its columns and export it.

    Steps: prompt for a local file or a URL, load the data, print a summary,
    offer to delete one column, add one column, then write the result as CSV
    (``output.csv``), JSON (``output.json``) or an SQLite table
    (``data_table`` in ``output.db``). The output format is independent of the
    input format, so edited data can be saved back in its original format.

    Returns:
        On success, the summary of the data as loaded (before the column
        edits) with ``"output_path"`` added, plus ``"table_name"`` for SQL
        output. On any failure, ``{"error": <message>}``.
    """
    try:
        # Prompt user for the file path or URL
        input_choice = input("Do you want to enter a file path or a URL? (file/url): ").strip().lower()

        if input_choice == 'file':
            file_path = input("Enter the path of the file (CSV or JSON): ").strip()
            if not os.path.exists(file_path):
                return {"error": "The specified file does not exist."}
            df, _ = load_data(file_path)

        elif input_choice == 'url':
            url = input("Enter the URL of the file or API endpoint: ").strip()
            save_path = 'downloaded_file'  # Save the file temporarily
            df, _ = fetch_remote_file(url, save_path)

        else:
            return {"error": "Invalid input. Please choose 'file' or 'url'."}

        # Generate and print summary before conversion
        summary = summarize_data(df)
        _print_summary("Summary of the input file:", summary)

        # Ask user to modify data (add/delete columns)
        df = delete_column(df)
        df = add_column(df)

        # Prompt user for the desired output format
        output_format = input("Enter the desired output format (CSV, JSON, SQL): ").strip().lower()
        if output_format not in ('csv', 'json', 'sql'):
            return {"error": "Unsupported output format. Choose CSV, JSON, or SQL."}

        output_path, table_name = _write_output(df, output_format)
        if table_name is not None:
            summary["table_name"] = table_name
        summary["output_path"] = output_path
        print(f"Conversion successful! Output saved to: {output_path}")

        # Generate post-processing summary
        _print_summary("Post-processing summary:", summarize_data(df))

        return summary

    except Exception as e:
        # Boundary of the interactive tool: report any failure to the caller
        # as an error dict instead of a traceback.
        return {"error": str(e)}

# Run the interactive pipeline only when executed directly (script or
# notebook cell), not as a side effect of importing this module.
if __name__ == "__main__":
    summary = data_conversion_pipeline()
    if "error" in summary:
        print(summary["error"])
    else:
        print("Data conversion completed successfully:", summary)
