In [1]:
# imports
import pandas as pd
import requests
import json
import os

In [2]:
def ingest_csv(file_path):
    """
    Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file; handed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents, using ``pd.read_csv`` default options.
    """
    # Announce the source before reading so a failing read is easy to trace.
    print(f"Ingesting CSV file from {file_path}")
    return pd.read_csv(file_path)

def ingest_json(file_path):
    """
    Ingests a JSON file and returns a DataFrame.

    The file is parsed with ``json.load`` and the result is flattened with
    ``pd.json_normalize``, so nested objects become dotted column names
    (e.g. ``meta.unit``).

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON file.

    Returns
    -------
    pandas.DataFrame
        The normalized contents of the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    print(f"Ingesting JSON file from {file_path}")
    # Decode explicitly as UTF-8 (the standard JSON interchange encoding)
    # instead of relying on the platform's locale encoding, which would
    # mis-read or reject non-ASCII text on systems whose default is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return pd.json_normalize(data)

def ingest_api(url, timeout=30):
    """
    Ingests data from a URL and returns a DataFrame.

    Performs an HTTP GET, parses the body as JSON and builds a DataFrame
    from the value stored under the top-level ``'hourly'`` key.

    Parameters
    ----------
    url : str
        Endpoint to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed to ``requests.get``.
        Defaults to 30 so a stalled server cannot hang the notebook.

    Returns
    -------
    pandas.DataFrame
        DataFrame constructed from ``response.json()['hourly']``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection failure, timeout, or a 4xx/5xx response.
    KeyError
        If the JSON payload has no ``'hourly'`` key.
    """
    print(f"Ingesting data from API at {url}")
    # Without an explicit timeout, requests may wait on the server forever.
    response = requests.get(url, timeout=timeout)
    # Fail fast on HTTP errors rather than trying to parse an error page,
    # which would otherwise surface as a misleading KeyError/JSON error.
    response.raise_for_status()
    data = response.json()
    hourly_data = pd.DataFrame(data['hourly'])
    return hourly_data

In [None]:
# main pipeline function to coordinate ingestion and merging
def run_pipeline(
    csv_file_path='energy_efficiency.csv',
    json_file_path='data/sample_data.json',
    api_url='https://api.example.com/data',
    combined_file_path='data/combined_data.csv',
):
    """
    Ingest the CSV, JSON and API sources, combine them, and save the result.

    The three DataFrames are concatenated column-wise (``axis=1``), i.e.
    placed side by side and aligned on their row index, then written to
    ``combined_file_path`` as CSV without the index column.

    Parameters
    ----------
    csv_file_path : str
        CSV source passed to ``ingest_csv``.
    json_file_path : str
        JSON source passed to ``ingest_json``.
    api_url : str
        Endpoint passed to ``ingest_api``.
    combined_file_path : str
        Destination of the combined CSV; its parent directory is created
        if it does not exist yet.

    Returns
    -------
    None
    """
    # load data from each source
    csv_data = ingest_csv(csv_file_path)
    json_data = ingest_json(json_file_path)
    api_data = ingest_api(api_url)

    # merge all dataframes side by side (columns from each source)
    combined_df = pd.concat([csv_data, json_data, api_data], axis=1)
    print("Data ingestion complete. Combined DataFrame:")
    print(combined_df.head())

    # save the combined DataFrame to a new CSV file; to_csv does not create
    # missing directories, so make sure the target directory exists first
    output_dir = os.path.dirname(combined_file_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    combined_df.to_csv(combined_file_path, index=False)
    
# Run the pipeline when this cell/script is executed directly. In a notebook
# kernel `__name__` is "__main__", so the call fires when the cell is run;
# the guard only suppresses it if this code is imported as a module.
if __name__ == "__main__":
    run_pipeline()