🟦 Key Points
CSV/JSON/Parquet → handled easily with pandas.

ORC/Avro → need pyarrow (for ORC) and fastavro (for Avro).

XML → parsed with xml.etree.ElementTree.

SQL → accessed with sqlite3 (or sqlalchemy for other databases).

In [6]:
import pandas as pd
import json
import sqlite3
import pyarrow.parquet as pq
import pyarrow.orc as orc
import fastavro
import xml.etree.ElementTree as ET

def read_csv(file_path):
    """Load the CSV file at *file_path* into a DataFrame.

    The file is parsed with pandas' default ``read_csv`` settings.
    """
    frame = pd.read_csv(file_path)
    return frame

def read_json(file_path):
    """Load the JSON file at *file_path* into a DataFrame.

    The file is parsed with pandas' default ``read_json`` settings.
    """
    frame = pd.read_json(file_path)
    return frame

def read_parquet(file_path):
    """Load the Parquet file at *file_path* into a DataFrame.

    Delegates to ``pandas.read_parquet`` with its default arguments.
    """
    frame = pd.read_parquet(file_path)
    return frame

def read_orc(file_path):
    """Load the ORC file at *file_path* into a DataFrame.

    The file is opened with ``pyarrow.orc.ORCFile``, read in full into an
    Arrow table, and then converted to pandas.
    """
    orc_file = orc.ORCFile(file_path)
    arrow_table = orc_file.read()
    return arrow_table.to_pandas()

def read_avro(file_path):
    """Load the Avro file at *file_path* into a DataFrame.

    The file is opened in binary mode and every record yielded by
    ``fastavro.reader`` is collected before the DataFrame is built.
    """
    with open(file_path, "rb") as avro_file:
        # Materialise all records while the file handle is still open.
        records = list(fastavro.reader(avro_file))
    return pd.DataFrame(records)

def read_xml(file_path, row_tag):
    """Load repeated XML elements from *file_path* into a DataFrame.

    Each element returned by ``root.findall(row_tag)`` becomes one row. Its
    direct children supply the columns: the child's tag is the column name
    and the child's text is the value (``None`` when the child has no text).
    If a row element has several children with the same tag, the last one
    wins.
    """
    document_root = ET.parse(file_path).getroot()
    records = [
        {field.tag: field.text for field in row_elem}
        for row_elem in document_root.findall(row_tag)
    ]
    return pd.DataFrame(records)

def read_sqlite(db_path, query, params=None):
    """Run *query* against the SQLite database at *db_path*.

    Args:
        db_path: Path to the SQLite database file.
        query: SQL statement to execute.
        params: Optional values to bind to placeholders in *query*
            (e.g. ``?``). Prefer this over formatting values into the SQL
            string. Defaults to ``None`` (no bound parameters).

    Returns:
        A DataFrame holding the query result.

    Raises:
        Whatever ``sqlite3.connect`` or ``pandas.read_sql_query`` raises; the
        connection is closed before the exception propagates.
    """
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(query, conn, params=params)
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

# Example usage:
if __name__ == "__main__":
    # Placeholder output; the reader examples below stay commented out until
    # real data files are available.
    print("hello")
    # Replace with actual file paths
    # csv_df = read_csv("data.csv")
    # json_df = read_json("data.json")
    # parquet_df = read_parquet("data.parquet")
    # orc_df = read_orc("data.orc")
    # avro_df = read_avro("data.avro")
    # xml_df = read_xml("data.xml", "book")  # row_tag depends on XML structure
    # sql_df = read_sqlite("data.db", "SELECT * FROM orders")

    # print("CSV Data:\n", csv_df.head())
    # print("JSON Data:\n", json_df.head())
    # print("Parquet Data:\n", parquet_df.head())
    # print("ORC Data:\n", orc_df.head())
    # print("Avro Data:\n", avro_df.head())
    # print("XML Data:\n", xml_df.head())
    # print("SQL Data:\n", sql_df.head())

hello
