In [12]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel rather than whichever pip is on PATH.
%pip install pandas



In [14]:
import os
import sqlite3
from datetime import datetime
from pathlib import Path

import pandas as pd
from IPython.display import display

# Directory containing your CSV files
csv_dir = Path("processed_data/csv")  # change this to your CSV folder path

# Columns of the feature_store table, in the order declared in Step 2.
# Inserts name these columns explicitly, so a CSV whose columns are ordered
# differently (or that carries extra columns) cannot put values in the wrong field.
FEATURE_COLUMNS = [
    "customerID",
    "tenure",
    "MonthlyCharges",
    "TotalCharges",
    "Contract_OneYear",
    "Contract_TwoYear",
    "PaymentMethod_CreditCard",
    "PaymentMethod_ElectronicCheck",
    "PaymentMethod_MailedCheck",
    "Churn",
]

# Column names come from the constant above (not from user input), so building
# the statement with an f-string is safe; the values stay parameterized.
insert_features_sql = (
    f"INSERT OR REPLACE INTO feature_store ({', '.join(FEATURE_COLUMNS)}) "
    f"VALUES ({', '.join('?' * len(FEATURE_COLUMNS))})"
)

# Connect to SQLite database
conn = sqlite3.connect("feature_store.db")
try:
    cursor = conn.cursor()

    # `with conn` commits the pending inserts when the block completes and
    # rolls them back if anything inside raises.
    with conn:
        # -------- Step 1: Create metadata table (if not exists) --------
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS feature_metadata (
            feature_name TEXT PRIMARY KEY,
            description TEXT,
            source TEXT,
            version TEXT,
            created_at TEXT
        )
        ''')

        # -------- Step 2: Create feature store table (if not exists) --------
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS feature_store (
            customerID TEXT PRIMARY KEY,
            tenure INTEGER,
            MonthlyCharges REAL,
            TotalCharges REAL,
            Contract_OneYear INTEGER,
            Contract_TwoYear INTEGER,
            PaymentMethod_CreditCard INTEGER,
            PaymentMethod_ElectronicCheck INTEGER,
            PaymentMethod_MailedCheck INTEGER,
            Churn INTEGER
        )
        ''')

        # -------- Step 3: Loop through CSV files and insert data --------
        # Sorted so the load order is deterministic: with INSERT OR REPLACE the
        # last file containing a given customerID wins.
        csv_files = sorted(csv_dir.glob("*.csv"))
        if not csv_files:
            print(f"No CSV files found in {csv_dir}")

        for csv_file in csv_files:
            print(f"Inserting data from {csv_file.name}")

            # Load CSV
            df = pd.read_csv(csv_file)

            # Fail early with a clear message instead of a parameter-count
            # error from sqlite3 when the CSV does not match the table.
            missing = [col for col in FEATURE_COLUMNS if col not in df.columns]
            if missing:
                raise ValueError(
                    f"{csv_file.name} is missing feature_store columns: {missing}"
                )

            # Insert metadata dynamically (one row per CSV column; existing
            # feature names are left untouched by INSERT OR IGNORE)
            created_at = str(datetime.now())
            features_meta = []
            for col in df.columns:
                if col == "Churn":
                    source = "Label"
                    description = "Whether customer churned (0/1)"
                elif col.startswith("Contract") or col.startswith("PaymentMethod"):
                    source = "Engineered"
                    description = f"Feature: {col}"
                elif col == "customerID":
                    source = "ID"
                    description = "Unique customer identifier"
                else:
                    source = "Telco DB"
                    description = f"Feature: {col}"
                features_meta.append((col, description, source, "v1", created_at))

            cursor.executemany('''
            INSERT OR IGNORE INTO feature_metadata
            (feature_name, description, source, version, created_at)
            VALUES (?, ?, ?, ?, ?)
            ''', features_meta)

            # Insert feature data.
            # sqlite3 natively adapts only None/int/float/str/bytes, and the
            # NumPy scalars produced by DataFrame.to_records() are not Python
            # ints. Casting to object yields plain Python values, and missing
            # values are mapped to None so they are stored as NULL.
            feature_rows = df[FEATURE_COLUMNS].astype(object)
            feature_rows = feature_rows.where(feature_rows.notna(), None)
            records = feature_rows.values.tolist()
            cursor.executemany(insert_features_sql, records)

    # -------- Step 4: Retrieve and display data --------
    features = pd.read_sql("SELECT * FROM feature_store", conn)
    print("Feature Store Data:")
    display(features)

    metadata = pd.read_sql("SELECT * FROM feature_metadata", conn)
    print("Feature Metadata:")
    display(metadata)
finally:
    # Close connection even when a step above fails
    conn.close()


AttributeError: partially initialized module 'pandas' has no attribute 'core' (most likely due to a circular import)

In [None]:
def pipeline_with_versioning(df):
    """
    Add engineered feature columns to ``df`` and write the result to CSV.

    The columns ``TotalSpend``, ``LongTenureFlag`` and ``PaymentScore`` are
    added to ``df`` in place (the caller's frame is modified), and the frame is
    then written to ``reports/engineered_customers.csv`` without its index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``MonthlyCharges``, ``tenure`` and
        ``PaymentMethod``.

    Returns
    -------
    None

    NOTE(review): the function name mentions versioning, but no version
    registration or raw-data export happens in this function -- confirm
    whether that logic is still to be written or lives elsewhere.
    """
    # ---- Feature Engineering ----
    df["TotalSpend"] = df["MonthlyCharges"] * df["tenure"]
    df["LongTenureFlag"] = df["tenure"] > 24
    # 0 for "ElectronicCheck", 1 for every other value (vectorised comparison
    # instead of a row-wise apply).
    df["PaymentScore"] = df["PaymentMethod"].ne("ElectronicCheck").astype("int64")

    # Save engineered data; create the output folder first so to_csv does not
    # fail when "reports/" does not exist yet.
    eng_path = "reports/engineered_customers.csv"
    Path(eng_path).parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(eng_path, index=False)
 
 
 

In [None]:
# Run the feature-engineering pipeline on `engineered_df`.
# NOTE(review): `engineered_df` is not defined in any cell visible here --
# confirm it is created before this cell runs on a fresh kernel.
pipeline_with_versioning(engineered_df)

In [None]:
# Raw string: in a normal string literal "\f" is a form-feed character, which
# silently corrupted "...\feature_store.db", and "\B", "\D", "\T" are invalid
# escape sequences.
DB_PATH = r"D:\BITS_SEM2\DMML_Assignment\Task6_FeatureStore\feature_store.db"


def export_features_to_parquet(db_path=DB_PATH):
    """
    Export the ``feature_store`` table of a SQLite database to a Parquet file.

    Reads every row of ``feature_store`` from the database at ``db_path``,
    converts column types listed below to Parquet-friendly ones, and writes
    ``customer_features.parquet`` into ``FEATURE_STORE_DIR`` using the
    ``fastparquet`` engine.

    Parameters
    ----------
    db_path : str, optional
        Path of the SQLite database file. Defaults to ``DB_PATH``.

    Returns
    -------
    str
        Path of the written Parquet file.
    """
    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query("SELECT * FROM feature_store", conn)
    finally:
        # Release the database handle even if the query fails
        conn.close()

    # Convert problematic types
    for col in df.columns:
        # isinstance check replaces the deprecated pd.api.types.is_period_dtype
        if isinstance(df[col].dtype, pd.PeriodDtype):
            df[col] = df[col].astype(str)
    # NOTE(review): astype(str) also turns missing values into literal strings
    # such as "None"/"nan" -- confirm that is acceptable downstream.
    for col in df.select_dtypes(["category", "object"]).columns:
        df[col] = df[col].astype(str)
    # Drop timezone information from tz-aware datetime columns
    for col in df.select_dtypes(include=["datetimetz"]).columns:
        df[col] = df[col].dt.tz_localize(None)

    # Export with fastparquet (create the target folder if it is missing)
    os.makedirs(FEATURE_STORE_DIR, exist_ok=True)
    out_path = os.path.join(FEATURE_STORE_DIR, "customer_features.parquet")
    df.to_parquet(out_path, index=False, engine="fastparquet")
    print(f"[Feature Store] Exported features → {out_path}")
    return out_path

In [None]:
 # Raw string so "\f" in the Windows path is not read as a form-feed character.
 export_features_to_parquet(r"D:\BITS_SEM2\DMML_Assignment\Task6_FeatureStore\feature_store.db")

NameError: name 'sqlite3' is not defined