Collecting import-ipynb
  Downloading import_ipynb-0.2-py3-none-any.whl.metadata (2.3 kB)
Downloading import_ipynb-0.2-py3-none-any.whl (4.0 kB)
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.2


In [23]:
import os
from itertools import islice


def read_head(path, limit=10):
    """Return up to ``limit`` leading lines of the text file at ``path``.

    Args:
        path: Path of the file to preview.
        limit: Maximum number of lines to read (default 10).

    Returns:
        The lines joined into a single string. Files shorter than ``limit``
        lines are returned in full.
    """
    # The previewed module declares ``# coding: utf-8``, so decode it as utf-8
    # instead of relying on the platform default encoding.
    with open(path, encoding="utf-8") as handle:
        # islice stops quietly at end-of-file; repeated next() calls would
        # raise StopIteration on a file with fewer than ``limit`` lines.
        return "".join(islice(handle, limit))


# Sanity check: is the module file on disk, and what does its head look like?
MODULE_PATH = "core/mongodb_manager.py"
module_exists = os.path.exists(MODULE_PATH)

print("Exists:", module_exists)
if module_exists:
    print("First 10 lines:\n", read_head(MODULE_PATH))


Exists: True
First 10 lines:
 #!/usr/bin/env python
# coding: utf-8

# In[2]:


get_ipython().system('pip install import-ipynb')


# In[10]:



In [24]:
import import_ipynb

from config.settings import Config
from core.mongodb_manager import MongoDBManager
from spark.processor import SparkProcessor
from analysis.engine import AnalysisEngine
from reporting.summary import generate_summary_report
import findspark
import sys

def main(cities=("delhi", "bangalore")):
    """Run the end-to-end city data pipeline.

    Stages, in order: Spark session setup, MongoDB import, Spark processing,
    analysis and visualisation, and the summary report. Progress is reported
    with ``print``.

    Args:
        cities: Iterable of city names passed to every stage. Defaults to the
            two cities the pipeline was originally hard-coded for.

    Returns:
        None. Returns early (after printing the error) if the Spark session
        cannot be created or the MongoDB stage raises.

    Raises:
        Any exception raised by the Spark-processing, analysis or reporting
        stages. The Spark session is stopped before it propagates.
    """
    # Materialise once: the cities are iterated by three separate stages.
    cities = list(cities)

    print(" Starting Data Processing Pipeline")
    print("=" * 50)

    # Step 1: Spark Setup
    print("\n1. Spark Setup")
    try:
        findspark.init()
        # Imported inside the try so a failing pyspark import is reported
        # with the install hint below rather than as a raw traceback.
        from pyspark.sql import SparkSession
        spark = (
            SparkSession.builder
            .appName("CityDataAnalysis")
            .config("spark.mongodb.read.connection.uri", Config.MONGO_URI)
            .config("spark.mongodb.write.connection.uri", Config.MONGO_URI)
            .getOrCreate()
        )
        spark.sparkContext.setLogLevel("ERROR")
        print(" Spark setup successful")
    except Exception as e:
        print(" Spark setup failed. Please install Spark dependencies.")
        print("Run: pip install pyspark findspark")
        print(f"Error: {e}")
        return

    # Per-city failures that the stages report via a falsy return value.
    failures = []

    # From here on a live Spark session exists; the finally clause releases it
    # on every exit path (normal completion, early return, or exception).
    try:
        # Step 2: MongoDB Setup and Data Import
        print("\n2. MongoDB Setup and Data Import")
        try:
            mongo_manager = MongoDBManager()
            for city in cities:
                print(f"\n Processing {city.upper()}...")
                if mongo_manager.import_city_data(city):
                    print(f"Successfully imported {city} data")
                else:
                    print(f" Failed to import {city} data")
                    failures.append(f"import:{city}")

            print("\n" + "=" * 50)
            mongo_manager.get_database_stats()
        except Exception as e:
            print(f"MongoDB operations failed: {e}")
            return

        # Step 3: Spark Processing
        print("\n3.Spark Data Processing")
        spark_processor = SparkProcessor(spark)

        for city in cities:
            print(f"\n Processing {city.upper()} with Spark...")
            results = spark_processor.process_city_data(city, mongo_manager)
            if results:
                spark_processor.save_spark_results(results, city)
                print(f" Successfully processed {city} data")
            else:
                print(f" Failed to process {city} data with Spark")
                failures.append(f"spark:{city}")

        # Step 4: Analysis and Visualization
        print("\n4. Data Analysis and Visualization")
        analysis_engine = AnalysisEngine()

        all_analysis_results = []
        for city in cities:
            print(f"\nAnalyzing {city.upper()}...")
            analysis_results = analysis_engine.perform_comprehensive_analysis(city)
            if analysis_results:
                all_analysis_results.append(analysis_results)
            data = analysis_engine.load_analysis_data(city)
            if data:
                analysis_engine.create_comprehensive_visualizations(city, data)

        # Step 5: Generate Summary Report
        print("\n5. Generating Summary Report")
        generate_summary_report(all_analysis_results)
    finally:
        # Clean up Spark
        spark.stop()
        print("Spark session stopped")

    # Only claim success when no stage reported a per-city failure.
    if failures:
        print("\nPipeline completed with failures: " + ", ".join(failures))
    else:
        print("\nPipeline completed successfully!")
    print("Check the 'data/outputs/' folder for results and visualizations")

# Run the pipeline only when this code executes as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

ImportError: cannot import name 'MongoDBManager' from 'core.mongodb_manager' (unknown location)