# Python with MongoDB

## Libraries and Settings

In [None]:
# Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pymongo import MongoClient
from pymongo.errors import ConnectionFailure

# Settings
import warnings
# Install a global "ignore" filter so no Python warnings are shown from here on
warnings.filterwarnings("ignore")

# Print the current working directory of the running process
print(os.getcwd())

## Read and insert data

In [None]:
# Function to insert data
def insert_data():
    """Load car records from a JSON file and insert them into MongoDB.

    Connects to the server at ``mongodb://mongo:27017/``, prints the names
    of the available databases, reads ``/workspace/car_data.json`` and
    inserts its content into ``car_database.car_collection``.

    Progress and errors are reported with ``print``; nothing is returned.
    A ``ConnectionFailure`` is caught and reported. Errors raised while
    reading the JSON file are not caught and propagate to the caller.
    """
    try:
        # The context manager closes the client (and its connection pool)
        # on exit, even when reading the file or inserting fails.
        with MongoClient("mongodb://mongo:27017/") as client:
            # Listing the databases forces a round trip to the server, so a
            # dead connection is detected here.
            databases = client.list_database_names()
            print("Connected to MongoDB. Databases:", databases)

            # Access the specific database and collection
            collection = client['car_database']['car_collection']

            # Read data from JSON file; decode explicitly as UTF-8 instead
            # of relying on the platform's default encoding.
            with open('/workspace/car_data.json', 'r', encoding='utf-8') as file:
                example_data = json.load(file)

            # Insert data into the collection. Failures are reported but
            # deliberately do not abort the notebook (best effort).
            try:
                collection.insert_many(example_data)
                print("Inserted documents to mongo db")
            except Exception as e:
                print(f"Insert error: {e}")

    except ConnectionFailure as e:
        print("Failed to connect to MongoDB:", e)

# Run the import: load the JSON file and insert its content into MongoDB
insert_data()

## Query the collection

In [None]:
# Function to create the connection
def get_mongo_connection():
    """Create a MongoDB client and verify that the server is reachable.

    Returns:
        A ``MongoClient`` for ``mongodb://mongo:27017/`` whose server
        answered a ``ping``, or ``None`` if the server could not be reached.
        The caller is responsible for closing the returned client.
    """
    client = MongoClient("mongodb://mongo:27017/")
    try:
        # MongoClient() connects lazily, so constructing it does not raise
        # ConnectionFailure. A cheap 'ping' forces a round trip to the
        # server, which makes the success message below truthful.
        client.admin.command('ping')
    except ConnectionFailure as e:
        print("Failed to connect to MongoDB:", e)
        # Release the resources of the unusable client before giving up.
        client.close()
        return None

    print("Connected to MongoDB.")
    return client

# Function to query the data
def query_collection(db_name, collection_name, query):
    """Run a ``find`` query and return all matching documents as a list.

    Args:
        db_name: Name of the database to query.
        collection_name: Name of the collection inside ``db_name``.
        query: MongoDB filter document passed to ``Collection.find``.

    Returns:
        A list with every matching document, or an empty list when
        ``get_mongo_connection()`` returns ``None``.
    """
    client = get_mongo_connection()
    if client is None:
        return []

    try:
        # Materialise the cursor before the client is closed; a cursor
        # cannot be iterated once its client is gone.
        return list(client[db_name][collection_name].find(query))
    finally:
        # Each call opens its own client, so close it to avoid leaking one
        # connection pool per query.
        client.close()

# Target database and collection
db_name, collection_name = 'car_database', 'car_collection'

# Filter: Price strictly between 10000 and 120000, and PS greater than 125
price_condition = {"$gt": 10000, "$lt": 120000}
ps_condition = {"$gt": 125}
query = {"Price": price_condition, "PS": ps_condition}

# Run the query and load the matching documents into a Pandas DataFrame
results = query_collection(db_name, collection_name, query)
df = pd.DataFrame(results)

# Show the first rows, restricted to the columns of interest
columns_to_show = ['Offer_Id', 'Marke', 'Type', 'Fuel_Type', 'Price', 'PS']
df[columns_to_show].head()

## Create histogram of car prices

In [None]:
# Plot the distribution of car prices as a 50-bin histogram
fig, ax = plt.subplots()
ax.hist(df['Price'], bins=50, color='greenyellow')
ax.set_title('Histogram of Car Prices')
ax.set_xlabel('Car price (CHF)')
ax.set_ylabel('Frequency')
ax.grid()
plt.show()

## Remove existing DBs and Collections

In [None]:
# Function to remove an existing collection and its database
def remove_db_and_collection(db_name, collection_name):
    """Drop a collection and then its database from the MongoDB server.

    Connects to ``mongodb://mongo:27017/``, prints the names of the
    available databases, drops ``collection_name`` from ``db_name`` if it
    exists, and finally drops ``db_name`` itself if it was listed.

    Args:
        db_name: Name of the database to drop.
        collection_name: Name of the collection inside ``db_name`` to drop.

    Progress and errors are reported with ``print``; nothing is returned.
    A ``ConnectionFailure`` is caught and reported.
    """
    try:
        # The context manager closes the client (and its connection pool)
        # on exit, whether or not the drops succeed.
        with MongoClient("mongodb://mongo:27017/") as client:
            # Listing the databases forces a round trip to the server, so a
            # dead connection is detected here.
            databases = client.list_database_names()
            print("Connected to MongoDB. Databases:", databases)

            # Drop the collection
            db = client[db_name]
            if collection_name in db.list_collection_names():
                db.drop_collection(collection_name)
                print(f"Collection '{collection_name}' dropped from database '{db_name}'.")
            else:
                print(f"Collection '{collection_name}' does not exist in database '{db_name}'.")

            # Drop the database. The check uses the list fetched before the
            # collection was dropped.
            if db_name in databases:
                client.drop_database(db_name)
                print(f"Database '{db_name}' dropped.")
            else:
                print(f"Database '{db_name}' does not exist.")

    except ConnectionFailure as e:
        print("Failed to connect to MongoDB:", e)

# Clean up: drop the 'car_collection' collection and the 'car_database' database
remove_db_and_collection('car_database', 'car_collection')

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Print a short environment summary framed by two separator lines
separator = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(separator)
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', timestamp)
print('Python Version:', python_version())
print(separator)