# Python with MongoDB

## Libraries and Settings

In [None]:
# Libraries
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient

# Settings
# Install a global "ignore" filter so that no warnings are shown for the rest
# of the session. NOTE(review): this hides every warning category, including
# ones that could be useful while debugging.
import warnings
warnings.filterwarnings("ignore")

# Current working directory
# Printed because the data files used further down ('car_data.xlsx',
# 'car_data.json') are given as relative paths and resolve against it.
print(os.getcwd())

## Connect to MongoDB

In [None]:
# Connection URI of the MongoDB service (localhost, port 27017)
MONGO_URI = "mongodb://localhost:27017/"

# Create the client used by all subsequent database operations
client = MongoClient(MONGO_URI)

## Select or create database and collection

In [None]:
# Select (or create) the database by name
db = client.get_database("car_database")

# Select (or create) the collection inside it (similar to a table in SQL)
collection = db.get_collection("car_collection")

## Create .json file from .xlsx file with car data

In [None]:
# Read car data from an Excel file into a DataFrame
file_path = 'car_data.xlsx'
json_path = 'car_data.json'
df = pd.read_excel(file_path)

# Convert the DataFrame to a JSON string: orient='records' yields a JSON
# array with one object per row
data_json = df.to_json(orient='records', indent=4)

# Store the JSON data in a file (explicit encoding so the result does not
# depend on the platform default)
with open(json_path, 'w', encoding='utf-8') as file:
    file.write(data_json)

# Report the file that was actually written
print(f"Data has been read from the Excel file and stored in {json_path}")

## Read and insert data

In [None]:
# Load the records from the JSON file
with open('car_data.json', 'r') as json_file:
    example_data = json.load(json_file)

# Bulk-insert the records into the collection; a failure is reported on
# stdout instead of interrupting the notebook
try:
    insert_result = collection.insert_many(example_data)
except Exception as exc:
    print(f"Insert error: {exc}")

## Query the collection

In [None]:
# Build the filter from its two conditions:
#   Price strictly between 10000 and 120000, and PS greater than 125
price_filter = {"$gt": 10000, "$lt": 120000}
ps_filter = {"$gt": 125}
query = {"Price": price_filter, "PS": ps_filter}

# Run the query and materialize the cursor into a list of documents
cursor = collection.find(query)
results = list(cursor)

# Load the matching documents into a Pandas DataFrame
df = pd.DataFrame(results)

# Show the first rows, restricted to the selected columns
selected_columns = ['Offer_Id', 'Marke', 'Type', 'Fuel_Type', 'Price', 'PS']
df[selected_columns].head()

## Create histogram of car prices

In [None]:
# Draw the price histogram on the current axes and label it
ax = plt.gca()
ax.hist(df['Price'], bins=50, color='greenyellow')
ax.set_title('Histogram of Car Prices')
ax.set_xlabel('Car price (CHF)')
ax.set_ylabel('Frequency')
ax.grid()

# Render the figure
plt.show()

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: OS name, platform and release, current timestamp, Python version,
# framed by a separator line
separator = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

print(separator)
print(os.name.upper())
print(platform.system(), '|', platform.release())
print('Datetime:', timestamp)
print('Python Version:', python_version())
print(separator)