In [1]:
import pandas as pd
from pymongo import MongoClient
import pymongo.errors
from config import mongodb_config  # Get information from the config.py file

# Load data.csv into the configured MongoDB collection and report how many
# documents this run actually inserted.
client = None  # bound before the try so the finally block can test it directly

try:
    # Connect to MongoDB. The credentials are passed as keyword arguments
    # instead of being interpolated into the URI: inside a URI, reserved
    # characters such as '@', ':', '/' or '%' in the username/password must be
    # percent-escaped, whereas keyword arguments are used verbatim.
    # NOTE(review): assumes config.py stores the raw (not pre-escaped)
    # username/password — confirm.
    client = MongoClient(
        "mongodb+srv://cluster0.o5mucqp.mongodb.net/",
        username=mongodb_config['username'],
        password=mongodb_config['password'],
    )

    # Select the database and collection named in the config
    db = client[mongodb_config['database_name']]
    collection = db[mongodb_config['collection_name']]

    # Read the CSV file and turn each row into one dictionary (one document)
    df = pd.read_csv('data.csv')
    data_list = df.to_dict('records')

    # insert_many rejects an empty list, so a CSV without data rows is
    # reported as zero insertions instead of surfacing as an error.
    if data_list:
        result = collection.insert_many(data_list)
        # Count what this call inserted; the collection-wide total would
        # over-report whenever the collection already held documents.
        num_inserted = len(result.inserted_ids)
    else:
        num_inserted = 0

    print(f"{num_inserted} documents successfully added to MongoDB.")

except pymongo.errors.PyMongoError as pymongo_error:
    print(f"PyMongo error: {pymongo_error}")

except Exception as python_error:
    print(f"Python error: {python_error}")

finally:
    # Close the MongoDB connection if it was ever opened
    if client is not None:
        client.close()

62766 documents successfully added to MongoDB.


In [2]:
import pandas as pd
from pymongo import MongoClient
import pymongo.errors
from config import mongodb_config  # Get information from the config.py file

# Export every document of the configured MongoDB collection to
# mongodb_data_export.csv and report how many documents were written.
client = None  # bound before the try so the finally block can test it directly

try:
    # Connect to MongoDB. The credentials are passed as keyword arguments
    # instead of being interpolated into the URI: inside a URI, reserved
    # characters such as '@', ':', '/' or '%' in the username/password must be
    # percent-escaped, whereas keyword arguments are used verbatim.
    # NOTE(review): assumes config.py stores the raw (not pre-escaped)
    # username/password — confirm.
    client = MongoClient(
        "mongodb+srv://cluster0.o5mucqp.mongodb.net/",
        username=mongodb_config['username'],
        password=mongodb_config['password'],
    )

    # Select the database and collection named in the config
    db = client[mongodb_config['database_name']]
    collection = db[mongodb_config['collection_name']]

    # Retrieve all documents and materialise them as a DataFrame
    cursor = collection.find()
    df = pd.DataFrame(list(cursor))

    # Save to CSV file (the documents' '_id' field is exported as a column too)
    df.to_csv('mongodb_data_export.csv', index=False)

    # Report the number of documents actually exported. Counting the fetched
    # rows avoids a separate count query whose result could differ from what
    # find() returned if the collection changes in between.
    total_num_documents = len(df)

    print("Data successfully saved to CSV file.")
    print(f"Total number of documents : {total_num_documents}")

except pymongo.errors.PyMongoError as pymongo_error:
    print(f"PyMongo error: {pymongo_error}")

except Exception as python_error:
    print(f"Python error: {python_error}")

finally:
    # Close the MongoDB connection if it was ever opened
    if client is not None:
        client.close()

Data successfully saved to CSV file.
Total number of documents : 62766


In [3]:
# Re-read the original CSV so it can be compared with the MongoDB export.
df_local = pd.read_csv(filepath_or_buffer='data.csv')
# Cell output: (number of rows, number of columns) of the local file.
df_local.shape

(62766, 49)

In [4]:
# Display the column labels of the frame loaded from data.csv.
df_local.columns

Index(['Unnamed: 0', 'location', 'make', 'model', 'short_description', 'price',
       'mileage', 'gearbox', 'registration', 'fuel_type', 'power', 'seller',
       'body_type', 'type', 'drivetrain', 'seats', 'doors', 'country_version',
       'offer_number', 'warranty', 'Mileage', 'first_registration',
       'production_date', 'general_inspection', 'last_service',
       'full_service_history', 'non_smoker_vehicle', 'previous_owner',
       'last_timing_belt_change', 'Power', 'Gearbox', 'engine_size', 'gears',
       'cylinders', 'empty_weight', 'Fuel_type', 'fuel_consumption',
       'co2_emissions', 'emission_class', 'emissions_sticker',
       '\nComfort & Convenience\n', '\nEntertainment & Media\n',
       '\nSafety & Security\n', 'extras', '\ncolour', '\nmanufacturer_color',
       '\npaint', ' upholstery_colour ', ' upholstery '],
      dtype='object')

In [5]:
# Read back the CSV that was exported from MongoDB.
df_mongo = pd.read_csv(filepath_or_buffer='mongodb_data_export.csv')
# Cell output: (number of rows, number of columns) of the exported file.
df_mongo.shape

(62766, 50)

In [6]:
# Display the column labels of the frame loaded from mongodb_data_export.csv.
df_mongo.columns

Index(['_id', 'Unnamed: 0', 'location', 'make', 'model', 'short_description',
       'price', 'mileage', 'gearbox', 'registration', 'fuel_type', 'power',
       'seller', 'body_type', 'type', 'drivetrain', 'seats', 'doors',
       'country_version', 'offer_number', 'warranty', 'Mileage',
       'first_registration', 'production_date', 'general_inspection',
       'last_service', 'full_service_history', 'non_smoker_vehicle',
       'previous_owner', 'last_timing_belt_change', 'Power', 'Gearbox',
       'engine_size', 'gears', 'cylinders', 'empty_weight', 'Fuel_type',
       'fuel_consumption', 'co2_emissions', 'emission_class',
       'emissions_sticker', '\nComfort & Convenience\n',
       '\nEntertainment & Media\n', '\nSafety & Security\n', 'extras',
       '\ncolour', '\nmanufacturer_color', '\npaint', ' upholstery_colour ',
       ' upholstery '],
      dtype='object')

In [7]:
# Column labels that exist in the MongoDB export (df_mongo) but are absent
# from the local CSV (df_local). This is a one-directional comparison:
# labels present only in df_local are not reported.
difference = set(df_mongo.columns).difference(df_local.columns)

# In the message below, "first DataFrame" is df_mongo and "second" is df_local.
print("Column names in the first DataFrame but not in the second DataFrame:", difference)

Column names in the first DataFrame but not in the second DataFrame: {'_id'}
