In [5]:
import pandas as pd
from pymongo import MongoClient

In [6]:
def create_mongo_database(mongo_url, db_name):
    """
    Create or get a MongoDB database handle.

    Args:
        mongo_url: Connection string handed to ``MongoClient``.
        db_name: Name of the database to look up on that client.

    Returns:
        The object obtained by indexing the client with ``db_name``.
    """
    # Open a client for the given URL and select the database in one step
    return MongoClient(mongo_url)[db_name]

def create_mongo_collection(db, collection_name):
    """
    Create or get a MongoDB collection, reporting which of the two happened.

    Args:
        db: Database handle exposing ``list_collection_names``,
            ``create_collection`` and item access by collection name.
        collection_name: Name of the collection to create or fetch.

    Returns:
        The result of ``db.create_collection(collection_name)`` when the name
        is not yet listed, otherwise ``db[collection_name]``.
    """
    # Already listed: hand back the existing collection
    if collection_name in db.list_collection_names():
        existing = db[collection_name]
        print(f"Collection '{collection_name}' already exists.")
        return existing

    # Not listed yet: create it explicitly
    created = db.create_collection(collection_name)
    print(f"Collection '{collection_name}' created.")
    return created

# def import_csv_to_mongodb(csv_file_path, collection):
#     """
#     Function to import CSV data into a MongoDB collection, with manifest_permission as an array.
#     """
#     # Read CSV into pandas dataframe
#     df = pd.read_csv(csv_file_path)

#     # Convert each row to a dictionary and insert into the MongoDB collection
#     for index, row in df.iterrows():
#         # Split manifest_permission into an array (entries are newline-separated in the CSV cell)
#         if pd.notna(row['manifest_permission']):
#             manifest_permission_array = row['manifest_permission'].split('\n')
#         else:
#             manifest_permission_array = []

#         # Create a document with pkg_name as the unique _id
#         document = {
#             "_id": row['pkg_name'],  # Use pkg_name as the unique _id
#             "apkfile_name": row['apkfile_name'],
#             "apkfile_name_new": row['apkfile_name_new'],
#             "extract_manifest": row['extract_manifest'],
#             "manifest_permission": manifest_permission_array,  # Store as array
#             "app_group": row['app_group'],
#             "app_group_reason": row['app_group_reason']
#         }

#         try:
#             # Insert document into MongoDB (it will skip if _id already exists)
#             collection.insert_one(document)
#             print(f"Inserted document for pkg_name: {row['pkg_name']}")
#         except Exception as e:
#             print(f"Error inserting document for pkg_name: {row['pkg_name']}. Error: {e}")
def _split_permissions(raw_permissions):
    """Turn a raw manifest_permission cell into a list of permission strings."""
    if pd.isna(raw_permissions):
        return []
    # Entries are newline-separated inside the cell. strip() also removes the
    # '\r' left behind by Windows line endings, and blank lines (for example
    # from a trailing newline) are dropped instead of being stored as ''.
    return [
        line.strip()
        for line in str(raw_permissions).split('\n')
        if line.strip()
    ]


def _nan_to_none(value):
    """Map a missing cell (NaN/None) to None; return any other value unchanged."""
    return None if pd.isna(value) else value


def import_csv_to_mongodb(csv_file_path, collection):
    """
    Function to import CSV data into a MongoDB collection, with manifest_permission as an array.

    Each CSV row becomes one document whose ``_id`` is the row's ``pkg_name``.
    Rows whose ``pkg_name`` is missing, or whose ``_id`` is already present in
    the collection, are skipped with a message. Missing values in the other
    columns are stored as ``None`` rather than float NaN.

    Args:
        csv_file_path: Path of the CSV file to read. It must provide the
            columns pkg_name, apkfile_name, apkfile_name_new,
            extract_manifest, manifest_permission, app_group and
            app_group_reason.
        collection: Collection-like object exposing ``find_one(filter)`` and
            ``insert_one(document)``.

    Returns:
        None. Progress and per-row errors are reported with ``print``.

    Raises:
        KeyError: If a required column is absent from the CSV.
    """
    # Read CSV into pandas dataframe
    df = pd.read_csv(csv_file_path)

    # Convert each row to a dictionary and insert into the MongoDB collection
    for index, row in df.iterrows():
        pkg_name = row['pkg_name']

        # A missing pkg_name would otherwise become a NaN _id
        if pd.isna(pkg_name):
            print(f"Row {index} has no pkg_name. Skipping insertion.")
            continue

        # Check if the document with the same _id (pkg_name) already exists
        if collection.find_one({"_id": pkg_name}) is not None:
            print(f"Document with pkg_name '{pkg_name}' already exists. Skipping insertion.")
            continue  # Skip this document and move to the next one

        # Create a document with pkg_name as the unique _id
        document = {
            "_id": pkg_name,  # Use pkg_name as the unique _id
            "apkfile_name": _nan_to_none(row['apkfile_name']),
            "apkfile_name_new": _nan_to_none(row['apkfile_name_new']),
            "extract_manifest": _nan_to_none(row['extract_manifest']),
            "manifest_permission": _split_permissions(row['manifest_permission']),  # Store as array
            "app_group": _nan_to_none(row['app_group']),
            "app_group_reason": _nan_to_none(row['app_group_reason'])
        }

        try:
            # Insert document into MongoDB
            collection.insert_one(document)
            print(f"Inserted document for pkg_name: {pkg_name}")
        except Exception as e:
            # Best-effort import: report the failure and carry on with the next row
            print(f"Error inserting document for pkg_name: {pkg_name}. Error: {e}")

In [7]:
# MongoDB configuration
mongo_url = "mongodb://localhost:27017"
db_name = "wearable-project"

# Target collection (standalone apps). Alternative kept for reference:
# collection_name = "wearable-app"
collection_name = "wearable-standalone"

# CSV file path. These are raw strings, so each backslash is written once;
# doubling them inside r"..." would put literal double backslashes in the path.
# csv_file_path = r"C:\Users\ASUS\anaconda3\wearable-apk-manifest\combine_filtered_cleaned.csv"
csv_file_path = r"C:\Users\ASUS\anaconda3\wearable-apk-manifest\apk-wearable-standalone.csv"

In [8]:
# Open the target database, then create or fetch the collection inside it
db = create_mongo_database(mongo_url=mongo_url, db_name=db_name)
collection = create_mongo_collection(db=db, collection_name=collection_name)

# Load the CSV rows into that collection
import_csv_to_mongodb(csv_file_path=csv_file_path, collection=collection)

Collection 'wearable-standalone' already exists.
Inserted document for pkg_name: app.groupcal.www
Inserted document for pkg_name: apps.r.compass
Inserted document for pkg_name: ch.publisheria.bring
Inserted document for pkg_name: com.albuquerquedesign.adanalog013
Inserted document for pkg_name: com.anghami
Inserted document for pkg_name: com.audible.application
Inserted document for pkg_name: com.c25k
Inserted document for pkg_name: com.cardiogram.v1
Inserted document for pkg_name: com.centr.app
Inserted document for pkg_name: com.codverter.wearflashlight
Inserted document for pkg_name: com.contorra.golfpad
Inserted document for pkg_name: com.customsolutions.android.alexa
Inserted document for pkg_name: com.dungelin.heartrate
Inserted document for pkg_name: com.exovoid.weather.app
Inserted document for pkg_name: com.fish4fun.mycards
Inserted document for pkg_name: com.fitbod.fitbod
Inserted document for pkg_name: com.fitiv.fitivapplication
Inserted document for pkg_name: com.fiton.andr