In [30]:
import pandas as pd
from pymongo import MongoClient

In [31]:
def create_mongo_database(mongo_url, db_name):
    """
    Open a client connection and return a handle to the named database.

    Parameters:
        mongo_url: Connection string passed to ``MongoClient``.
        db_name: Name of the database to look up on the client.

    Returns:
        The database object obtained by indexing the client with ``db_name``.
    """
    # Indexing the client by name yields the database handle in one step.
    return MongoClient(mongo_url)[db_name]

def create_mongo_collection(db, collection_name):
    """
    Return the collection called ``collection_name``, creating it when absent.

    Parameters:
        db: Database object exposing ``list_collection_names()``,
            ``create_collection()`` and item access by collection name.
        collection_name: Name of the collection to fetch or create.

    Returns:
        The existing collection handle, or the one returned by
        ``db.create_collection`` when the name was not listed yet.
    """
    already_present = collection_name in db.list_collection_names()

    # Guard clause: nothing to create, just hand back the existing collection.
    if already_present:
        handle = db[collection_name]
        print(f"Collection '{collection_name}' already exists.")
        return handle

    handle = db.create_collection(collection_name)
    print(f"Collection '{collection_name}' created.")
    return handle

# def import_csv_to_mongodb(csv_file_path, collection):
#     """
#     Function to import CSV data into a MongoDB collection, with manifest_permission as an array.
#     """
#     # Read CSV into pandas dataframe
#     df = pd.read_csv(csv_file_path)

#     # Convert each row to a dictionary and insert into the MongoDB collection
#     for index, row in df.iterrows():
#         # Split manifest_permission into an array (entries are newline-separated within the CSV cell)
#         if pd.notna(row['manifest_permission']):
#             manifest_permission_array = row['manifest_permission'].split('\n')
#         else:
#             manifest_permission_array = []

#         # Create a document with pkg_name as the unique _id
#         document = {
#             "_id": row['pkg_name'],  # Use pkg_name as the unique _id
#             "apkfile_name": row['apkfile_name'],
#             "apkfile_name_new": row['apkfile_name_new'],
#             "extract_manifest": row['extract_manifest'],
#             "manifest_permission": manifest_permission_array,  # Store as array
#             "app_group": row['app_group'],
#             "app_group_reason": row['app_group_reason']
#         }

#         try:
#             # Insert document into MongoDB (a duplicate _id raises and is reported by the except below)
#             collection.insert_one(document)
#             print(f"Inserted document for pkg_name: {row['pkg_name']}")
#         except Exception as e:
#             print(f"Error inserting document for pkg_name: {row['pkg_name']}. Error: {e}")
def _split_permissions(raw_permissions):
    """Turn one CSV cell of permissions into a clean list, one entry per line."""
    if pd.isna(raw_permissions):
        return []
    # splitlines() handles both "\n" and "\r\n"; blank lines are dropped so a
    # trailing newline does not produce an empty permission entry. str() keeps
    # a non-string cell from raising and aborting the whole import.
    stripped = (line.strip() for line in str(raw_permissions).splitlines())
    return [entry for entry in stripped if entry]


def _none_if_missing(value):
    """Map a pandas missing-value marker (NaN/None) to None, pass others through."""
    return None if pd.isna(value) else value


def import_csv_to_mongodb(csv_file_path, collection):
    """
    Import rows of a CSV file into a MongoDB collection, one document per row.

    Each document uses the row's ``pkg_name`` as ``_id`` and stores
    ``manifest_permission`` as an array (the CSV cell holds one permission per
    line). Rows whose ``pkg_name`` is already present in the collection are
    skipped, as are rows with no ``pkg_name`` at all. Missing values in the
    remaining columns are stored as ``None`` rather than as float NaN.

    Parameters:
        csv_file_path: Path of the CSV file, read with ``pd.read_csv``. The
            file must provide the columns ``pkg_name``, ``apkfile_name``,
            ``apkfile_name_new``, ``extract_manifest``,
            ``manifest_permission``, ``app_group`` and ``app_group_reason``.
        collection: Collection object exposing ``find_one`` and ``insert_one``.

    Returns:
        None. Progress and per-row insertion errors are reported via ``print``.

    Raises:
        KeyError: If one of the expected columns is absent from the CSV.
    """
    # Read CSV into pandas dataframe
    df = pd.read_csv(csv_file_path)

    for index, row in df.iterrows():
        pkg_name = row['pkg_name']

        # Without a package name there is no usable _id; inserting would
        # create a document keyed by NaN.
        if pd.isna(pkg_name):
            print(f"Row {index} has no pkg_name. Skipping insertion.")
            continue

        # Existence check only: project to _id so the full document (including
        # the permission array) is not fetched just to be discarded.
        if collection.find_one({"_id": pkg_name}, {"_id": 1}):
            print(f"Document with pkg_name '{pkg_name}' already exists. Skipping insertion.")
            continue  # Skip this document and move to the next one

        # Create a document with pkg_name as the unique _id
        document = {
            "_id": pkg_name,
            "apkfile_name": _none_if_missing(row['apkfile_name']),
            "apkfile_name_new": _none_if_missing(row['apkfile_name_new']),
            "extract_manifest": _none_if_missing(row['extract_manifest']),
            "manifest_permission": _split_permissions(row['manifest_permission']),  # Store as array
            "app_group": _none_if_missing(row['app_group']),
            "app_group_reason": _none_if_missing(row['app_group_reason'])
        }

        try:
            # Insert document into MongoDB
            collection.insert_one(document)
            print(f"Inserted document for pkg_name: {pkg_name}")
        except Exception as e:
            # Best effort: report the failing row and carry on with the rest.
            print(f"Error inserting document for pkg_name: {pkg_name}. Error: {e}")

In [32]:
# MongoDB configuration
mongo_url = "mongodb://localhost:27017"
db_name = "wearable-project"

# Target collection: keep exactly one assignment active and switch data sets
# by swapping the comment, so no value is silently overwritten further down.
# collection_name = "wearable-app"
collection_name = "wearable-standalone"  # standalone

# CSV file path. These are raw strings, so each backslash is written once;
# doubling them inside r"..." would put literal "\\" separators in the path.
# csv_file_path = r"C:\Users\ASUS\anaconda3\wearable-apk-manifest\combine_filtered_cleaned.csv"

csv_file_path = r"C:\Users\ASUS\anaconda3\wearable-apk-manifest\apk-wearable-standalone.csv"

In [33]:
# Step 1: obtain the database handle from the configured server URL and name.
db = create_mongo_database(mongo_url=mongo_url, db_name=db_name)

# Step 2: fetch the target collection, creating it if it is not listed yet.
collection = create_mongo_collection(db=db, collection_name=collection_name)

# Step 3: load the CSV rows into that collection (existing pkg_names are skipped).
import_csv_to_mongodb(csv_file_path=csv_file_path, collection=collection)

Collection 'wearable-app' already exists.
Document with pkg_name 'in.codeseed.audify' already exists. Skipping insertion.
Document with pkg_name 'com.urbandroid.lux' already exists. Skipping insertion.
Document with pkg_name 'maxcom.toolbox.altimeter' already exists. Skipping insertion.
Document with pkg_name 'com.reflive.reflive' already exists. Skipping insertion.
Document with pkg_name 'com.tomtop.smart' already exists. Skipping insertion.
Document with pkg_name 'com.mauriciotogneri.shoppinglist' already exists. Skipping insertion.
Document with pkg_name 'com.szyk.myheart' already exists. Skipping insertion.
Document with pkg_name 'interactiveshift.com.trailswear' already exists. Skipping insertion.
Document with pkg_name 'com.atlaswearables.ionic' already exists. Skipping insertion.
Document with pkg_name 'je.fit' already exists. Skipping insertion.
Document with pkg_name 'com.acquasys.contrack' already exists. Skipping insertion.
Document with pkg_name 'com.daniel.apps.wpe' alread

Document with pkg_name 'com.flick.awesomeclockwidget.watchface.beta' already exists. Skipping insertion.
Document with pkg_name 'com.myfitnesspal.android' already exists. Skipping insertion.
Document with pkg_name 'com.trainerize.tmo' already exists. Skipping insertion.
Document with pkg_name 'com.fitbit.FitbitMobile' already exists. Skipping insertion.
Document with pkg_name 'us.tx.state.hhsc.ytbmobile' already exists. Skipping insertion.
Document with pkg_name 'pedometer.steptracker.calorieburner.stepcounter' already exists. Skipping insertion.
Document with pkg_name 'homeworkout.homeworkouts.noequipment' already exists. Skipping insertion.
Document with pkg_name 'com.fatsecret.android' already exists. Skipping insertion.
Document with pkg_name 'com.chipsea.btcontrol.en' already exists. Skipping insertion.
Document with pkg_name 'bodyfast.zero.fastingtracker.weightloss' already exists. Skipping insertion.
Document with pkg_name 'com.garmin.android.apps.connectmobile' already exists. 

Document with pkg_name 'com.vsp.mobile' already exists. Skipping insertion.
Document with pkg_name 'com.truecoverage.app' already exists. Skipping insertion.
Document with pkg_name 'com.casadozeps.cocofeliz' already exists. Skipping insertion.
Document with pkg_name 'musclemonster.fitness.workout.home.gym.planner' already exists. Skipping insertion.
Document with pkg_name 'com.stealth.stealthcorechallenge2' already exists. Skipping insertion.
Document with pkg_name 'com.smartwidgetlabs.fitbitandroid' already exists. Skipping insertion.
Document with pkg_name 'com.sanitas.misanitas' already exists. Skipping insertion.
Document with pkg_name 'com.Hypnosis' already exists. Skipping insertion.
Document with pkg_name 'com.burnandbuildapp.app' already exists. Skipping insertion.
Document with pkg_name 'com.excellus.memberapp' already exists. Skipping insertion.
Document with pkg_name 'com.runbuddy.prod' already exists. Skipping insertion.
Document with pkg_name 'workoutforwomen.femalefitness

Document with pkg_name 'com.lighthouse1.mobilebenefits.upmbp2' already exists. Skipping insertion.
Document with pkg_name 'com.qweqweq.toothbrush' already exists. Skipping insertion.
Document with pkg_name 'com.caremerge.staffapp' already exists. Skipping insertion.
Document with pkg_name 'com.innovatise.justfit' already exists. Skipping insertion.
Document with pkg_name 'com.hyst.base.feverhealthy' already exists. Skipping insertion.
Document with pkg_name 'com.duretechnologies.apps.android.ovc' already exists. Skipping insertion.
Document with pkg_name 'app.mfit.personal' already exists. Skipping insertion.
Document with pkg_name 'com.ribbon.bestbody' already exists. Skipping insertion.
Document with pkg_name 'com.TeamBodyProject.TeamBodyProject' already exists. Skipping insertion.
Document with pkg_name 'com.somotsoft.pteverywhere' already exists. Skipping insertion.
Document with pkg_name 'com.mobifitness.nsfit104363' already exists. Skipping insertion.
Document with pkg_name 'com.

Document with pkg_name 'de.gymondo.app.gymondo' already exists. Skipping insertion.
Document with pkg_name 'com.craftbox.dietbox' already exists. Skipping insertion.
Document with pkg_name 'com.bitwalker.pmf' already exists. Skipping insertion.
Document with pkg_name 'fr.interiale.ITE' already exists. Skipping insertion.
Document with pkg_name 'com.cbt.mindhealthy' already exists. Skipping insertion.
Document with pkg_name 'jp.healthandrights.coyomiv2' already exists. Skipping insertion.
Document with pkg_name 'com.flowneuroscience.flow.droid' already exists. Skipping insertion.
Document with pkg_name 'com.workoutapps.height.increase.workouts.inch' already exists. Skipping insertion.
Document with pkg_name 'com.heartbeattracker.pulseoximeter.heartrate' already exists. Skipping insertion.
Document with pkg_name 'online.strongman.app' already exists. Skipping insertion.
Document with pkg_name 'com.decathlon.racer' already exists. Skipping insertion.
Document with pkg_name 'com.perfectgym

Document with pkg_name 'com.sportheroes.edfsportenergie' already exists. Skipping insertion.
Document with pkg_name 'com.amandadreher.meditacao' already exists. Skipping insertion.
Document with pkg_name 'jp.gr.java_conf.aho.cureflu' already exists. Skipping insertion.
Document with pkg_name 'jp.bspr.braincure.jp' already exists. Skipping insertion.
Document with pkg_name 'com.getmuv' already exists. Skipping insertion.
Document with pkg_name 'com.metlife.korea.custom.health360' already exists. Skipping insertion.
Document with pkg_name 'com.yc.gtfit' already exists. Skipping insertion.
Document with pkg_name 'com.hardyinfinity.bluelightfilter.lite' already exists. Skipping insertion.
Document with pkg_name 'ru.morizo.clatch' already exists. Skipping insertion.
Document with pkg_name 'pedometer.stepcounter.fitnessapp.stepstracker' already exists. Skipping insertion.
Document with pkg_name 'com.movile.meditation.vivo' already exists. Skipping insertion.
Document with pkg_name 'com.veeva

Document with pkg_name 'com.quirco.sanatorium.mobile' already exists. Skipping insertion.
Document with pkg_name 'tv.uscreen.xuanlanyoga' already exists. Skipping insertion.
Document with pkg_name 'net.p4p.absen' already exists. Skipping insertion.
Document with pkg_name 'com.waspito' already exists. Skipping insertion.
Document with pkg_name 'com.alex.crockfit' already exists. Skipping insertion.
Document with pkg_name 'app.lenus' already exists. Skipping insertion.
Document with pkg_name 'com.newbenefits.mybenefitswork' already exists. Skipping insertion.
Document with pkg_name 'com.dnt7.threeW' already exists. Skipping insertion.
Document with pkg_name 'org.eson.getfit3' already exists. Skipping insertion.
Document with pkg_name 'com.oky.ph' already exists. Skipping insertion.
Document with pkg_name 'colmena.app' already exists. Skipping insertion.
Document with pkg_name 'com.sdgcode.pedometer365' already exists. Skipping insertion.
Document with pkg_name 'bdo.trybe' already exists.