### Using the Shopify data from the Spanner session

### Populate Firestore database with newly designed Shopify data

In [1]:
import uuid
import pandas as pd
from google.cloud import firestore
# Firestore client shared by the cells below; constructed with no explicit arguments.
db = firestore.Client()

#### Apps collection with subcollections: `pricing_plans` (with features embedded), `key_benefits`, and `reviews`

In [None]:
# Populate the top-level `apps` collection and, under each app document, the
# `pricing_plans`, `key_benefits` and `reviews` subcollections from the CSV exports.

# Number of queued writes after which the batch is committed and replaced.
# Kept below Firestore's per-batch write limit (see the batched-writes docs).
BATCH_FLUSH_SIZE = 399

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/apps.csv', sep=',', header=0, lineterminator='\n')
apps_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/pricing_plans.csv', sep=',', header=0, lineterminator='\n')
pricing_plans_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/pricing_plan_features.csv', sep=',', header=0, lineterminator='\n')
feature_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/key_benefits.csv', sep=',', header=0, lineterminator='\n')
benefits_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/reviews.csv', sep=',', header=0, lineterminator='\n')
reviews_rows = df.values.tolist()

# Index every child table by its parent key once, so each app (and each plan)
# looks up its own rows directly instead of rescanning the whole list.
# Row order inside each group is the CSV order, so writes happen in the same
# sequence as a straight scan would produce.

# Feature text (column 2) grouped by (column 0, column 1), the pair that is
# matched against a pricing plan's (id, app id).
features_by_plan = {}
for feature_row in feature_rows:
    features_by_plan.setdefault((feature_row[0], feature_row[1]), []).append(feature_row[2])

# Pricing plans grouped by the app id in column 1.
plans_by_app = {}
for plan_row in pricing_plans_rows:
    plans_by_app.setdefault(plan_row[1], []).append(plan_row)

# Key benefits grouped by the app id in column 0.
benefits_by_app = {}
for benefits_row in benefits_rows:
    benefits_by_app.setdefault(benefits_row[0], []).append(benefits_row)

# Reviews grouped by the app id in column 0.
reviews_by_app = {}
for review_row in reviews_rows:
    reviews_by_app.setdefault(review_row[0], []).append(review_row)


def _iter_app_writes():
    """Yield ``(document reference, record)`` pairs in write order.

    For every app row this yields the app document first, then its pricing
    plans (each with its list of features), then its key benefits, then its
    reviews.
    """
    for app_row in apps_rows:
        app_id = app_row[0]
        apps_ref = db.collection('apps').document(app_id)
        yield apps_ref, {
            'id': app_id,
            'url': app_row[1],
            'title': app_row[2],
            'developer': app_row[3],
            'developer_link': app_row[4],
            'icon': app_row[5],
            'rating': app_row[6],
            'reviews_count': app_row[7],
        }

        for plan_row in plans_by_app.get(app_id, []):
            plan_record = {
                'id': plan_row[0],
                'title': plan_row[2],
                'price': plan_row[3],
                # Copy so every plan document owns its own list.
                'features': list(features_by_plan.get((plan_row[0], plan_row[1]), [])),
            }
            yield apps_ref.collection('pricing_plans').document(plan_row[0]), plan_record

        # NOTE(review): key benefits and reviews get a fresh random UUID as
        # document id, so re-running this cell adds new documents instead of
        # overwriting the ones written by an earlier run.
        for benefits_row in benefits_by_app.get(app_id, []):
            benefits_record = {
                'title': benefits_row[1],
                'description': benefits_row[2],
            }
            benefits_id = str(uuid.uuid4())
            yield apps_ref.collection('key_benefits').document(benefits_id), benefits_record

        for review_row in reviews_by_app.get(app_id, []):
            review_record = {
                'author': review_row[1],
                'rating': review_row[2],
                'posted_at': review_row[3],
            }
            review_id = str(uuid.uuid4())
            yield apps_ref.collection('reviews').document(review_id), review_record


# Single place where writes are queued and flushed.
batch = db.batch()
count = 0
for doc_ref, record in _iter_app_writes():
    batch.set(doc_ref, record)
    count = count + 1
    if count == BATCH_FLUSH_SIZE:
        batch.commit()
        batch = db.batch()
        count = 0

# Commit whatever is left in the final, partially filled batch.
batch.commit()

In [None]:
# we cleared the output in order to decrease scrolling

#### Categories collection with subcollection `apps`

In [3]:
# Populate the `categories` collection; each category document gets an `apps`
# subcollection holding a copy of every app record linked to it through the
# apps_categories junction table.

# Number of queued writes after which the batch is committed and replaced.
# Kept below Firestore's per-batch write limit (see the batched-writes docs).
BATCH_FLUSH_SIZE = 399

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/categories.csv', sep=',', header=0, lineterminator='\n')
categories_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/apps_categories.csv', sep=',', header=0, lineterminator='\n')
junction_rows = df.values.tolist()

df = pd.read_csv('/home/jupyter/Amaryllis/spanner_shopify/apps.csv', sep=',', header=0, lineterminator='\n')
apps_rows = df.values.tolist()

# Category rows by category id (column 0), for direct lookup.
categories_by_id = {category[0]: category for category in categories_rows}

# Category ids (junction column 1) grouped by app id (junction column 0), in
# CSV order, so each app finds its categories without rescanning the table.
category_ids_by_app = {}
for junction in junction_rows:
    category_ids_by_app.setdefault(junction[0], []).append(junction[1])


def _iter_category_writes():
    """Yield ``(document reference, record)`` pairs in write order.

    A category document is yielded only the first time the category is met,
    and only if it does not already exist in Firestore, so an existing
    category document is never overwritten. That existence check is one read
    per category rather than one read per app/category pair.
    """
    checked_category_ids = set()
    for app in apps_rows:
        app_record = {
            'id': app[0],
            'url': app[1],
            'title': app[2],
            'developer': app[3],
            'developer_link': app[4],
            'icon': app[5],
            'rating': app[6],
            'reviews_count': app[7],
        }

        for category_id in category_ids_by_app.get(app[0], []):
            category = categories_by_id.get(category_id)
            if category is None:
                # Junction row points at a category missing from categories.csv.
                continue

            categories_ref = db.collection('categories').document(category[0])
            if category[0] not in checked_category_ids:
                checked_category_ids.add(category[0])
                if not categories_ref.get().exists:
                    yield categories_ref, {'id': category[0], 'title': category[1]}

            yield categories_ref.collection('apps').document(app[0]), app_record


# Queue the writes and flush in fixed-size chunks instead of once per app.
batch = db.batch()
count = 0
for doc_ref, record in _iter_category_writes():
    batch.set(doc_ref, record)
    count = count + 1
    if count == BATCH_FLUSH_SIZE:
        batch.commit()
        batch = db.batch()
        count = 0

# Commit whatever is left in the final, partially filled batch.
batch.commit()

### Document count for each collection and their subcollection(s)

#### Apps collection and its subcollections document count

In [4]:
# Fetch every document of the top-level `apps` collection and report how many came back.
apps_collection = db.collection('apps')
appsDocs = apps_collection.get()
apps_total = len(appsDocs)

print("The number of documents in the collection apps is", apps_total)

The number of documents in the collection apps is 3547


In [5]:
# Stream the `key_benefits` collection group and tally one per document.
keyBenefitsDocs = db.collection_group('key_benefits').stream()
keyBenefitCount = sum(1 for _ in keyBenefitsDocs)

print("The number of documents in the subcollection key_benefits is", keyBenefitCount)

The number of documents in the subcollection key_benefits is 9541


In [6]:
# Stream the `reviews` collection group and tally one per document.
reviewsDocs = db.collection_group('reviews').stream()
reviewsCount = sum(1 for _ in reviewsDocs)

print("The number of documents in the subcollection reviews is", reviewsCount)

The number of documents in the subcollection reviews is 124601


In [7]:
# Stream the `pricing_plans` collection group and tally one per document.
pricingPlansDocs = db.collection_group('pricing_plans').stream()
pricingPlansCount = sum(1 for _ in pricingPlansDocs)

print("The number of documents in the subcollection pricing_plans is", pricingPlansCount)

The number of documents in the subcollection pricing_plans is 6275


#### Categories collection and its `apps` subcollection document count

In [8]:
# Fetch every document of the top-level `categories` collection and report how many came back.
categories_collection = db.collection('categories')
categoriesDocs = categories_collection.get()
categories_total = len(categoriesDocs)
print("The number of documents in the collection categories is ", categories_total)

The number of documents in the collection categories is  12


In [9]:
# The `apps` collection group covers both the top-level apps collection and
# the apps subcollections under categories, so tally the whole group first.
allAppsDocs = db.collection_group('apps').stream()
allAppsCount = sum(1 for _ in allAppsDocs)

# Size of the top-level apps collection on its own.
appsCollection = db.collection('apps').get()
topLevelAppsCount = len(appsCollection)

# What remains after removing the top-level documents belongs to the
# categories' apps subcollections.
catAppsCount = allAppsCount - topLevelAppsCount

print("The number of documents in the subcollection apps is", catAppsCount)

The number of documents in the subcollection apps is 5383


### List the 10 apps with the highest number of reviews based on apps.reviews_count

In [None]:
# Return the id, title, developer, rating, and reviews_count of those apps. 
# Order by reviews_count in descending order

In [12]:
# Query: apps ordered by reviews_count, largest first, capped at ten documents.
highestReviews = (
    db.collection("apps")
    .order_by("reviews_count", direction=firestore.Query.DESCENDING)
    .limit(10)
)

results = highestReviews.stream()

# Print each app's id followed by the requested subset of its fields.
for snapshot in results:
    app_fields = snapshot.to_dict()
    summary = {
        "title": app_fields['title'],
        "developer": app_fields['developer'],
        "rating": app_fields['rating'],
        "reviews_count": app_fields['reviews_count'],
    }
    print("ID:", app_fields['id'], "=>", summary)

ID: d9f142ee-b141-4dc4-9353-173db61d2eb0 => {'title': 'Privy ‑ Exit Pop Ups & Email', 'developer': 'Privy', 'rating': 4.7, 'reviews_count': 23078}
ID: 78ea0810-c008-4a4e-a82f-de0c790e3286 => {'title': 'Free Shipping Bar', 'developer': 'Hextom', 'rating': 4.9, 'reviews_count': 8737}
ID: b88488b0-9912-44d3-b736-224c36f09d95 => {'title': 'Sales Pop ‑ Popup Notification', 'developer': 'CartKit', 'rating': 4.8, 'reviews_count': 6905}
ID: e528a60e-94f8-4e92-80e2-5bc6013b8283 => {'title': 'BEST Currency Converter', 'developer': 'Grizzly Apps', 'rating': 4.8, 'reviews_count': 5986}
ID: be2640c4-01b5-4d52-9f68-cae8c0734d0d => {'title': 'Recart FB Messenger Marketing', 'developer': 'Recart', 'rating': 4.8, 'reviews_count': 5596}
ID: 70bff9e0-4316-4cc6-84ce-92fcd1bc6925 => {'title': 'EU Cookie Bar ‑ Cookie GDPR', 'developer': 'Booster Apps', 'rating': 4.7, 'reviews_count': 5259}
ID: 171816e2-27d4-4552-a65e-ab44a312fe04 => {'title': 'Sales Pop Master ‑ Countdown', 'developer': 'Autoketing', 'ratin