## MongoDB python programming

### Generating data for a employee table - DATA PREP

- Find or synthesize a JSON dataset.
- This should be stored in either a BSON or JSON file format

In [37]:
from random import randint
import json
## 
fnames = ['John','Betty','July', 'James','Richard','Ethan', 'Liam','Jack','Jerry','Jordan','Susan','Elizabeth', 'Tom']
lnames = ['Jones', 'Smith', 'Carmichael', 'Granger', ' Fox', 'Madden', 'Hart', 'Boone', 'Hale', 'Langston']

emails = ['@gmail.com','@hotmail.com','@yahoo.in']

jobs = ['Manager','Tech Lead','Support','Team Lead','Software Engineer']
cities = ['Tampa','St.Pete','St.Augustine','Naples']


In [38]:
import pymongo
import credentials

connection_string = f"mongodb+srv://{credentials.username}:{credentials.password}@cluster-pilot.79lgo8n.mongodb.net/?retryWrites=true&w=majority"
client = pymongo.MongoClient(connection_string)
db = client.office
collection = db['Employees']

In [39]:
for id in range(1,51):
    fname = fnames[randint(0,len(fnames)-1)]
    email = fname+''+emails[randint(0,(len(emails)-1))]
    #print(f"employee_id:{id}  email:{email}")

    # Generating phone number
    phone_number = '5'
    for j in range(9):
        phone_number += str(randint(0,9))
        if j == 2 or j == 5 :
            phone_number += '-'
    #print(phone_number)

    #Generating job title
    job = jobs[randint(0,(len(jobs)-1))]
    #print(job)

    #generating city 
    city = cities[randint(0,(len(cities)-1))]
    #print(city)

    ## generating zip
    zip = '33'
    for j in range(3):
        zip += str(randint(0,9))
    #print(zip)

    ## generating pay range
    for i in range(0,2):
        pay = randint(5,9)*10000
    #print(pay)

    employee_doc = {
        'Employee_id':id,
        'First_name':fname,
        'Last_name':lnames[randint(0,len(lnames)-1)],
        'email':email,
        'Contact':phone_number,
        'Job_Title':job,
        'City':city,
        'Zip_code':zip,
        'State':'Florida',
        'Pay':pay,
    }

    collection.insert_one(employee_doc)   

In [40]:
#collection.drop()

### Load the data into a MongoDB collection

In [41]:
collection = db['Employees']
docs = collection.find({})
for doc in docs:
    print(doc)

{'_id': ObjectId('650e500aef3885ad35ac5b5e'), 'Employee_id': 1, 'First_name': 'James', 'Last_name': 'Hale', 'email': 'James@yahoo.in', 'Contact': '5216-680-645', 'Job_Title': 'Tech Lead', 'City': 'St.Pete', 'Zip_code': '33134', 'State': 'Florida', 'Pay': 50000, 'Company_id': 7}
{'_id': ObjectId('650e500bef3885ad35ac5b5f'), 'Employee_id': 2, 'First_name': 'James', 'Last_name': 'Smith', 'email': 'James@hotmail.com', 'Contact': '5644-912-410', 'Job_Title': 'Tech Lead', 'City': 'St.Pete', 'Zip_code': '33339', 'State': 'Florida', 'Pay': 50000, 'Company_id': 6}
{'_id': ObjectId('650e500bef3885ad35ac5b60'), 'Employee_id': 3, 'First_name': 'Betty', 'Last_name': 'Hart', 'email': 'Betty@hotmail.com', 'Contact': '5623-757-087', 'Job_Title': 'Software Engineer', 'City': 'Tampa', 'Zip_code': '33813', 'State': 'Florida', 'Pay': 50000, 'Company_id': 4}
{'_id': ObjectId('650e500bef3885ad35ac5b61'), 'Employee_id': 4, 'First_name': 'John', 'Last_name': 'Smith', 'email': 'John@hotmail.com', 'Contact': '5

### Demonstrate an aggregation query on the data

#### Since we already have data about companies from tutorial lets use this to do aggregation on data 

In [42]:
companies_db = client.companies
employees_db = client.office
companies_collection = companies_db['reviews']
employee_collection = employees_db['Employees']


In [43]:
#companies_collection.drop()
#employee_collection.drop()

- Updating the companies table with a company id so that employees table can also be updated

In [44]:
cursor = companies_collection.find({})
for doc in cursor:
    com_id = randint(1,7)
    companies_collection.update_one({"_id":doc["_id"]},{ '$set': { 'Company_id': com_id}})

In [45]:
cursor = companies_collection.find({})
for doc in cursor:
    print(doc)

{'_id': ObjectId('650e38cb3a89870a77ab9fb6'), 'name': 'Fish Tastey Company', 'rating': 2, 'cuisine': 'American', 'Company_id': 1}
{'_id': ObjectId('650e38cc3a89870a77ab9fb7'), 'name': 'Pizza Goat Inc', 'rating': 3, 'cuisine': 'Italian', 'Company_id': 4}
{'_id': ObjectId('650e38cc3a89870a77ab9fb8'), 'name': 'Pizza Pizza Inc', 'rating': 2, 'cuisine': 'Pizza', 'Company_id': 4}
{'_id': ObjectId('650e38cc3a89870a77ab9fb9'), 'name': 'Animal Kitchen Inc', 'rating': 4, 'cuisine': 'Mexican', 'Company_id': 5}
{'_id': ObjectId('650e38cc3a89870a77ab9fba'), 'name': 'State Animal Company', 'rating': 2, 'cuisine': 'Fast Food', 'Company_id': 2}
{'_id': ObjectId('650e38cc3a89870a77ab9fbb'), 'name': 'Big Lazy Corporation', 'rating': 4, 'cuisine': 'Mexican', 'Company_id': 1}
{'_id': ObjectId('650e38cc3a89870a77ab9fbc'), 'name': 'City City Inc', 'rating': 3, 'cuisine': 'Italian', 'Company_id': 2}
{'_id': ObjectId('650e38cc3a89870a77ab9fbd'), 'name': 'Animal City Corporation', 'rating': 2, 'cuisine': 'Vege

- Updating employees table with the company id from company collections

In [46]:
cursor = employee_collection.find({})
com_cur = companies_collection.find({})
for doc in cursor:
    employee_collection.update_one({"_id":doc["_id"]},{ '$set': { 'Company_id':com_cur[randint(0,(companies_collection.count_documents({}))-1)]["Company_id"] }})

In [47]:
collection = db['Employees']
docs = collection.find({})
for doc in docs:
    print(doc)

{'_id': ObjectId('650e500aef3885ad35ac5b5e'), 'Employee_id': 1, 'First_name': 'James', 'Last_name': 'Hale', 'email': 'James@yahoo.in', 'Contact': '5216-680-645', 'Job_Title': 'Tech Lead', 'City': 'St.Pete', 'Zip_code': '33134', 'State': 'Florida', 'Pay': 50000, 'Company_id': 2}
{'_id': ObjectId('650e500bef3885ad35ac5b5f'), 'Employee_id': 2, 'First_name': 'James', 'Last_name': 'Smith', 'email': 'James@hotmail.com', 'Contact': '5644-912-410', 'Job_Title': 'Tech Lead', 'City': 'St.Pete', 'Zip_code': '33339', 'State': 'Florida', 'Pay': 50000, 'Company_id': 5}
{'_id': ObjectId('650e500bef3885ad35ac5b60'), 'Employee_id': 3, 'First_name': 'Betty', 'Last_name': 'Hart', 'email': 'Betty@hotmail.com', 'Contact': '5623-757-087', 'Job_Title': 'Software Engineer', 'City': 'Tampa', 'Zip_code': '33813', 'State': 'Florida', 'Pay': 50000, 'Company_id': 2}
{'_id': ObjectId('650e500bef3885ad35ac5b61'), 'Employee_id': 4, 'First_name': 'John', 'Last_name': 'Smith', 'email': 'John@hotmail.com', 'Contact': '5

#### Calculating the average pay of employees and grouping the results by job title, and sorting the results in descending order of average pay. 

In [57]:
averages = employee_collection.aggregate( [
   {
    "$match" : 
        { "Job_Title" : { "$ne" : None }}
   },
   {
    "$group": 
        { "_id": "$Job_Title", "avg Pay": { "$avg": "$Pay" }}
   },
   { 
    "$sort": 
        { "avg Pay": -1 }
   }
])
result1 = json.dumps(list(averages))

In [58]:
result1

'[{"_id": "Tech Lead", "avg Pay": 71111.11111111111}, {"_id": "Support", "avg Pay": 69583.33333333333}, {"_id": "Manager", "avg Pay": 68823.5294117647}, {"_id": "Team Lead", "avg Pay": 68695.65217391304}, {"_id": "Software Engineer", "avg Pay": 67777.77777777778}]'

####  Calculating the average pay of employees and grouping the results by job title,company and sorting the results in increasing order of Company id's descending order of Job_title.

In [59]:
averages = employee_collection.aggregate( [
   {
    "$match" : 
        { "Job_Title" : { "$ne" : None }}
   },
   { ##Creating a Project to avg pay based on Job title and Company id 
    "$project": 
        {'Job_Title':'$Job_Title',
         "Company" : '$Company_id',
          "avg_pay":{'$avg':'$Pay'} }
   },
   { ## Grouping the employees pay on each company and job title
    "$group": 
        { "_id": {"Company":"$Company","Job_Title": "$Job_Title"}, "avg Pay": { "$avg": "$avg_pay" }}
   },
   { 
    "$sort": 
        { "_id.Company":1,
         "Job_Title":-1
     }
   }
])
result2 = json.dumps(list(averages))

- Save the results from the query to either a JSON or BSON file format.

In [63]:
with open("data/averages-1.json", "w") as json_file:
    json_file.write(result1)

In [64]:
with open("data/averages-2.json", "w") as json_file:
    json_file.write(result2)