In [None]:
# https://towardsdatascience.com/how-to-generate-dummy-data-in-python-a05bce24a6c6

In [2]:
# Install faker
!pip install Faker

Collecting Faker
  Downloading Faker-12.3.0-py3-none-any.whl (1.5 MB)
Installing collected packages: Faker
Successfully installed Faker-12.3.0


In [None]:
# Generating generic marketing data which will include some common columns (data format) like:
# Transaction date (timestamp)
# Customer name (string)
# Gender (boolean)
# Email (string)
# City (string)
# Purchased product ID — barcode (string)
# Amount spent (float)

In [17]:
import pandas as pd
import random
from faker import Faker
from random import randrange
from datetime import datetime

In [18]:
no_of_customers = 1000

In [19]:
fake = Faker()
customers = []

In [20]:
for customers_id in range(no_of_customers):
    # Create transaction date
    d1 = datetime.strptime(f'1/1/2021', '%d/%m/%Y')
    d2 = datetime.strptime(f'24/12/2021', '%d/%m/%Y')
    transaction_date = fake.date_between(d1,d2)
    
    # Create customer's name
    name = fake.name()
    
    # Create gender
    gender = random.choice(["M", "F"])
    
    # Create email
    email = fake.ascii_email()
    
    # Create city
    city = fake.city()
    
    # Create product ID in 8-digit barcode
    product_ID = fake.ean(length=8)
    
    # Create amount spent
    amount_spent = fake.pyfloat(right_digits=2, positive=True, min_value=1, max_value=100)
    
    customers.append([transaction_date, name, gender, email, city, product_ID, amount_spent])
    
print(customers)

[[datetime.date(2021, 4, 20), 'Jacob Hayes', 'M', 'bhammond@gmail.com', 'Henryside', '00253673', 13.62], [datetime.date(2021, 4, 3), 'Meagan Miller', 'F', 'katherinebennett@singh-knox.com', 'Hartbury', '82237707', 93.96], [datetime.date(2021, 5, 19), 'Robert Padilla', 'M', 'davisfrances@hotmail.com', 'Andersonland', '87659962', 3.1], [datetime.date(2021, 8, 14), 'Jennifer Farmer', 'M', 'norrisbrandon@jordan.net', 'Karenhaven', '24159142', 34.8], [datetime.date(2021, 8, 17), 'Jonathan Moore', 'M', 'perrylong@gmail.com', 'Youngmouth', '68029258', 7.8], [datetime.date(2021, 4, 8), 'Tanya Peters', 'F', 'michellejohnson@carter.com', 'New Catherineton', '58389355', 81.6], [datetime.date(2021, 11, 19), 'Justin Riley', 'F', 'henrythomas@sherman.biz', 'Paulchester', '05306848', 77.99], [datetime.date(2021, 6, 20), 'Dana Callahan', 'F', 'alan08@valdez.biz', 'West Ryan', '02189857', 74.16], [datetime.date(2021, 10, 3), 'Travis Duran', 'M', 'stephenerickson@cooper.com', 'Bryanthaven', '70008166', 

In [21]:
customers_df = pd.DataFrame(customers, columns=['Transaction_data', 'Name', 'Gender', 'Email', 'City', 'Product ID', 'Amount'])

pd.pandas.set_option('display.max_columns', None)
print(customers_df)

    Transaction_data              Name Gender  \
0         2021-04-20       Jacob Hayes      M   
1         2021-04-03     Meagan Miller      F   
2         2021-05-19    Robert Padilla      M   
3         2021-08-14   Jennifer Farmer      M   
4         2021-08-17    Jonathan Moore      M   
..               ...               ...    ...   
995       2021-11-09        Susan Rowe      F   
996       2021-02-21  Whitney Hatfield      F   
997       2021-07-30     David Elliott      M   
998       2021-10-14       Lisa Murphy      M   
999       2021-09-03       Tracy Smith      F   

                                 Email               City Product ID  Amount  
0                   bhammond@gmail.com          Henryside   00253673   13.62  
1      katherinebennett@singh-knox.com           Hartbury   82237707   93.96  
2             davisfrances@hotmail.com       Andersonland   87659962    3.10  
3             norrisbrandon@jordan.net         Karenhaven   24159142   34.80  
4               

In [35]:
# Insert dummy data to MongoDB
import pymongo
#"mongodb+srv://USERNAME:PASSWORD@cluster0.2wjy8.mongodb.net/DATABASENAME
client = pymongo.MongoClient("mongodb+srv://mongodb:mongodb@cluster0.2wjy8.mongodb.net/myFirstDatabase?retryWrites=true&w=majority")
db = client.test
print(db)

Database(MongoClient(host=['cluster0-shard-00-01.2wjy8.mongodb.net:27017', 'cluster0-shard-00-02.2wjy8.mongodb.net:27017', 'cluster0-shard-00-00.2wjy8.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-f9ce2x-shard-0', tls=True), 'test')


In [36]:
print(client.list_database_names())

['sudh', 'admin', 'local']


In [24]:
# MySQL                MongoDB
# DB                   DB
# Table                Collection
# Row/Record           Document

In [37]:
db2 = client['CustomerTransaction'] # DB

In [38]:
coll1 = db2['Tx_Data_Collection'] # Collection/Table Name

In [59]:
# Transaction date (timestamp)
# Customer name (string)
# Gender (boolean)
# Email (string)
# City (string)
# Purchased product ID — barcode (string)
# Amount spent (float)
# print(customers)
customers_updated = []
keys = ['transaction_date', 'name', 'gender', 'email', 'city', 'product_barcode', 'amount_spent']
for listItem in customers:
    listToStr = ','.join(map(str, listItem)) # join and split to put date and numbers into quotes
    res = dict(zip(keys, listToStr.split(',')))
    customers_updated.append(res)

customers_updated

[{'transaction_date': '2021-04-20',
  'name': 'Jacob Hayes',
  'gender': 'M',
  'email': 'bhammond@gmail.com',
  'city': 'Henryside',
  'product_barcode': '00253673',
  'amount_spent': '13.62'},
 {'transaction_date': '2021-04-03',
  'name': 'Meagan Miller',
  'gender': 'F',
  'email': 'katherinebennett@singh-knox.com',
  'city': 'Hartbury',
  'product_barcode': '82237707',
  'amount_spent': '93.96'},
 {'transaction_date': '2021-05-19',
  'name': 'Robert Padilla',
  'gender': 'M',
  'email': 'davisfrances@hotmail.com',
  'city': 'Andersonland',
  'product_barcode': '87659962',
  'amount_spent': '3.1'},
 {'transaction_date': '2021-08-14',
  'name': 'Jennifer Farmer',
  'gender': 'M',
  'email': 'norrisbrandon@jordan.net',
  'city': 'Karenhaven',
  'product_barcode': '24159142',
  'amount_spent': '34.8'},
 {'transaction_date': '2021-08-17',
  'name': 'Jonathan Moore',
  'gender': 'M',
  'email': 'perrylong@gmail.com',
  'city': 'Youngmouth',
  'product_barcode': '68029258',
  'amount_spen

In [60]:
coll1.insert_many(customers_updated)

<pymongo.results.InsertManyResult at 0x24f9b353e40>

In [56]:
test_keys = ["Rash", "Kil", "Varsha"]
test_values = ['2021-04-20', 'Jacob Hayes', 'M']
  
# Printing original keys-value lists
print ("Original key list is : " + str(test_keys))
print ("Original value list is : " + str(test_values))
  
# using zip()
# to convert lists to dictionary
res = dict(zip(test_keys, test_values))
  
# Printing resultant dictionary 
print ("Resultant dictionary is : " +  str(res))

Original key list is : ['Rash', 'Kil', 'Varsha']
Original value list is : ['2021-04-20', 'Jacob Hayes', 'M']
Resultant dictionary is : {'Rash': '2021-04-20', 'Kil': 'Jacob Hayes', 'Varsha': 'M'}


In [61]:
coll1.find()

<pymongo.cursor.Cursor at 0x24f9c993160>

In [68]:
count = 0
for i in coll1.find({"transaction_date":{'$gt' : '2021-06-20'}, "gender":"F"}):
    count = count + 1

print(count)

255


In [84]:
# db.users.find({"name": {"$regex": "string", "$options": "i"}})

count = 0
for i in coll1.find({"gender":"M", "name": {"$regex": "^m", "$options": "i"}}): #  /^m/
    count = count + 1

print(count)

51
