In [59]:
import pymongo
from pymongo import MongoClient
import os
import pandas as pd

In [61]:
cluster=MongoClient('mongodb://localhost:27017')
#create database
db=cluster['etl-project']  
#creating collections
election=db['election'] 
census=db['census'] 
vaccine=db['vaccine'] 
combined=db['combined']

In [82]:
#to find all the databases
cluster.list_database_names()

['ClassDB',
 'MissionMars',
 'admin',
 'commerce_db',
 'config',
 'etl-project',
 'etl_project',
 'fruits_db',
 'local',
 'store_inventory']

In [83]:
#to find all collections for particular database
db.list_collection_names()

['election', 'census', 'combined', 'vaccine']

In [64]:
# path of cleaned csv files
path_election=os.path.join('..','data', 'cleaned_data', 'election_data.csv')
path_vaccine=os.path.join('..','data', 'cleaned_data', 'vaccine_data.csv')
path_census=os.path.join('..','data', 'cleaned_data', 'census_data.csv')
path_comb=os.path.join('..','data', 'cleaned_data', 'combined_data.csv')

In [65]:
#creating a function to insert csv files in MongoDB collections
def csv_to_json(filename, header=0):
    data = pd.read_csv(filename, header=header)
    return data.to_dict('records')#records is list like

#print(csv_to_json(path_election))


In [66]:
#Insert a record in collections:
election.insert_many(csv_to_json(path_election))
census.insert_many(csv_to_json(path_census))
vaccine.insert_many(csv_to_json(path_vaccine))
combined.insert_many(csv_to_json(path_comb))

<pymongo.results.InsertManyResult at 0x7faa7fa3f580>

## Election-collection

In [96]:
#To select data from a collections in MongoDB, 
election.find_one()

{'_id': ObjectId('6180100eab2967b695b8f303'),
 'County': 'Abbeville County',
 'State': 'SC',
 'Republic': 0.661,
 'Democrate': 0.33,
 'Win': 'R'}

In [87]:
# to select all where democrat won(limiting to 10 only)
for x in election .find({'Win':"D"}).limit(10):
    print(x)

{'_id': ObjectId('6180100eab2967b695b8f305'), 'County': 'Abington County', 'State': 'MA', 'Republic': 0.439, 'Democrate': 0.537, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f309'), 'County': 'Acton County', 'State': 'MA', 'Republic': 0.177, 'Democrate': 0.798, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f30b'), 'County': 'Acworth County', 'State': 'NH', 'Republic': 0.473, 'Democrate': 0.515, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f311'), 'County': 'Adams County', 'State': 'MS', 'Republic': 0.426, 'Democrate': 0.564, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f312'), 'County': 'Adams County', 'State': 'MA', 'Republic': 0.322, 'Democrate': 0.652, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f31b'), 'County': 'Adams County', 'State': 'CO', 'Republic': 0.404, 'Democrate': 0.567, 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f31f'), 'County': 'Addison County', 'State': 'VT', 'Republic': 0.436, 'Democrate': 0.511, 'Win': 'D'}
{'_id': ObjectId('6180100ea

In [89]:
# to select only county where democrat won(limiting to 10 only)
#1 will show that columns
for x in election .find({'Win':"D"},{"_id":0,"County":1}).limit(10):
    print(x)

{'County': 'Abington County'}
{'County': 'Acton County'}
{'County': 'Acworth County'}
{'County': 'Adams County'}
{'County': 'Adams County'}
{'County': 'Adams County'}
{'County': 'Addison County'}
{'County': 'Alachua County'}
{'County': 'Alameda County'}
{'County': 'Albany County'}


In [103]:
#selecting specific columns of collection election(1- will show columns)
for x in election.find({}, {"County":1,"Win":1}).limit(10):
    print(x)

{'_id': ObjectId('6180100eab2967b695b8f303'), 'County': 'Abbeville County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f304'), 'County': 'Abbot County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f305'), 'County': 'Abington County', 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f306'), 'County': 'Acadia County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f307'), 'County': 'Accomack County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f308'), 'County': 'Acton County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f309'), 'County': 'Acton County', 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f30a'), 'County': 'Acushnet County', 'Win': 'R'}
{'_id': ObjectId('6180100eab2967b695b8f30b'), 'County': 'Acworth County', 'Win': 'D'}
{'_id': ObjectId('6180100eab2967b695b8f30c'), 'County': 'Ada County', 'Win': 'R'}


In [102]:
#to update collections
#election.update({}, {$set:{}})
election.update_one({"_id":"6180100eab2967b695b8f303"}, {"$set":{"County":"Abbeville"}})

<pymongo.results.UpdateResult at 0x7faa7c487240>

## Vaccine-collection

In [90]:
#selecting specific columns of collection vaccine 
for x in vaccine.find({}, {"_id":0,"County":1,'Fully_Vaccinated':1}).limit(10):
    print(x)

{'County': 'Marengo County', 'Fully_Vaccinated': 9272}
{'County': 'Taylor County', 'Fully_Vaccinated': 12516}
{'County': 'Jefferson County', 'Fully_Vaccinated': 25225}
{'County': 'Musselshell County', 'Fully_Vaccinated': 1443}
{'County': 'Powder River County', 'Fully_Vaccinated': 330}
{'County': 'Henry County', 'Fully_Vaccinated': 22954}
{'County': 'Catawba County', 'Fully_Vaccinated': 78378}
{'County': 'Ohio County', 'Fully_Vaccinated': 5232}
{'County': 'Adams County', 'Fully_Vaccinated': 10165}
{'County': 'McDowell County', 'Fully_Vaccinated': 21385}


In [105]:
#to find county that start with letter a in combined collection
myquery = { "County": { "$regex": "^A" } }

comb = combined.find(myquery,{"_id":0,"County":1,"Black Population Rate":1 })

for x in comb:
    print(x)

{'County': 'Adams County', 'Black Population Rate': 0.3940728709080766}
{'County': 'Arapahoe County', 'Black Population Rate': 9.169297253720153}
{'County': 'Avery County', 'Black Population Rate': 1.4427744646468164}
{'County': 'Albany County', 'Black Population Rate': 0.8664203859361786}
{'County': 'Alger County', 'Black Population Rate': 1.7536294431301536}
{'County': 'Avoyelles Parish', 'Black Population Rate': 19.940407715187387}
{'County': 'Anne Arundel County', 'Black Population Rate': 13.308040327422331}
{'County': 'Allen County', 'Black Population Rate': 8.794014274823654}
{'County': 'Adair County', 'Black Population Rate': 0.4471711214535487}
{'County': 'Acadia Parish', 'Black Population Rate': 10.600556337446946}
{'County': 'Ashland County', 'Black Population Rate': 0.1467742049933831}
{'County': 'Albemarle County', 'Black Population Rate': 10.81516645569755}
{'County': 'Androscoggin County', 'Black Population Rate': 0.7489576970212145}
{'County': 'Ashe County', 'Black Popul