## Part 1: Database and Jupyter Notebook Set Up

Imported the data set with following command:

`"C:\Program Files\MongoDB\Tools\100\bin\mongoimport.exe" --type=json --db=project_3 --collection=museums_mx --jsonArray --drop --file=0_museo_directorio.json`

'"C:\Program Files\MongoDB\Tools\100\bin\mongoimport.exe" --type=csv --db=project_3 --collection=museums_mx_csv --headerline --drop --file=0_museo_directorio_coordenadas.csv'

In [1]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# confirm that our new database was created
pprint(mongo.list_database_names())

['admin',
 'autosaurus',
 'config',
 'epa',
 'local',
 'met',
 'petsitly_marketing',
 'project_3',
 'travel_db',
 'uk_food']


In [4]:
# assign the project_3 database to a variable name
db = mongo['project_3']

In [5]:
# review the collections in our new database
db.list_collection_names()

['museums_mx_csv', 'museums_mx']

In [6]:
# review a document in the collection
pprint(db.museums_mx_csv.find_one())

{'_id': ObjectId('65aff3010afc4daead521ff1'),
 'aprox_loc': '',
 'email': 'museoescarcega@hotmail.com',
 'estado_id': 1,
 'fecha_mod': '9/26/2022 07:34',
 'gmaps_latitud': 21.88473,
 'gmaps_longitud': -102.28575,
 'link_sic': 'http://sic.gob.mx/ficha.php?table=museo&table_id=1808',
 'localidad_id': 1,
 'municipio_id': 1,
 'museo_adscripcion': 'Museo Escárcega, A.C.',
 'museo_calle_numero': 'Ezequiel A. Chávez 311',
 'museo_colonia': 'Barrio de la Purísima',
 'museo_cp': 20000,
 'museo_fecha_fundacion': '5/21/2015',
 'museo_id': 1808,
 'museo_nombre': 'Museo Escárcega',
 'museo_telefono1': '449 918 67 30',
 'museo_tematica_n1': 'Arte',
 'nom_ent': 'Aguascalientes',
 'nom_loc': 'Aguascalientes',
 'nom_mun': 'Aguascalientes',
 'pagina_web': 'https://museoeer.wixsite.com/muse/acerca-de-nosotros',
 'pagina_web2': '',
 'twitter': ''}


In [7]:
# assign the collection to a variable
museos = db['museums_mx_csv']

## Part 2: Update the collection

Identify the documents with missing values in museo_tematica_n1 and set to TND (Temática No Definida).

In [8]:
# Find the documents with missing values in museo_tematica_n1
query_tema = {"museo_tematica_n1" : "",}
field_tema = {"museo_tematica_n1" : True,}

tema = museos.find(query_tema, field_tema)
pprint(list(tema))

[{'_id': ObjectId('65aff3010afc4daead521ff9'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead521ffe'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522011'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522013'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead52201b'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead52201c'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead52201f'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522021'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522024'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522026'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522028'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead52202a'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead52202d'), 'museo_tematica_n1': ''},
 {'_id': ObjectId('65aff3010afc4daead522031'), 'mus

In [9]:
print(museos.count_documents(query_tema))
print(museos.count_documents({"museo_tematica_n1" : "TND",}))

341
48


In [10]:
# Update the documents with TND
new_tema = {"$set" : {"museo_tematica_n1" : "TND"}}
museos.update_many(query_tema, new_tema)

<pymongo.results.UpdateResult at 0x180385e5240>

In [11]:
print(museos.count_documents(query_tema))
print(museos.count_documents({"museo_tematica_n1" : "TND",}))

0
389


Identify the documents with erroneous museo_fecha_fundacion and set to NA

In [12]:
# Find the documents with "fundacion" value set to year 1000 in museo_fecha_fundacion
query_fecha = {"museo_fecha_fundacion" : "1000-01-01",}
field_fecha = {"museo_fecha_fundacion" : True,}

fecha = museos.find(query_fecha, field_fecha)
pprint(list(fecha))

[{'_id': ObjectId('65aff3010afc4daead521ff9'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522011'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522021'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522038'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead52203c'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522043'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522047'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead52204e'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522051'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522059'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65aff3010afc4daead522073'),
  'museo_fecha_fundacion': '1000-01-01'},
 {'_id': ObjectId('65

In [13]:
print(museos.count_documents(query_fecha))
print(museos.count_documents({'museo_fecha_fundacion': 'NA',}))

102
0


In [14]:
# Update the documents found with NA
new_fecha = {"$set" : {"museo_fecha_fundacion" : "NA"}}
museos.update_many(query_fecha, new_fecha)

<pymongo.results.UpdateResult at 0x1803a64a240>

In [15]:
print(museos.count_documents(query_fecha))
print(museos.count_documents({'museo_fecha_fundacion': 'NA',}))

0
102


Identify the documents with gmaps_longitud = 0 and gmaps_longitud = 0 and set to ""

In [16]:
# Find the documents with gmaps_longitud = 0 and gmaps_longitud = 0
query_lnglat = {"gmaps_longitud" : 0,}
field_lnglat = {"museo_id" : True, "gmaps_longitud" : True, "gmaps_latitud" : True,}

lnglat = museos.find(query_lnglat, field_lnglat)
pprint(list(lnglat))

[]


In [17]:
print(museos.count_documents(query_lnglat))

0


In [38]:
# Update the documents found with ""
new_lnglat = {"$set" : {"gmaps_longitud" : "", "gmaps_latitud" : ""}}
museos.update_many(query_lnglat, new_lnglat)

<pymongo.results.UpdateResult at 0x172b7a6ba40>

In [39]:
print(museos.count_documents(query_lnglat))

0


In [41]:
# To update the different museums that lack coordinates...
# Find the documents with gmaps_longitud = 0 and gmaps_longitud = 0
query_lnglat_empty = {"gmaps_longitud" : "",}

lnglat_empty = museos.find(query_lnglat_empty, field_lnglat)
print(museos.count_documents(query_lnglat_empty))
pprint(list(lnglat_empty))

87
[{'_id': ObjectId('65af2187ceaf96681e9e97f3'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 930},
 {'_id': ObjectId('65af2187ceaf96681e9e980b'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 1825},
 {'_id': ObjectId('65af2187ceaf96681e9e9828'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 1832},
 {'_id': ObjectId('65af2187ceaf96681e9e982e'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 33},
 {'_id': ObjectId('65af2187ceaf96681e9e9839'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 1822},
 {'_id': ObjectId('65af2187ceaf96681e9e985e'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 1910},
 {'_id': ObjectId('65af2187ceaf96681e9e9867'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 1835},
 {'_id': ObjectId('65af2187ceaf96681e9e986e'),
  'gmaps_latitud': '',
  'gmaps_longitud': '',
  'museo_id': 25},
 {'_id': ObjectId('65af2187ceaf96681e9e9875'),
  'gmaps_latitud': '',
  'gmaps_lon

In [48]:
csv_df = pd.read_csv("./Datasets/0_museo_directorio_coordenadas_utf8.csv")
csv_df.head

<bound method NDFrame.head of       museo_id                                       museo_nombre  \
0          929                               Museo Ferrocarrilero   
1         1808                                    Museo Escárcega   
2          361                                 Museo de la Fauna    
3          602  Descubre. Museo Interactivo de Ciencia y Tecno...   
4         1069             Museo del Juguete Tradicional Mexicano   
...        ...                                                ...   
1605      2046                 Museo de la Industria de Reuniones   
1606       649                           Ex Templo de San Agustín   
1607      2291                      Museo Temático Playmohistoria   
1608      1571             Centro Interactivo de Ciencias Zig Zag   
1609       454                             Museo Francisco Goitia   

         museo_tematica_n1 museo_fecha_fundacion  \
0                 Historia              3/4/2003   
1                     Arte           

In [54]:
coords_df = csv_df["aprox_loc"]
csv_df[coords_df]

KeyError: "None of [Index([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,\n       ...\n       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],\n      dtype='object', length=1610)] are in the [columns]"

In [None]:
## End of project work - 20240116