# Cleaning Notebook

In [None]:
# Install pyjsonviewer for the running kernel.
# NOTE(review): no visible cell imports pyjsonviewer — confirm it is still needed.
%pip install pyjsonviewer

In [None]:
!pip install cartoframes

In [None]:
# Install geopandas for the running kernel (imported below as `gpd`).
%pip install geopandas

In [1]:
import requests
import json
from dotenv import load_dotenv
import os
import pandas as pd
import sys
# NOTE(review): absolute, machine-specific path — `functions` will not be importable on
# any other machine; consider a path relative to the repository instead.
sys.path.append('/Users/maperezdeayalas/Documents/Ironhack/Geospatial-Data-Project-/SRC')
import functions as fc  # project helpers used below: geocode, get_parameters, clean_data
from functools import reduce
import operator
import geopandas as gpd
import shapely.geometry
from pymongo import MongoClient
from pymongo import GEOSPHERE

#### As mentioned in the README, the cities I am going to work with for this project are: Madrid, Tel Aviv & San Francisco 

## I. Establishing the key coordinates for each city

#### For San Francisco and Tel Aviv I have taken the main train station situated at the center of each city, and for Madrid the 'Torre Picasso' building, situated next to the Nuevos Ministerios train station.


#### To get the coordinates I have used a function and the geocode API 

In [2]:
# Geocode the Madrid reference point ('Torre Picasso') with the project helper.
# NOTE(review): the displayed result holds the coordinates as strings in [lat, lng] order,
# whereas GeoJSON positions are numeric [lng, lat] — confirm how downstream helpers read them.
madrid_location = fc.geocode('Torre Picasso, Madrid')
madrid_location

{'type': 'Point', 'coordinates': ['40.45007', '-3.69280']}

In [3]:
# Geocode the San Francisco reference point (Caltrain station) with the project helper.
# NOTE(review): the displayed latitude (37.68) looks south of downtown San Francisco —
# verify that the geocoder matched the intended station.
san_francisco_location = fc.geocode('Caltrain Station San Francisco')
san_francisco_location

{'type': 'Point', 'coordinates': ['37.68461', '-122.39846']}

In [6]:
# Geocode the Tel Aviv reference point (Savidor Mercaz station) with the project helper.
tel_aviv_location = fc.geocode('Tel Aviv Savidor Mercaz')
tel_aviv_location

{'type': 'Point', 'coordinates': ['32.08757', '34.78461']}

## II. Calling Foursquare API

#### Now for each of my requirements I am going to call the Foursquare API using a function 

In [11]:
# Load variables from a local .env file into the environment.
# Presumably these are the API credentials read by the `fc` helpers — TODO confirm.
load_dotenv()

True

In [12]:
# Foursquare v2 "explore" endpoint.
# NOTE(review): no other visible cell references `url_query` — confirm it is still needed.
url_query = 'https://api.foursquare.com/v2/venues/explore'

In [13]:
# Venue searches to run for every city; the same strings are used below as keys
# when indexing the per-city results.
queries = ['Pub', 'Basketball Court', 'Starbucks', 'Preschool', 'Train Station']

In [14]:
# Run the project helper for the Madrid reference point, passing every entry of `queries`.
# The result is indexed by query name (e.g. madrid['Pub']).
madrid = fc.get_parameters(madrid_location['coordinates'], *queries)
# madrid

In [15]:
# Run the project helper for the San Francisco reference point, passing every entry of
# `queries`. The result is indexed by query name.
san_francisco = fc.get_parameters(san_francisco_location['coordinates'], *queries)
# san_francisco

In [16]:
# Run the project helper for the Tel Aviv reference point, passing every entry of
# `queries`. The result is indexed by query name.
tel_aviv = fc.get_parameters(tel_aviv_location['coordinates'], *queries)
# tel_aviv

In [17]:
# Inspect the first venue returned for the 'Pub' query to see which fields are available.
madrid['Pub']['response']['groups'][0]['items'][0]['venue'] # Here is where I have all my key data

{'id': '4adcda35f964a5207a3b21e3',
 'name': 'The Irish Rover',
 'contact': {},
 'location': {'address': 'Av. del Brasil 7',
  'lat': 40.45478612229017,
  'lng': -3.6937700295880824,
  'labeledLatLngs': [{'label': 'display',
    'lat': 40.45478612229017,
    'lng': -3.6937700295880824}],
  'distance': 531,
  'postalCode': '28020',
  'cc': 'ES',
  'city': 'Madrid',
  'state': 'Madrid',
  'country': 'España',
  'formattedAddress': ['Av. del Brasil 7', '28020 Madrid Madrid', 'España']},
 'categories': [{'id': '4bf58dd8d48988d11b941735',
   'name': 'Pub',
   'pluralName': 'Pubs',
   'shortName': 'Pub',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/pub_',
    'suffix': '.png'},
   'primary': True}],
 'verified': False,
 'stats': {'tipCount': 0,
  'usersCount': 0,
  'checkinsCount': 0,
  'visitsCount': 0},
 'beenHere': {'count': 0,
  'lastCheckinExpiredAt': 0,
  'marked': False,
  'unconfirmedCount': 0},
 'photos': {'count': 0, 'groups': []},
 'hereNow': {'count': 0,

In [18]:
# Short alias for the Madrid reference coordinates, passed to fc.clean_data below.
mad = madrid_location['coordinates']

In [20]:
# Flatten every venue of every query into a single list of cleaned records.
madrid_ = [
    fc.clean_data(item['venue'], mad)
    for query in queries
    for item in madrid[query]['response']['groups'][0]['items']
]
       

In [None]:
# madrid_

In [21]:
# Short alias for the San Francisco reference coordinates, passed to fc.clean_data below.
sf = san_francisco_location['coordinates']

In [22]:
# Flatten every venue of every query into a single list of cleaned records.
san_francisco_ = [
    fc.clean_data(item['venue'], sf)
    for query in queries
    for item in san_francisco[query]['response']['groups'][0]['items']
]

In [None]:
# san_francisco_

In [23]:
# Short alias for the Tel Aviv reference coordinates, passed to fc.clean_data below.
ta = tel_aviv_location['coordinates']

In [24]:
# Flatten every venue of every query into a single list of cleaned records.
tel_aviv_ = [
    fc.clean_data(item['venue'], ta)
    for query in queries
    for item in tel_aviv[query]['response']['groups'][0]['items']
]

In [25]:
# tel_aviv_

## III. Converting my data to Pandas & Geopandas

#### To visualize my information more easily, I have converted it to Pandas. This will also allow me to import the data to Mongo in a better way.

In [26]:
# One row per cleaned venue record.
madrid_df = pd.DataFrame(madrid_)
# Random preview (no random_state is set, so the rows shown differ between runs).
madrid_df.sample(5)

Unnamed: 0,name,lat,lng,shortName,location
25,Pop'n'Roll,40.447001,-3.66368,Rock Club,"{'type': 'Point', 'coordinates': [40.447001447..."
38,Starbucks,40.433564,-3.686829,Coffee Shop,"{'type': 'Point', 'coordinates': [40.433563917..."
86,AVE Puerta 13 Planta Baja,40.406242,-3.690179,Train Station,"{'type': 'Point', 'coordinates': [40.406242, -..."
70,Estación del Norte,40.42123,-3.719121,Train Station,"{'type': 'Point', 'coordinates': [40.421230335..."
28,Maloney's Madrid,40.439339,-3.693051,Pub,"{'type': 'Point', 'coordinates': [40.439339291..."


In [27]:
# Sanity check: confirm the object is a pandas DataFrame.
type(madrid_df)

pandas.core.frame.DataFrame

In [28]:
# Show one random row of the Madrid table.
madrid_df.sample()

Unnamed: 0,name,lat,lng,shortName,location
16,La Fontanilla,40.413733,-3.708182,Pub,"{'type': 'Point', 'coordinates': [40.413732765..."


In [30]:
# Build point geometries (x = longitude, y = latitude) and attach them to the venue table.
madrid_points = gpd.points_from_xy(madrid_df['lng'], madrid_df['lat'])
madrid_gdf = gpd.GeoDataFrame(madrid_df, geometry=madrid_points)


In [31]:
# Show one random row to check the new 'geometry' column.
madrid_gdf.sample()

Unnamed: 0,name,lat,lng,shortName,location,geometry
0,The Irish Rover,40.454786,-3.69377,Pub,"{'type': 'Point', 'coordinates': [40.454786122...",POINT (-3.69377 40.45479)


In [32]:
# Discard the raw 'location' dict now that 'geometry' carries the point.
# Rebinding the result (instead of inplace=True) with errors='ignore' keeps this cell
# safe to re-run: a second execution no longer raises KeyError for the missing column.
madrid_gdf = madrid_gdf.drop(columns='location', errors='ignore')

In [33]:
# Show one random row to confirm 'location' is gone.
madrid_gdf.sample()

Unnamed: 0,name,lat,lng,shortName,geometry
10,Fogg Birra and Cheese,40.412275,-3.697988,Pub,POINT (-3.69799 40.41228)


In [34]:
# One row per cleaned venue record.
san_francisco_df = pd.DataFrame(san_francisco_)
# Show one random row as a preview.
san_francisco_df.sample()

Unnamed: 0,name,lat,lng,shortName,location
17,South San Francisco Caltrain Station,37.657019,-122.404947,Train Station,"{'type': 'Point', 'coordinates': [37.657018648..."


In [35]:
# Attach point geometries (x = longitude, y = latitude), then discard the raw 'location' dict.
san_francisco_points = gpd.points_from_xy(san_francisco_df['lng'], san_francisco_df['lat'])
san_francisco_gdf = gpd.GeoDataFrame(
    san_francisco_df, geometry=san_francisco_points
).drop(columns='location')
san_francisco_gdf.sample()


Unnamed: 0,name,lat,lng,shortName,geometry
18,Caltrain #139,37.707563,-122.401732,Train,POINT (-122.40173 37.70756)


In [36]:
# Tabulate the cleaned records, attach point geometries (x = longitude, y = latitude),
# then discard the raw 'location' dict.
tel_aviv_df = pd.DataFrame(tel_aviv_)
tel_aviv_points = gpd.points_from_xy(tel_aviv_df['lng'], tel_aviv_df['lat'])
tel_aviv_gdf = gpd.GeoDataFrame(
    tel_aviv_df, geometry=tel_aviv_points
).drop(columns='location')
tel_aviv_gdf.sample()


Unnamed: 0,name,lat,lng,shortName,geometry
55,HaHagana Train Station (תחנת רכבת תל אביב ההגנה),32.05393,34.78468,Train Station,POINT (34.78468 32.05393)


## IV. Importing the data to Mongo

In [37]:
# Connect to the MongoDB server on localhost:27017 and select the project database.
client = MongoClient("localhost:27017")
db = client.get_database("geospatial_project_ironhack")
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'geospatial_project_ironhack')

In [39]:
# Handles to one collection per city.
# NOTE(review): `madrid`, `san_francisco` and `tel_aviv` held the API results in section II;
# rebinding them here means those earlier cells cannot be re-run afterwards without
# fetching the data again. Distinct names (e.g. `*_collection`) would avoid the clash.
madrid = db.get_collection("madrid")
san_francisco = db.get_collection("san_francisco")
tel_aviv = db.get_collection("tel_aviv")

In [40]:
# Replace each shapely point with its GeoJSON-like dict so the records can be stored in MongoDB.
madrid_gdf['geometry'] = madrid_gdf['geometry'].apply(shapely.geometry.mapping)



In [41]:
# One dict per row, ready to be inserted as documents.
madrid_dic = madrid_gdf.to_dict(orient="records")

In [42]:
# Create a 2dsphere index on 'geometry' (reported as 'geometry_2dsphere' in the output).
madrid.create_index([("geometry", GEOSPHERE)])

'geometry_2dsphere'

In [43]:
# Bulk-insert the Madrid venue documents.
# NOTE(review): not idempotent — re-running this cell presumably either duplicates the
# documents or fails on duplicate `_id`s; confirm before re-executing.
madrid.insert_many(madrid_dic)

<pymongo.results.InsertManyResult at 0x7fe652ae0ec0>

In [44]:
# Replace each shapely point with its GeoJSON-like dict so the records can be stored in MongoDB.
san_francisco_gdf['geometry'] = san_francisco_gdf['geometry'].apply(shapely.geometry.mapping)



In [45]:
# One dict per row, ready to be inserted as documents.
san_francisco_dic = san_francisco_gdf.to_dict(orient="records")

In [46]:
# Create a 2dsphere index on 'geometry' (reported as 'geometry_2dsphere' in the output).
san_francisco.create_index([("geometry", GEOSPHERE)])

'geometry_2dsphere'

In [47]:
# Bulk-insert the San Francisco venue documents.
# NOTE(review): not idempotent — re-running this cell presumably either duplicates the
# documents or fails on duplicate `_id`s; confirm before re-executing.
san_francisco.insert_many(san_francisco_dic)

<pymongo.results.InsertManyResult at 0x7fe6531b2b40>

In [48]:
# Replace each shapely point with its GeoJSON-like dict so the records can be stored in MongoDB.
tel_aviv_gdf['geometry'] = tel_aviv_gdf['geometry'].apply(shapely.geometry.mapping)



In [49]:
# One dict per row, ready to be inserted as documents.
tel_aviv_dic = tel_aviv_gdf.to_dict(orient="records")

In [50]:
# Create a 2dsphere index on 'geometry' (reported as 'geometry_2dsphere' in the output).
tel_aviv.create_index([("geometry", GEOSPHERE)])

'geometry_2dsphere'

In [51]:
# Bulk-insert the Tel Aviv venue documents.
# NOTE(review): not idempotent — re-running this cell presumably either duplicates the
# documents or fails on duplicate `_id`s; confirm before re-executing.
tel_aviv.insert_many(tel_aviv_dic)

<pymongo.results.InsertManyResult at 0x7fe6531c5380>