## Acquiring live Dublin Cycles data through its API and storing it in MongoDB

### Loading required libraries 

In [1]:
import functools
import json
import os
import time

import pandas as pd
import requests

In [2]:
# Read the API key from the environment so the secret is never stored in the notebook.
# NOTE(review): the key that was previously hard-coded here has been exposed and should be rotated.
api_key = os.environ.get("JCDECAUX_API_KEY")
if not api_key:
    raise RuntimeError("Set the JCDECAUX_API_KEY environment variable before running this notebook")

In [3]:
# Stations endpoint for the "dublin" contract; the API key is sent as the apiKey query parameter
endpoint = f"https://api.jcdecaux.com/vls/v1/stations?contract=dublin&apiKey={api_key}"

In [4]:
# Request the live station data; the timeout prevents the cell from hanging forever
# and raise_for_status() surfaces HTTP errors (e.g. a rejected API key) immediately
resp = requests.get(endpoint, timeout=30)
resp.raise_for_status()

In [5]:
# Display the response object; <Response [200]> means the API call succeeded
resp

<Response [200]>

In [6]:
# Peek at the start of the raw payload fetched from the API (a bounded slice,
# so the notebook is not flooded with the entire response body)
resp.content[:500]

b'[{"number":42,"contract_name":"dublin","name":"SMITHFIELD NORTH","address":"Smithfield North","position":{"lat":53.349562,"lng":-6.278198},"banking":false,"bonus":false,"bike_stands":30,"available_bike_stands":27,"available_bikes":3,"status":"OPEN","last_update":1712839473000},{"number":30,"contract_name":"dublin","name":"PARNELL SQUARE NORTH","address":"Parnell Square North","position":{"lat":53.3537415547453,"lng":-6.26530144781526},"banking":false,"bonus":false,"bike_stands":20,"available_bike_stands":18,"available_bikes":1,"status":"OPEN","last_update":1712839506000},{"number":54,"contract_name":"dublin","name":"CLONMEL STREET","address":"Clonmel Street","position":{"lat":53.336021,"lng":-6.26298},"banking":false,"bonus":false,"bike_stands":33,"available_bike_stands":29,"available_bikes":4,"status":"OPEN","last_update":1712839542000},{"number":108,"contract_name":"dublin","name":"AVONDALE ROAD","address":"Avondale Road","position":{"lat":53.359405,"lng":-6.276142},"banking":false

In [7]:
# Decode the JSON body of the response into Python objects
data = resp.json()

In [8]:
# Build a pandas DataFrame from the decoded JSON data
df = pd.DataFrame(data)

In [9]:
# Display the station table built from the API response
df

Unnamed: 0,number,contract_name,name,address,position,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update
0,42,dublin,SMITHFIELD NORTH,Smithfield North,"{'lat': 53.349562, 'lng': -6.278198}",False,False,30,27,3,OPEN,1712839473000
1,30,dublin,PARNELL SQUARE NORTH,Parnell Square North,"{'lat': 53.3537415547453, 'lng': -6.2653014478...",False,False,20,18,1,OPEN,1712839506000
2,54,dublin,CLONMEL STREET,Clonmel Street,"{'lat': 53.336021, 'lng': -6.26298}",False,False,33,29,4,OPEN,1712839542000
3,108,dublin,AVONDALE ROAD,Avondale Road,"{'lat': 53.359405, 'lng': -6.276142}",False,False,35,22,13,OPEN,1712839422000
4,20,dublin,JAMES STREET EAST,James Street East,"{'lat': 53.336597, 'lng': -6.248109}",False,False,30,20,10,OPEN,1712839561000
...,...,...,...,...,...,...,...,...,...,...,...,...
109,39,dublin,WILTON TERRACE,Wilton Terrace,"{'lat': 53.332383, 'lng': -6.252717}",False,False,20,20,0,OPEN,1712839496000
110,83,dublin,EMMET ROAD,Emmet Road,"{'lat': 53.340714, 'lng': -6.308191}",False,False,40,31,9,OPEN,1712839465000
111,92,dublin,HEUSTON BRIDGE (NORTH),Heuston Bridge (North),"{'lat': 53.347802, 'lng': -6.292432}",False,False,40,36,4,OPEN,1712839445000
112,21,dublin,LEINSTER STREET SOUTH,Leinster Street South,"{'lat': 53.34218, 'lng': -6.254485}",False,False,30,9,21,OPEN,1712839467000


### We can observe from the data that `last_update` is a timestamp expressed in milliseconds, so we will write a function to convert it into a human-readable format

In [10]:
#Defining a function to convert a millisecond timestamp into date and time
from datetime import datetime

def time_stamp_convert(time_ms, tz=None):
    """Format an epoch timestamp given in milliseconds as 'YYYY-MM-DD HH:MM:SS'.

    Parameters
    ----------
    time_ms : int or float
        Milliseconds since the Unix epoch.
    tz : datetime.tzinfo, optional
        Time zone in which to express the result. When omitted, the machine's
        local time zone is used (the original behaviour), so the output
        depends on where the notebook runs. Pass e.g. ``timezone.utc`` to get
        a result that is the same on every machine.

    Returns
    -------
    str
        The formatted date and time.
    """
    time_s = time_ms / 1000   #converting milliseconds to seconds
    return datetime.fromtimestamp(time_s, tz=tz).strftime('%Y-%m-%d %H:%M:%S')

In [11]:
#Applying the conversion to every value of the last_update column
df['last_update'] = df['last_update'].map(time_stamp_convert)

#### We can observe that the timestamp has been converted to human readable format

In [12]:
# Display the table again to check the converted last_update column
df

Unnamed: 0,number,contract_name,name,address,position,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update
0,42,dublin,SMITHFIELD NORTH,Smithfield North,"{'lat': 53.349562, 'lng': -6.278198}",False,False,30,27,3,OPEN,2024-04-11 13:44:33
1,30,dublin,PARNELL SQUARE NORTH,Parnell Square North,"{'lat': 53.3537415547453, 'lng': -6.2653014478...",False,False,20,18,1,OPEN,2024-04-11 13:45:06
2,54,dublin,CLONMEL STREET,Clonmel Street,"{'lat': 53.336021, 'lng': -6.26298}",False,False,33,29,4,OPEN,2024-04-11 13:45:42
3,108,dublin,AVONDALE ROAD,Avondale Road,"{'lat': 53.359405, 'lng': -6.276142}",False,False,35,22,13,OPEN,2024-04-11 13:43:42
4,20,dublin,JAMES STREET EAST,James Street East,"{'lat': 53.336597, 'lng': -6.248109}",False,False,30,20,10,OPEN,2024-04-11 13:46:01
...,...,...,...,...,...,...,...,...,...,...,...,...
109,39,dublin,WILTON TERRACE,Wilton Terrace,"{'lat': 53.332383, 'lng': -6.252717}",False,False,20,20,0,OPEN,2024-04-11 13:44:56
110,83,dublin,EMMET ROAD,Emmet Road,"{'lat': 53.340714, 'lng': -6.308191}",False,False,40,31,9,OPEN,2024-04-11 13:44:25
111,92,dublin,HEUSTON BRIDGE (NORTH),Heuston Bridge (North),"{'lat': 53.347802, 'lng': -6.292432}",False,False,40,36,4,OPEN,2024-04-11 13:44:05
112,21,dublin,LEINSTER STREET SOUTH,Leinster Street South,"{'lat': 53.34218, 'lng': -6.254485}",False,False,30,9,21,OPEN,2024-04-11 13:44:27


#### We can also observe that the `position` column holds latitude and longitude coordinates, so we will convert them into a human-readable address

#### For this conversion we will be using Nominatim API : https://nominatim.org/release-docs/latest/api/Reverse/

In [13]:
#Splitting the position values into two columns - latitude and longitude.
#The 'lat' and 'lng' values are selected by key, so the result does not depend
#on the order of the keys inside each position dict.
coords = pd.DataFrame(df['position'].tolist(), index=df.index)
df['latitude'] = coords['lat']
df['longitude'] = coords['lng']

In [14]:
# Display the table with the new latitude and longitude columns
df

Unnamed: 0,number,contract_name,name,address,position,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,latitude,longitude
0,42,dublin,SMITHFIELD NORTH,Smithfield North,"{'lat': 53.349562, 'lng': -6.278198}",False,False,30,27,3,OPEN,2024-04-11 13:44:33,53.349562,-6.278198
1,30,dublin,PARNELL SQUARE NORTH,Parnell Square North,"{'lat': 53.3537415547453, 'lng': -6.2653014478...",False,False,20,18,1,OPEN,2024-04-11 13:45:06,53.353742,-6.265301
2,54,dublin,CLONMEL STREET,Clonmel Street,"{'lat': 53.336021, 'lng': -6.26298}",False,False,33,29,4,OPEN,2024-04-11 13:45:42,53.336021,-6.262980
3,108,dublin,AVONDALE ROAD,Avondale Road,"{'lat': 53.359405, 'lng': -6.276142}",False,False,35,22,13,OPEN,2024-04-11 13:43:42,53.359405,-6.276142
4,20,dublin,JAMES STREET EAST,James Street East,"{'lat': 53.336597, 'lng': -6.248109}",False,False,30,20,10,OPEN,2024-04-11 13:46:01,53.336597,-6.248109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,39,dublin,WILTON TERRACE,Wilton Terrace,"{'lat': 53.332383, 'lng': -6.252717}",False,False,20,20,0,OPEN,2024-04-11 13:44:56,53.332383,-6.252717
110,83,dublin,EMMET ROAD,Emmet Road,"{'lat': 53.340714, 'lng': -6.308191}",False,False,40,31,9,OPEN,2024-04-11 13:44:25,53.340714,-6.308191
111,92,dublin,HEUSTON BRIDGE (NORTH),Heuston Bridge (North),"{'lat': 53.347802, 'lng': -6.292432}",False,False,40,36,4,OPEN,2024-04-11 13:44:05,53.347802,-6.292432
112,21,dublin,LEINSTER STREET SOUTH,Leinster Street South,"{'lat': 53.34218, 'lng': -6.254485}",False,False,30,9,21,OPEN,2024-04-11 13:44:27,53.342180,-6.254485


#### Writing a function to reverse geocode from latitude and longitude to a human readable address

In [15]:
@functools.lru_cache(maxsize=None)
def reverse_geocode(lat, lng):
    """Look up a human-readable address for a coordinate pair via Nominatim.

    Results are memoised per (lat, lng), so the same coordinates are not
    requested again on later calls. Every real request identifies this client
    with a User-Agent header and is followed by a one-second pause, in line
    with the public Nominatim usage policy.

    Parameters
    ----------
    lat, lng : float
        Latitude and longitude of the point to look up.

    Returns
    -------
    str or None
        The ``display_name`` field of the JSON response, or None when the
        response has no such field.

    Raises
    ------
    requests.HTTPError
        If Nominatim answers with an error status.
    """
    response = requests.get(
        "https://nominatim.openstreetmap.org/reverse",
        # let requests build and escape the query string
        params={"lat": lat, "lon": lng, "format": "json"},
        headers={"User-Agent": "dublin-cycles-notebook/1.0"},
        timeout=30,
    )
    response.raise_for_status()
    time.sleep(1)  # stay under the one-request-per-second ceiling
    data = response.json()
    return data.get('display_name')

In [16]:
# Reverse-geocode each station's coordinates, one lookup per row
df['position_address'] = [
    reverse_geocode(lat, lng)
    for lat, lng in zip(df['latitude'], df['longitude'])
]

### We can observe below that the coordinates from the `position` column have been reverse-geocoded into a readable address, stored in the new `position_address` column

In [17]:
# Display the table with the reverse-geocoded position_address column
df

Unnamed: 0,number,contract_name,name,address,position,banking,bonus,bike_stands,available_bike_stands,available_bikes,status,last_update,latitude,longitude,position_address
0,42,dublin,SMITHFIELD NORTH,Smithfield North,"{'lat': 53.349562, 'lng': -6.278198}",False,False,30,27,3,OPEN,2024-04-11 13:44:33,53.349562,-6.278198,"Smithfield, Arran Quay C ED, Dublin, County Du..."
1,30,dublin,PARNELL SQUARE NORTH,Parnell Square North,"{'lat': 53.3537415547453, 'lng': -6.2653014478...",False,False,20,18,1,OPEN,2024-04-11 13:45:06,53.353742,-6.265301,"Parnell Square North, Rotunda B Ward 1986, Dub..."
2,54,dublin,CLONMEL STREET,Clonmel Street,"{'lat': 53.336021, 'lng': -6.26298}",False,False,33,29,4,OPEN,2024-04-11 13:45:42,53.336021,-6.262980,"Harcourt Creche, 1, Clonmel Street, Saint Kevi..."
3,108,dublin,AVONDALE ROAD,Avondale Road,"{'lat': 53.359405, 'lng': -6.276142}",False,False,35,22,13,OPEN,2024-04-11 13:43:42,53.359405,-6.276142,"Avondale Road, Arran Quay A ED, Dublin, County..."
4,20,dublin,JAMES STREET EAST,James Street East,"{'lat': 53.336597, 'lng': -6.248109}",False,False,30,20,10,OPEN,2024-04-11 13:46:01,53.336597,-6.248109,"49, Baggot Street Lower, South Dock ED, Dublin..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,39,dublin,WILTON TERRACE,Wilton Terrace,"{'lat': 53.332383, 'lng': -6.252717}",False,False,20,20,0,OPEN,2024-04-11 13:44:56,53.332383,-6.252717,"Wilton Terrace, South Dock ED, Dublin, County ..."
110,83,dublin,EMMET ROAD,Emmet Road,"{'lat': 53.340714, 'lng': -6.308191}",False,False,40,31,9,OPEN,2024-04-11 13:44:25,53.340714,-6.308191,"Emmet Road, Kilmainham C Ward 1986, Dublin, Co..."
111,92,dublin,HEUSTON BRIDGE (NORTH),Heuston Bridge (North),"{'lat': 53.347802, 'lng': -6.292432}",False,False,40,36,4,OPEN,2024-04-11 13:44:05,53.347802,-6.292432,"Parkgate Street, Islandbridge, Phoenix Park ED..."
112,21,dublin,LEINSTER STREET SOUTH,Leinster Street South,"{'lat': 53.34218, 'lng': -6.254485}",False,False,30,9,21,OPEN,2024-04-11 13:44:27,53.342180,-6.254485,"Leinster Street South, Mansion House A Ward 19..."


In [18]:
#Removing the columns that are no longer needed
df = df.drop(["position", "banking", "bonus", "latitude", "longitude"], axis="columns")

In [19]:
# Display the table after dropping the unneeded columns
df

Unnamed: 0,number,contract_name,name,address,bike_stands,available_bike_stands,available_bikes,status,last_update,position_address
0,42,dublin,SMITHFIELD NORTH,Smithfield North,30,27,3,OPEN,2024-04-11 13:44:33,"Smithfield, Arran Quay C ED, Dublin, County Du..."
1,30,dublin,PARNELL SQUARE NORTH,Parnell Square North,20,18,1,OPEN,2024-04-11 13:45:06,"Parnell Square North, Rotunda B Ward 1986, Dub..."
2,54,dublin,CLONMEL STREET,Clonmel Street,33,29,4,OPEN,2024-04-11 13:45:42,"Harcourt Creche, 1, Clonmel Street, Saint Kevi..."
3,108,dublin,AVONDALE ROAD,Avondale Road,35,22,13,OPEN,2024-04-11 13:43:42,"Avondale Road, Arran Quay A ED, Dublin, County..."
4,20,dublin,JAMES STREET EAST,James Street East,30,20,10,OPEN,2024-04-11 13:46:01,"49, Baggot Street Lower, South Dock ED, Dublin..."
...,...,...,...,...,...,...,...,...,...,...
109,39,dublin,WILTON TERRACE,Wilton Terrace,20,20,0,OPEN,2024-04-11 13:44:56,"Wilton Terrace, South Dock ED, Dublin, County ..."
110,83,dublin,EMMET ROAD,Emmet Road,40,31,9,OPEN,2024-04-11 13:44:25,"Emmet Road, Kilmainham C Ward 1986, Dublin, Co..."
111,92,dublin,HEUSTON BRIDGE (NORTH),Heuston Bridge (North),40,36,4,OPEN,2024-04-11 13:44:05,"Parkgate Street, Islandbridge, Phoenix Park ED..."
112,21,dublin,LEINSTER STREET SOUTH,Leinster Street South,30,9,21,OPEN,2024-04-11 13:44:27,"Leinster Street South, Mansion House A Ward 19..."


## Uploading the data to mongodb server

In [20]:
import pymongo

# The connection string contains the database username and password, so it is read
# from the environment instead of being stored in the notebook.
# NOTE(review): the credentials that were previously hard-coded here have been exposed
# and should be rotated.
mongo_uri = os.environ.get("MONGODB_URI")
if not mongo_uri:
    raise RuntimeError("Set the MONGODB_URI environment variable to the MongoDB connection string")

client = pymongo.MongoClient(mongo_uri)
db = client["mydata"]          # database that receives the station data
collection = db['listings']    # collection the documents are inserted into

In [21]:
# Print the names of the databases visible to this client
print(client.list_database_names())

['sample_mflix', 'admin', 'local']


#### Converting the DataFrame to a list of dictionaries (one per row), the format expected by MongoDB's `insert_many`

In [22]:
# Turn each row into a dict and bulk-insert the resulting documents into the collection;
# the value returned by insert_many is the last expression, so it is shown as the cell output
data_dic = df.to_dict(orient='records')
collection.insert_many(data_dic)

InsertManyResult([ObjectId('6617dbf14a7d0d8ef6249c38'), ObjectId('6617dbf14a7d0d8ef6249c39'), ObjectId('6617dbf14a7d0d8ef6249c3a'), ObjectId('6617dbf14a7d0d8ef6249c3b'), ObjectId('6617dbf14a7d0d8ef6249c3c'), ObjectId('6617dbf14a7d0d8ef6249c3d'), ObjectId('6617dbf14a7d0d8ef6249c3e'), ObjectId('6617dbf14a7d0d8ef6249c3f'), ObjectId('6617dbf14a7d0d8ef6249c40'), ObjectId('6617dbf14a7d0d8ef6249c41'), ObjectId('6617dbf14a7d0d8ef6249c42'), ObjectId('6617dbf14a7d0d8ef6249c43'), ObjectId('6617dbf14a7d0d8ef6249c44'), ObjectId('6617dbf14a7d0d8ef6249c45'), ObjectId('6617dbf14a7d0d8ef6249c46'), ObjectId('6617dbf14a7d0d8ef6249c47'), ObjectId('6617dbf14a7d0d8ef6249c48'), ObjectId('6617dbf14a7d0d8ef6249c49'), ObjectId('6617dbf14a7d0d8ef6249c4a'), ObjectId('6617dbf14a7d0d8ef6249c4b'), ObjectId('6617dbf14a7d0d8ef6249c4c'), ObjectId('6617dbf14a7d0d8ef6249c4d'), ObjectId('6617dbf14a7d0d8ef6249c4e'), ObjectId('6617dbf14a7d0d8ef6249c4f'), ObjectId('6617dbf14a7d0d8ef6249c50'), ObjectId('6617dbf14a7d0d8ef6249c

#### Fetching the data back and checking if the data is inserted properly

In [23]:
# Spot-check a handful of stored documents rather than printing the whole collection,
# which grows with every upload
cursor = collection.find().limit(5)
for document in cursor:
    print(document)

{'_id': ObjectId('6617dbf14a7d0d8ef6249c38'), 'number': 42, 'contract_name': 'dublin', 'name': 'SMITHFIELD NORTH', 'address': 'Smithfield North', 'bike_stands': 30, 'available_bike_stands': 27, 'available_bikes': 3, 'status': 'OPEN', 'last_update': '2024-04-11 13:44:33', 'position_address': 'Smithfield, Arran Quay C ED, Dublin, County Dublin, Leinster, D07 WR63, Éire / Ireland'}
{'_id': ObjectId('6617dbf14a7d0d8ef6249c39'), 'number': 30, 'contract_name': 'dublin', 'name': 'PARNELL SQUARE NORTH', 'address': 'Parnell Square North', 'bike_stands': 20, 'available_bike_stands': 18, 'available_bikes': 1, 'status': 'OPEN', 'last_update': '2024-04-11 13:45:06', 'position_address': 'Parnell Square North, Rotunda B Ward 1986, Dublin, County Dublin, Leinster, D01 E7C1, Éire / Ireland'}
{'_id': ObjectId('6617dbf14a7d0d8ef6249c3a'), 'number': 54, 'contract_name': 'dublin', 'name': 'CLONMEL STREET', 'address': 'Clonmel Street', 'bike_stands': 33, 'available_bike_stands': 29, 'available_bikes': 4, 's

## Creating a pipeline to fetch live data every 10 minutes, apply the transformations, and upload the result to the MongoDB server

### Defining all the functions below again for ease of understanding

In [24]:
#defined function for data fetching from api
def data_fetch():
    """Fetch the current station snapshot for the Dublin contract as a DataFrame.

    The API key is read from the JCDECAUX_API_KEY environment variable so that
    the secret is not stored in the notebook.

    Returns
    -------
    pandas.DataFrame
        A frame built from the decoded JSON response.

    Raises
    ------
    RuntimeError
        If JCDECAUX_API_KEY is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    api_key = os.environ.get("JCDECAUX_API_KEY")
    if not api_key:
        raise RuntimeError("Set the JCDECAUX_API_KEY environment variable before fetching data")

    response = requests.get(
        "https://api.jcdecaux.com/vls/v1/stations",
        params={"contract": "dublin", "apiKey": api_key},
        timeout=30,  # never let one hung connection stall the pipeline
    )
    response.raise_for_status()
    data_acq = response.json()
    return pd.DataFrame(data_acq)

In [25]:
# defining data transformations

def data_transform(data):
    """Return a transformed copy of the raw station frame.

    The input frame is left untouched, so the function can safely be called
    again on the same data. The transformation:

    * converts ``last_update`` with ``time_stamp_convert``;
    * reverse-geocodes the ``lat``/``lng`` values of ``position`` into a new
      ``position_address`` column using ``reverse_geocode``;
    * drops the ``position``, ``banking`` and ``bonus`` columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with at least the ``last_update``, ``position``, ``banking``
        and ``bonus`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the transformations applied.
    """
    # Select coordinates by key so the result does not depend on dict key order
    coords = pd.DataFrame(data['position'].tolist(), index=data.index)
    transformed = data.assign(
        last_update=data['last_update'].apply(time_stamp_convert),
        position_address=[
            reverse_geocode(lat, lng)
            for lat, lng in zip(coords['lat'], coords['lng'])
        ],
    )
    return transformed.drop(columns=["position", "banking", "bonus"])

In [26]:
# defining uploading the data to mongo db server
def upload_mongo(data):
    """Insert every row of ``data`` as one document into the global ``collection``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose rows are uploaded. An empty frame is skipped, because
        ``insert_many`` rejects an empty document list.
    """
    data_dic = data.to_dict(orient='records')
    if not data_dic:
        return
    collection.insert_many(data_dic)

### Defining pipeline

In [29]:
from datetime import datetime
import time

def pipeline(duration_s=10 * 60 * 60, interval_s=600):
    """Repeatedly fetch, transform and upload the station data.

    Parameters
    ----------
    duration_s : float, optional
        Total time to keep running, in seconds. Defaults to 10 hours.
    interval_s : float, optional
        Pause between two cycles, in seconds. Defaults to 10 minutes.

    A network or database error in one cycle is reported and the loop carries
    on with the next cycle, so a single transient failure does not end the run.
    """
    end_time = time.time() + duration_s

    while time.time() < end_time:
        try:
            # Fetching data from API
            print("Fetching data from API.....")
            data = data_fetch()
            print("Fetching success!\n")

            # Transforming data
            print("Applying Transformations....")
            transformed = data_transform(data)
            print("Transformations Applied!\n")

            # Uploading data to MongoDB
            print("Uploading data to mongo db server...../")
            upload_mongo(transformed)
            print("Upload Success!\n")
        except (requests.RequestException, pymongo.errors.PyMongoError) as exc:
            # Only transient I/O failures are tolerated; anything else still propagates
            print(f"Cycle failed, retrying on the next cycle: {exc!r}\n")

        # Do not sleep past the end of the run
        remaining = end_time - time.time()
        if remaining <= 0:
            break
        print(f"Waiting for {interval_s / 60:g} minutes before fetching data again.....\n\n")
        time.sleep(min(interval_s, remaining))

In [None]:
# Running the pipeline; this call blocks the kernel until the pipeline loop finishes
pipeline()

Fetching data from API.....
Fetching success!

Applying Transformations....
Transformations Applied!

Uploading data to mongo db server...../
Upload Success!

Waiting for 10 minutes before fetching data again.....


Fetching data from API.....
Fetching success!

Applying Transformations....
Transformations Applied!

Uploading data to mongo db server...../
Upload Success!

Waiting for 10 minutes before fetching data again.....


Fetching data from API.....
Fetching success!

Applying Transformations....
Transformations Applied!

Uploading data to mongo db server...../
Upload Success!

Waiting for 10 minutes before fetching data again.....


