# CityBikes

### Importing Packages

In [1]:
import requests
import os

In [2]:
import pandas as pd
import numpy as np
import json
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

### Exploring the structure of the API and understanding the data returned from querying the API 

To do this, I will first request the list of networks from the CityBikes API, keep the response in a variable, and check that the request succeeded before parsing its JSON body.

In [3]:
# Request the list of all bike-share networks from the CityBikes API.
url_one = 'http://api.citybik.es/v2/networks'
# A timeout stops the cell from hanging indefinitely if the API does not answer.
bike_request = requests.get(url_one, timeout=30)
# Stop here on a 4xx/5xx answer instead of failing later while parsing the body.
bike_request.raise_for_status()
bike_request

<Response [200]>

In [4]:
# Decode the JSON body of the networks response into Python objects.
bike_request_json = bike_request.json()

The parsed JSON response holds, under the `networks` key, a list of nested dictionaries, one for each network (company) responsible for bike rentals.

Send a request to CityBikes for the city of your choice. 

Now, I will loop through the parsed JSON to isolate the bike company that runs the CityBikes program in Gent.

In [13]:
# Collect every network whose city name contains 'Gent'.
# Iterating over the list itself replaces the index-based range(len(...)) loop.
# Note: this is a substring test, so any city name containing 'Gent'
# (for example 'Gentofte') would be kept as well.
city_bike_companies = [
    network
    for network in bike_request_json['networks']
    if 'Gent' in network['location']['city']
]

city_bike_companies

[{'company': ['Donkey Republic'],
  'gbfs_href': 'https://stables.donkey.bike/api/public/gbfs/donkey_gh/gbfs',
  'href': '/v2/networks/donkey-gh',
  'id': 'donkey-gh',
  'location': {'city': 'Gent',
   'country': 'BE',
   'latitude': 51.05,
   'longitude': 3.7303},
  'name': 'Donkey Republic - Gent'}]

There is only one network company in Gent, Belgium that administers this city bikes program. Now, I will use the network ID I acquired from the request above to pull station data.

Q. Parse through the response to get the details you want for the bike stations in that city (latitude, longitude, number of bikes). 

Now, to pull station data for this specific company in Gent, I will create a new API request that uses the network ID I acquired above as the endpoint.

The network ID for Gent is 'donkey-gh'.

In [14]:
# Request the station-level data for the Gent network.
network_id = 'donkey-gh'  # network ID taken from the networks listing
url_two = f'http://api.citybik.es/v2/networks/{network_id}'
# A timeout stops the cell from hanging indefinitely if the API does not answer.
station_request = requests.get(url_two, timeout=30)
# Stop here on a 4xx/5xx answer instead of failing later while parsing the body.
station_request.raise_for_status()
station_request

<Response [200]>

In [15]:
# Decode the JSON body of the station response into Python objects.
stations = station_request.json()

In [16]:
# To access the details, we parse the JSON file.
# Only the first few stations are shown: displaying the whole list floods the
# notebook output without adding information.
stations['network']['stations'][:3]

[{'empty_slots': 2,
  'extra': {'last_updated': 1713387373,
   'renting': 1,
   'returning': 1,
   'uid': '23997'},
  'free_bikes': 1,
  'id': 'fedd9729d9183b05bb4a3bd9da7d7de1',
  'latitude': 51.0602571,
  'longitude': 3.7293461,
  'name': 'Nieuwland 1',
  'timestamp': '2024-04-18T00:29:30.343000Z'},
 {'empty_slots': 9,
  'extra': {'last_updated': 1713358694,
   'renting': 1,
   'returning': 1,
   'uid': '23991'},
  'free_bikes': 3,
  'id': '360b8beb20716998aea9c9a9bbe9720f',
  'latitude': 51.0236911,
  'longitude': 3.6892353,
  'name': 'IKEA',
  'timestamp': '2024-04-18T00:29:30.349000Z'},
 {'empty_slots': 2,
  'extra': {'last_updated': 1713165557,
   'renting': 1,
   'returning': 1,
   'uid': '12392'},
  'free_bikes': 4,
  'id': 'c41d2c76618e7fa70fb587d1c2f0497d',
  'latitude': 51.0630685,
  'longitude': 3.7159931,
  'name': 'Opgeëistenlaan 401',
  'timestamp': '2024-04-18T00:29:30.350000Z'},
 {'empty_slots': 0,
  'extra': {'last_updated': 1713297425,
   'renting': 1,
   'returning'

Put your parsed results into a DataFrame.

First, to better understand the structure of the DataFrame that will be created, I will inspect a single station entry to confirm which fields will become its columns.

In [17]:
# Inspect one station entry (index 5) to see which fields each station carries.
stations['network']['stations'][5]

{'empty_slots': 3,
 'extra': {'last_updated': 1713376944,
  'renting': 1,
  'returning': 1,
  'uid': '13162'},
 'free_bikes': 0,
 'id': 'dfb4810ce36bce22c088da09bc37132d',
 'latitude': 51.0655317,
 'longitude': 3.6993996,
 'name': 'Groendreef 133',
 'timestamp': '2024-04-18T00:29:30.368000Z'}

In [22]:
# Flatten every station of the response into one single-level record so the
# nested 'extra' fields become ordinary columns.
stations_list = []  # one dict per station
for station in stations['network']['stations']:
    # Read the nested 'extra' block defensively: a missing block or key yields
    # None (shown as a missing value in the DataFrame) instead of a KeyError.
    # NOTE(review): assumes 'extra' fields may be absent for some stations or
    # networks; confirm against the API documentation.
    extra = station.get('extra') or {}
    stations_data = {
        'station_id': station['id'],
        'station_name': station['name'],
        'latitude': station['latitude'],
        'longitude': station['longitude'],
        'empty_slots': station['empty_slots'],
        'free_bikes': station['free_bikes'],
        'timestamp': station['timestamp'],
        'station_uid': extra.get('uid'),
        'renting': extra.get('renting'),
        'returning': extra.get('returning'),
        'last_updated': extra.get('last_updated'),
    }
    stations_list.append(stations_data)

# Build the DataFrame once from the complete list of records.
stations_df = pd.DataFrame(stations_list)


In [23]:
# Preview the first five rows of the station table.
stations_df.head()

Unnamed: 0,station_id,station_name,latitude,longitude,empty_slots,free_bikes,timestamp,station_uid,renting,returning,last_updated
0,fedd9729d9183b05bb4a3bd9da7d7de1,Nieuwland 1,51.060257,3.729346,2,1,2024-04-18T00:29:30.343000Z,23997,1,1,1713387373
1,360b8beb20716998aea9c9a9bbe9720f,IKEA,51.023691,3.689235,9,3,2024-04-18T00:29:30.349000Z,23991,1,1,1713358694
2,c41d2c76618e7fa70fb587d1c2f0497d,Opgeëistenlaan 401,51.063068,3.715993,2,4,2024-04-18T00:29:30.350000Z,12392,1,1,1713165557
3,0af3043c5906684b3afc97f387c94430,Land van Waaslaan 128,51.058521,3.747583,0,3,2024-04-18T00:29:30.361000Z,13169,1,1,1713297425
4,31578db5c5ecc8491f076cf2023ccc7d,Wittemolenstraat 40,51.056576,3.74953,2,1,2024-04-18T00:29:30.365000Z,13245,1,1,1713385625


In [24]:
# Show the column dtypes and non-null counts of the station table.
stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406 entries, 0 to 405
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   station_id    406 non-null    object 
 1   station_name  406 non-null    object 
 2   latitude      406 non-null    float64
 3   longitude     406 non-null    float64
 4   empty_slots   406 non-null    int64  
 5   free_bikes    406 non-null    int64  
 6   timestamp     406 non-null    object 
 7   station_uid   406 non-null    object 
 8   renting       406 non-null    int64  
 9   returning     406 non-null    int64  
 10  last_updated  406 non-null    int64  
dtypes: float64(2), int64(5), object(4)
memory usage: 35.0+ KB


In [28]:
# Parse the ISO 8601 timestamp strings into datetime values, store them back
# in the same column, and display the converted column.
parsed_timestamps = pd.to_datetime(stations_df['timestamp'], format='ISO8601')
stations_df['timestamp'] = parsed_timestamps
stations_df['timestamp']

0     2024-04-18 00:29:30.343000+00:00
1     2024-04-18 00:29:30.349000+00:00
2     2024-04-18 00:29:30.350000+00:00
3     2024-04-18 00:29:30.361000+00:00
4     2024-04-18 00:29:30.365000+00:00
                    ...               
401   2024-04-18 00:29:31.203000+00:00
402   2024-04-18 00:29:30.498000+00:00
403   2024-04-18 00:29:30.675000+00:00
404   2024-04-18 00:29:30.680000+00:00
405   2024-04-18 00:29:31.346000+00:00
Name: timestamp, Length: 406, dtype: datetime64[ns, UTC]

In [29]:
# Summary statistics (count, mean, std, min, quartiles, max) of the station table.
stations_df.describe()

Unnamed: 0,latitude,longitude,empty_slots,free_bikes,renting,returning,last_updated
count,406.0,406.0,406.0,406.0,406.0,406.0,406.0
mean,51.049707,3.722364,4.280788,1.312808,1.0,1.0,1713262000.0
std,0.012877,0.017117,3.618195,1.911159,0.0,0.0,383641.9
min,50.99819,3.676415,0.0,0.0,1.0,1.0,1708365000.0
25%,51.041699,3.710837,2.0,0.0,1.0,1.0,1713272000.0
50%,51.050625,3.72311,3.0,1.0,1.0,1.0,1713365000.0
75%,51.05735,3.732759,6.0,2.0,1.0,1.0,1713380000.0
max,51.099393,3.767021,24.0,14.0,1.0,1.0,1713400000.0


We can see that `renting` and `returning` are always 1 (their standard deviation is 0), so both variables are redundant here. 
The format of `last_updated` is also unusual: the values look like Unix epoch timestamps in seconds (for example, 1713387373 falls on 2024-04-17 UTC). The `timestamp` column already tells us when the station was last updated in the system, so I am not sure what `last_updated` is for. 

In [31]:
# Count how many stations report each free_bikes value.
stations_df['free_bikes'].value_counts()

free_bikes
0     169
1     114
2      55
3      35
4      13
5       8
9       3
6       3
11      2
8       1
13      1
14      1
10      1
Name: count, dtype: int64

In [33]:
# Count how many stations report each empty_slots value.
stations_df['empty_slots'].value_counts()

empty_slots
3     122
2      71
6      47
1      35
5      25
0      22
4      20
10     11
8       9
12      9
9       8
11      7
7       7
16      3
13      3
21      2
24      1
14      1
18      1
15      1
20      1
Name: count, dtype: int64

In [30]:
# Check that all station IDs are unique.
# The assertion makes the check explicit, so the reader does not have to
# compare the displayed count with the number of rows by hand.
assert stations_df['station_id'].is_unique, 'duplicate station_id values found'
stations_df['station_id'].nunique()

406

In [32]:
# Save the station dataframe to a CSV file, leaving out the row index.
csv_path = 'gent_bike_stations.csv'
stations_df.to_csv(csv_path, index=False)