# CityBikes

### Importing Packages

In [2]:
import requests
import os

In [3]:
import pandas as pd
import numpy as np
import json
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

### Exploring the structure of the API and understanding the data returned from querying the API 

To do this, first I will pull data from the citybikes API and store the JSON file in a variable and use that to check if the response works. 

In [4]:
# Query the CityBikes networks endpoint.
url_one = 'http://api.citybik.es/v2/networks'
# A timeout keeps the cell from hanging indefinitely if the API never answers.
bike_request = requests.get(url_one, timeout=30)
# Stop here on an HTTP error status instead of failing later while parsing the body.
bike_request.raise_for_status()
bike_request

<Response [200]>

In [5]:
# Decode the JSON body of the networks response into Python objects.
bike_request_json = bike_request.json()

The parsed JSON response contains a list of nested dictionaries, each describing a bike network and the companies responsible for its bike rentals.

Send a request to CityBikes for the city of your choice. 

Now, I will loop through the JSON response to isolate the bike company that runs the CityBikes program in Bhopal.

In [6]:
# Keep the 'company' entry of every network whose city name contains 'Bhopal'.
bhopal_city_bike_companies = [
    network['company']
    for network in bike_request_json['networks']
    if 'Bhopal' in network['location']['city']
]

In [7]:
# Display the company entries collected for Bhopal.
bhopal_city_bike_companies

[['CHARTERED BIKE PRIVATE LIMITED']]

There is only one network company in Bhopal that administers this city bikes program. Now, I will use the network ID I acquired from the request above to pull station data.

Parse through the response to get the details you want for the bike stations in that city (latitude, longitude, number of bikes). 

Now to pull station data for this specific company in Bhopal, I will create a new API request with this particular network ID I have acquired as the endpoint. 

The network ID for Bhopal is 'chartered-bike-bhopal'.

In [8]:
# Query the CityBikes endpoint for the Bhopal network.
url_two = 'http://api.citybik.es/v2/networks/chartered-bike-bhopal'
# A timeout keeps the cell from hanging indefinitely if the API never answers.
station_request = requests.get(url_two, timeout=30)
# Stop here on an HTTP error status instead of failing later while parsing the body.
station_request.raise_for_status()
station_request

<Response [200]>

In [9]:
# Decode the JSON body of the station response into Python objects.
stations = station_request.json()

In [10]:
# The station records are nested under the 'network' -> 'stations' keys of the parsed response.
stations['network']['stations']

[{'empty_slots': 2,
  'extra': {'ebikes': 0, 'number': 9871, 'online': True, 'uid': 122},
  'free_bikes': 4,
  'id': 'b371ef2499d92f6f39aecd14aa834f73',
  'latitude': 23.222624,
  'longitude': 77.424051,
  'name': 'NANDAN KANAN PARK',
  'timestamp': '2024-04-17T00:16:33.608000Z'},
 {'empty_slots': 3,
  'extra': {'ebikes': 0, 'number': 9868, 'online': True, 'uid': 119},
  'free_bikes': 3,
  'id': '9ca6adc741b5a7e4e2914913ba967604',
  'latitude': 23.275564,
  'longitude': 77.454774,
  'name': 'FOUNTAIN PARK MINAAL',
  'timestamp': '2024-04-17T00:16:33.607000Z'},
 {'empty_slots': 1,
  'extra': {'ebikes': 0, 'number': 9880, 'online': True, 'uid': 131},
  'free_bikes': 5,
  'id': '154a47ad488f8c67ea69615613277002',
  'latitude': 23.230224,
  'longitude': 77.485648,
  'name': 'AVADHPURI TIRAHA',
  'timestamp': '2024-04-17T00:16:33.616000Z'},
 {'empty_slots': 5,
  'extra': {'ebikes': 0, 'number': 9866, 'online': True, 'uid': 117},
  'free_bikes': 3,
  'id': '36761a60c92613a6e5f7abc3e89e3b8c',

Put your parsed results into a DataFrame.

First, to better understand the structure of the dataframe that will be created, I will inspect a single entry to confirm the column names of the dataframe.

In [11]:
# Inspect one station record (index 5) to see which keys each record carries.
stations['network']['stations'][5]

{'empty_slots': 0,
 'extra': {'ebikes': 0, 'number': 9873, 'online': True, 'uid': 124},
 'free_bikes': 6,
 'id': '0dbfb7907fd0bb62822f16e53057697f',
 'latitude': 23.251645,
 'longitude': 77.456501,
 'name': 'J.K SQUARE ',
 'timestamp': '2024-04-17T00:16:33.625000Z'}

In [12]:
# Flatten every station record into one row, lifting the nested 'extra'
# fields up to top-level columns alongside the other station attributes.
stations_list = [
    {
        'station_id': station['id'],
        'station_name': station['name'],
        'latitude': station['latitude'],
        'longitude': station['longitude'],
        'empty_slots': station['empty_slots'],
        'free_bikes': station['free_bikes'],
        'e_bikes': station['extra']['ebikes'],
        'timestamp': station['timestamp'],
        'station_uid': station['extra']['uid'],
        'station_number': station['extra']['number'],
        'station_online_status': station['extra']['online'],
    }
    for station in stations['network']['stations']
]

# Build the dataframe from the flattened rows in a single step.
stations_df = pd.DataFrame(stations_list)


In [13]:
# Preview the first rows of the stations dataframe.
stations_df.head()

Unnamed: 0,station_id,station_name,latitude,longitude,empty_slots,free_bikes,e_bikes,timestamp,station_uid,station_number,station_online_status
0,b371ef2499d92f6f39aecd14aa834f73,NANDAN KANAN PARK,23.222624,77.424051,2,4,0,2024-04-17T00:16:33.608000Z,122,9871,True
1,9ca6adc741b5a7e4e2914913ba967604,FOUNTAIN PARK MINAAL,23.275564,77.454774,3,3,0,2024-04-17T00:16:33.607000Z,119,9868,True
2,154a47ad488f8c67ea69615613277002,AVADHPURI TIRAHA,23.230224,77.485648,1,5,0,2024-04-17T00:16:33.616000Z,131,9880,True
3,36761a60c92613a6e5f7abc3e89e3b8c,AKRITI BLUE SKY,23.167347,77.438082,5,3,0,2024-04-17T00:16:33.609000Z,117,9866,True
4,e2a4ce935da75f9eb2d0730033ddc5bb,VARDHMAAN PARK,23.249605,77.395605,0,0,1,2024-04-17T00:16:33.612000Z,75,9823,True


In [14]:
# Report each column's dtype and non-null count.
stations_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74 entries, 0 to 73
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   station_id             74 non-null     object 
 1   station_name           74 non-null     object 
 2   latitude               74 non-null     float64
 3   longitude              74 non-null     float64
 4   empty_slots            74 non-null     int64  
 5   free_bikes             74 non-null     int64  
 6   e_bikes                74 non-null     int64  
 7   timestamp              74 non-null     object 
 8   station_uid            74 non-null     int64  
 9   station_number         74 non-null     int64  
 10  station_online_status  74 non-null     bool   
dtypes: bool(1), float64(2), int64(5), object(3)
memory usage: 6.0+ KB


In [15]:
# Parse the timestamp strings into pandas datetimes, replacing the original column.
stations_df['timestamp'] = pd.to_datetime(stations_df['timestamp'])
# Display the converted column to confirm its new dtype.
stations_df['timestamp']

0    2024-04-17 00:16:33.608000+00:00
1    2024-04-17 00:16:33.607000+00:00
2    2024-04-17 00:16:33.616000+00:00
3    2024-04-17 00:16:33.609000+00:00
4    2024-04-17 00:16:33.612000+00:00
                   ...               
69   2024-04-17 00:16:33.728000+00:00
70   2024-04-17 00:16:33.729000+00:00
71   2024-04-17 00:16:33.729000+00:00
72   2024-04-17 00:16:33.727000+00:00
73   2024-04-17 00:16:33.641000+00:00
Name: timestamp, Length: 74, dtype: datetime64[ns, UTC]

In [16]:
# Summary statistics for the numeric columns.
stations_df.describe()

Unnamed: 0,latitude,longitude,empty_slots,free_bikes,e_bikes,station_uid,station_number
count,74.0,74.0,74.0,74.0,74.0,74.0,74.0
mean,23.22414,77.424794,9.432432,4.324324,0.040541,119.162162,9846.905405
std,0.025375,0.031126,58.484299,3.515555,0.348743,75.795512,46.848684
min,23.164564,77.341134,0.0,0.0,-1.0,55.0,9703.0
25%,23.210364,77.400497,1.0,2.25,0.0,76.25,9822.25
50%,23.225606,77.432152,2.0,4.0,0.0,99.5,9845.5
75%,23.23936,77.452415,4.0,5.75,0.0,120.75,9868.75
max,23.286264,77.485648,505.0,25.0,2.0,420.0,10054.0


In [17]:
# Count how many stations fall under each online-status value.
stations_df['station_online_status'].value_counts()

station_online_status
True    74
Name: count, dtype: int64

In [18]:
# Count the distinct station IDs; every ID is unique when this equals the number of rows.
stations_df['station_id'].nunique()

74

In [19]:
# Save the stations dataframe to a CSV file; index=False leaves the row index out of the file.
stations_df.to_csv('bhopal_bike_stations.csv', index=False)