# IBM Data Science Professional Specialization 

In [3]:
# Install the third-party packages this notebook imports, from the conda-forge channel.
# `--yes` answers conda's confirmation prompt so the cell can run unattended.
!conda install -c conda-forge beautifulsoup4 --yes

!conda install -c conda-forge geopy --yes

# folium is the only package pinned to an exact version (0.5.0).
!conda install -c conda-forge folium=0.5.0 --yes

# Printed unconditionally once the three commands above have returned.
print('Libraries installed!')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - beautifulsoup4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    beautifulsoup4-4.8.0       |           py36_0         144 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.6 MB

The following packages will be UPDATED:

    beautifulsoup4:  4.7.1-py36_1      --> 4.8.0-py36_0         conda-forge
    ca-certificates: 2019.5.15-0       --> 2019.6.16-hecc5488_0 conda-forge
    certifi:         2019.6.16-py36_1  --> 2019.6.16-py36_1     conda-forg

In [6]:
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests
import json

from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim

import folium
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.preprocessing import StandardScaler, normalize, scale
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score


print('Libraries imported!')


Libraries imported!


In [100]:
# Download the Nigeria price-history page and parse it with BeautifulSoup.
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(
    "https://www.globalpropertyguide.com/Africa/Nigeria/Price-History",
    timeout=30,
)
response.raise_for_status()
page = response.text
soup = BeautifulSoup(page, 'html.parser')

In [101]:
# Collect every <table class="floatcenter"> on the page and keep the first one.
lagos_table = soup.find_all("table", class_="floatcenter")
lagos_table1 = lagos_table[0]

# Remove every <tr class="ntitle"> row from the table (mutates the parsed tree).
title_rows = lagos_table1.find_all("tr", class_="ntitle")
for title_row in title_rows:
    title_row.decompose()

# Remove the row that sits at index 13 among the rows that remain.
# NOTE(review): this is purely positional, so it depends on the page layout and
# on this cell being run exactly once per freshly parsed `soup` - confirm.
remaining_rows = lagos_table1.find_all('tr')
remaining_rows[13].decompose()

In [102]:
# Display the pruned table to check which rows remain.
lagos_table1

<table cellspacing="0" class="floatcenter" id="ntable" width="350">
<tbody>

<tr class="nheading">
<td>  </td>
<td>NGN</td>
<td>USD*</td>
<td>Y-O-Y CHANGE (%)</td>
</tr>
<tr class="altrow">
<td>Ikoyi</td>
<td>240,000,000</td>
<td>666,697</td>
<td>20.0</td>
</tr>
<tr>
<td>Ibeju Lekki</td>
<td>28,000,000</td>
<td>77,781</td>
<td>12.0</td>
</tr>
<tr class="altrow">
<td>Lekki</td>
<td>55,000,000</td>
<td>152,785</td>
<td>0.0</td>
</tr>
<tr>
<td>Ajah</td>
<td>45,000,000</td>
<td>125,006</td>
<td>0.0</td>
</tr>
<tr class="altrow">
<td>Gbagada</td>
<td>50,000,000</td>
<td>138,895</td>
<td>0.0</td>
</tr>
<tr>
<td>Surulere</td>
<td>55,000,000</td>
<td>152,785</td>
<td>0.0</td>
</tr>
<tr class="altrow">
<td>Yaba</td>
<td>65,000,000</td>
<td>180,564</td>
<td>0.0</td>
</tr>
<tr>
<td>Victoria Island</td>
<td>105,000,000</td>
<td>291,680</td>
<td class="red">-8.7</td>
</tr>
<tr class="altrow">
<td>Isolo</td>
<td>50,000,000</td>
<td>138,895</td>
<td class="red">-9.1</td>
</tr>
<tr>
<td>Alimosho</td>


In [103]:
# Walk the table rows and collect, per row, the text of the first cell
# (neighborhood name) and of the third cell (the "USD*" column in the table
# displayed above). The header row is collected too; it is dropped later.
neighborhoodList = []
usd = []

for row in lagos_table1.find_all('tr'):
    cells = row.find_all('td')
    # cells[2] is read below, so a row needs at least three <td> cells.
    # The previous `len(cells) > 0` guard let 1-2 cell rows through and
    # raised IndexError on them.
    if len(cells) >= 3:
        neighborhoodList.append(cells[0].text.rstrip('\n'))
        usd.append(cells[2].text.rstrip('\n'))
        

In [104]:
# Pair each collected name with its price string, then build a two-column frame
# from those pairs.
zippedList = [pair for pair in zip(neighborhoodList, usd)]
lagos_df = pd.DataFrame(
    data=zippedList,
    columns=['Neighborhood', 'Avg Price'],
)


In [105]:
# Drop the row labelled 0, which holds the table's header cells rather than a
# neighborhood. errors='ignore' makes the cell safe to re-run: once label 0 is
# gone, a second run is a no-op instead of raising KeyError.
lagos_df = lagos_df.drop([0], axis=0, errors='ignore')


In [106]:
# Display the frame after the row labelled 0 has been dropped.
lagos_df

Unnamed: 0,Neighborhood,Avg Price
1,Ikoyi,666697
2,Ibeju Lekki,77781
3,Lekki,152785
4,Ajah,125006
5,Gbagada,138895
6,Surulere,152785
7,Yaba,180564
8,Victoria Island,291680
9,Isolo,138895
10,Alimosho,69448


### Get the neighborhoods' coordinates

In [4]:
# Download the geodata
# -q silences wget's progress output; -O writes the response body to 'lagos_geo.json'.
# NOTE(review): the URL looks like a PURL landing page rather than a direct link
# to a JSON file - confirm that the saved file is valid JSON before loading it.
!wget -q -O 'lagos_geo.json' https://purl.stanford.edu/rn736kz7913
# Printed unconditionally; it does not check whether wget succeeded.
print('Data downloaded!')


Data downloaded!


In [None]:
# Load the json file
# JSON is read as UTF-8 explicitly; without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII place names.
with open('lagos_geo.json', encoding='utf-8') as lagos_geo_json:
    lagos_geo_data = json.load(lagos_geo_json)

In [None]:
# Parse the json data into neighborhoods list
# `lagos_geo_list` was never defined in this notebook, so the loop raised
# NameError on a fresh kernel. Each item is read via 'properties' and
# 'geometry', i.e. as a GeoJSON Feature, so the list is taken from the loaded
# document's 'features' member.
# NOTE(review): assumes lagos_geo_data is a GeoJSON FeatureCollection whose
# features are Points with 'borough' and 'name' properties - confirm against
# the downloaded file.
lagos_geo_list = lagos_geo_data['features']

neighborhood_geo_list = []
for data in lagos_geo_list:
    properties = data['properties']
    borough = properties['borough']
    neighborhood_name = properties['name']

    # Index 0 is read as the longitude and index 1 as the latitude.
    neighborhood_latlon = data['geometry']['coordinates']
    neighborhood_lat = neighborhood_latlon[1]
    neighborhood_lon = neighborhood_latlon[0]

    # One (borough, name, lat, lon) tuple per feature.
    neighborhood_geo_list.append((
        borough, neighborhood_name, neighborhood_lat, neighborhood_lon
    ))

### Using the Foursquare API to get surrounding venues

In [None]:
import os

# Foursquare credentials are read from the environment so that no secret is
# stored in (or printed by) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here rather than failing later inside the request loop.
# NOTE(review): the keys that used to be hardcoded in this cell have been
# published with the notebook and should be regenerated.
CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID']
CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET']
VERSION = '20180605'  # sent as the API's `v` parameter

print('Foursquare credentials loaded from the environment.')

radius = 500  # sent as the `radius` query parameter
LIMIT = 100   # sent as the `limit` query parameter

url = "https://api.foursquare.com/v2/venues/explore"

# One tuple per venue:
# (neighborhood, neighborhood lat, neighborhood lon, venue name, venue lat, venue lon, category)
venues = []

# NOTE(review): assumes lagos_df has 'Latitude' and 'Longitude' columns; they are
# not created in the cells visible above - confirm they exist before this cell.
for lat, long, neighborhood in zip(lagos_df['Latitude'], lagos_df['Longitude'], lagos_df['Neighborhood']):
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': LIMIT,
    }

    # The timeout keeps one stalled request from hanging the whole loop;
    # raise_for_status() surfaces HTTP errors (bad key, rate limit) directly.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    for venue in results:
        venue_info = venue['venue']
        # A venue with an empty category list would raise IndexError on [0];
        # record None for it instead of aborting the whole collection.
        categories = venue_info['categories']
        category_name = categories[0]['name'] if categories else None

        venues.append((
            neighborhood,
            lat,
            long,
            venue_info['name'],
            venue_info['location']['lat'],
            venue_info['location']['lng'],
            category_name))