# Ahmedabad Housing Analysis
###       By Maharshi Pandya



The First step is to import all the necessary libraries into our notebook

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np 
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
conda install -c conda-forge folium --yes 
import folium 

Solving environment: done

# All requested packages already installed.



In this step, we are going to scrap data for our project and assign it to variable named d1

In [2]:
!wget -q -O 'sample.html' https://www.makaan.com/price-trends/property-rates-for-buy-in-ahmedabad
print('Data downloaded!')
with open('sample.html') as Sampledata:
    soup = BeautifulSoup(Sampledata , 'lxml')
d1 = soup.find('div' , class_ = 'locality-trends max980')

Data downloaded!


Here, We are going to isolate data and going to create 4 indepedant list of our data.

In [3]:
data=[]
locality =[]
price_range =[]
avg_price=[]
rise_price =[]
for row in d1.find_all('tr'):
    for cell in row.find_all('td'):
        data.append(cell.text)
    if(len(data)>5):
        locality.append(data[0])
        price_range.append(data[1])
        avg_price.append(data[2])
        rise_price.append(data[3].rstrip('\n'))
    data=[]
  

In this step, we are making a data frame of a data from above

In [4]:
amdavad = pd.DataFrame({"Borough" : locality , "Price Range" : price_range , "Average Price":avg_price, "Rise Price" : rise_price})
amdavad['Price Range'].replace('-',np.nan,inplace=True)
amdavad['Average Price'].replace('-',np.nan,inplace=True)
amdavad.head()

Unnamed: 0,Borough,Price Range,Average Price,Rise Price
0,Bopal,"393 - 64,516 / sqft","12,367.37 / sqft",24.5%
1,Prahlad Nagar,"2,381 - 14,842 / sqft","11,189.54 / sqft",20.9%
2,Satellite,"1,895 - 44,922 / sqft","30,112.94 / sqft",56.9%
3,Shela,"2,400 - 6,790 / sqft","6,32.26 / sqft",22.3%
4,Gota,"2,346 - 14,706 / sqft","9,472.09 / sqft",9.3%


In this step, we are sorting data according to their average proce from lowest to highest

In [5]:
sorted_city= amdavad.sort_values(by=['Average Price'])
sorted_city.head()

Unnamed: 0,Borough,Price Range,Average Price,Rise Price
155,Nikol,"600 - 1,200 / sqft","1,200 / sqft",486%
19,Dholera,"1,610 - 1,673 / sqft","1,642.77 / sqft",-
227,Vatva,"1,667 / sqft","1,666.67 / sqft",-
228,Naroda,"1,778 / sqft","1,778.5 / sqft",-
206,Ahmedabad Patan Highway Road,"1,939 / sqft","1,939.39 / sqft",-


Here ,we are finding the average location of ahmedabad city.

In [6]:
address = ' Ahemedabad'
geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Ahmedabad are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Ahmedabad are 23.0496453, 72.6071588.


### This is hidden cell, here I uploaded data of lat and lon of city to watson studio project assets and then called it here.

Now in this step, I am just sorting the table

In [8]:
final_table = sorted_city.set_index('Borough').join(geo_data.set_index('Name'))
final_table = final_table.reset_index(drop=False)
final_table

Unnamed: 0,index,Price Range,Average Price,Rise Price,Latitude,Longitude
0,Adani Group Adani Shantigram,"2,507 - 14,258 / sqft",4785,8%,,
1,Adani Group The Meadows,"3,111 - 8,112 / sqft",4485,22.1%,,
2,Adani Group Water Lily,"3,150 - 9,126 / sqft",3790,25.6%,,
3,Adi Heritage Skyz,"5,714 - 6,904 / sqft",6308,-,,
4,Ahmedabad Patan Highway Road,"1,939 / sqft","1,939.39 / sqft",-,,
5,Ahmedabad Patan Highway Road,"253 - 3,898 / sqft","3,897.73 / sqft",-,,
6,Ahmedabad Patan Highway Road,"1,017 - 12,831 / sqft","8,105.72 / sqft",-,,
7,Ahmedabad Patan Highway Road,,,-,,
8,Ajmera Group And Sheetal Infrastructure Pvt Lt...,"4,932 - 7,420 / sqft",6084,-8.5%,,
9,Ambavadi,"8,333 - 23,641 / sqft","23,640.66 / sqft",-42.7%,,


In [9]:
final_table.dropna(axis=0, how='all', inplace=False)
final_table.Latitude.unique()
avg_lat=final_table['Latitude'].astype('float').mean(axis=0)
final_table['Latitude'].replace(np.nan, avg_lat, inplace=True)
avg_lon=final_table['Longitude'].astype('float').mean(axis=0)
final_table['Longitude'].replace(np.nan, avg_lon, inplace=True)

In [10]:
map_amdavad = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, AveragePrice in zip(final_table['Latitude'], final_table['Longitude'], final_table['index'],final_table['Average Price']):
    label = '{} , {}'.format(borough , AveragePrice)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_amdavad)  
map_amdavad