In [5]:
from geopy.geocoders import Nominatim
import pandas as pd
import time

# Single Nominatim geocoding client reused by every lookup in this notebook;
# the user_agent string is the identifier this script passes to the client.
geolocator = Nominatim(user_agent="sector_locator")

def get_coordinates(sector, city="Gurgaon, India", geocoder=None):
    """Geocode one numbered sector and return it as a "lat, lon" string.

    Parameters
    ----------
    sector : int or str
        Sector number; it is interpolated into the query
        "Sector <sector>, <city>".
    city : str, optional
        Locality appended to the query. Defaults to "Gurgaon, India", which
        reproduces the query this notebook originally hard-coded.
    geocoder : object, optional
        Any object exposing ``geocode(query)`` that returns either a falsy
        value or an object with ``latitude`` and ``longitude`` attributes.
        Defaults to the notebook-level ``geolocator``.

    Returns
    -------
    str or None
        "<latitude>, <longitude>" when the geocoder returns a result,
        otherwise None.

    Notes
    -----
    Exceptions raised by the geocoder are not caught here; they propagate to
    the caller.
    """
    if geocoder is None:
        # Resolved at call time so the default always tracks the notebook's client.
        geocoder = geolocator

    location = geocoder.geocode(f"Sector {sector}, {city}")
    if not location:
        return None
    return f"{location.latitude}, {location.longitude}"

# Geocode sectors 1-115 one request at a time and collect the results as plain
# records. The DataFrame is built once at the end instead of being enlarged
# row by row inside the loop.
records = []
for sector in range(1, 116):
    records.append(
        {"Sector": f"Sector {sector}", "Coordinates": get_coordinates(sector)}
    )
    time.sleep(1)  # to respect API rate limit

# Explicit column order keeps the CSV layout identical to the original output.
df = pd.DataFrame(records, columns=["Sector", "Coordinates"])

# index=False: only the two data columns are written, no row index.
df.to_csv("gurgaon_sectors_coordinates.csv", index=False)

## Coordinates CSV file

In [47]:
# Load the sector coordinates from disk; the file name matches the CSV written
# by the geocoding cell, so the lookups do not have to be repeated here.
latlong = pd.read_csv('gurgaon_sectors_coordinates.csv')

In [48]:
# Preview the first rows to check the columns that were loaded.
latlong.head()

Unnamed: 0,Sector,Coordinates
0,Sector 1,"28.360102, 76.9483754"
1,Sector 2,"28.5090499, 77.0342831"
2,Sector 3,"28.4973905, 77.020526"
3,Sector 4,"28.4750063, 77.0103535"
4,Sector 5,"28.3673537, 76.9267238"


In [49]:
# Append a hand-entered row for "manesar"; the geocoding loop only produced
# numbered "Sector N" rows.
# NOTE(review): each run of this cell appends the row again — confirm it is
# executed exactly once, otherwise the sector key will be duplicated.
latlong.loc[len(latlong)] = {"Sector": "manesar", "Coordinates": "28.3515, 76.9428"}

In [50]:
# Append a hand-entered row for "sohna road"; the geocoding loop only produced
# numbered "Sector N" rows.
# NOTE(review): each run of this cell appends the row again — confirm it is
# executed exactly once, otherwise the sector key will be duplicated.
latlong.loc[len(latlong)] = {"Sector": "sohna road", "Coordinates": "28.25, 77.07"}

In [51]:
# Rename "Sector" to lower-case "sector", the column name used as the merge key
# when this table is joined to the properties data.
latlong = latlong.rename(columns = {'Sector':'sector'})

In [52]:
# Latitude is the part of the "lat, lon" string before the comma. Strip any
# surrounding whitespace and convert to a numeric column so it can be used
# directly for plotting or arithmetic instead of being carried as text.
latlong['latitude'] = pd.to_numeric(
    latlong['Coordinates'].str.split(',').str.get(0).str.strip()
)

In [53]:
# Longitude is the part of the "lat, lon" string after the comma. Splitting on
# "," alone leaves the separator's space in front of the value (" 77.03..."),
# so strip it and convert to a numeric column instead of keeping padded text.
latlong['longitude'] = pd.to_numeric(
    latlong['Coordinates'].str.split(',').str.get(1).str.strip()
)

In [59]:
# Spot-check five random rows to see the new latitude/longitude columns.
# No random_state is passed, so the rows shown are not fixed between runs.
latlong.sample(5)

Unnamed: 0,sector,Coordinates,latitude,longitude
91,Sector 92,"28.4089049, 76.9155232",28.4089049,76.9155232
22,Sector 23,"28.510319, 77.0530094",28.510319,77.0530094
60,Sector 61,"28.4110188, 77.0963685",28.4110188,77.0963685
103,Sector 104,"28.4795324, 76.9937197",28.4795324,76.9937197
10,Sector 11,"28.3781429, 76.8753775",28.3781429,76.8753775


In [67]:
# Lower-case the sector names ("Sector 4" -> "sector 4") so they can match the
# lower-case sector values used in the properties data when merging.
latlong['sector'] = latlong['sector'].str.lower()

In [68]:
# Spot-check five random rows to confirm the sector names are now lower-case.
# No random_state is passed, so the rows shown are not fixed between runs.
latlong.sample(5)

Unnamed: 0,sector,Coordinates,latitude,longitude
3,sector 4,"28.4750063, 77.0103535",28.4750063,77.0103535
31,sector 32,"28.4458035, 77.0413169",28.4458035,77.0413169
97,sector 98,"28.5069625, 77.018321",28.5069625,77.018321
65,sector 66,"28.3974292, 77.0538823",28.3974292,77.0538823
90,sector 91,"28.4007588, 76.9210802",28.4007588,76.9210802


## Gurgaon dataset

In [61]:
# Load the Gurgaon property listings.
# NOTE(review): the file name suggests missing values were already imputed in
# an earlier step — confirm against the notebook that produced it.
data = pd.read_csv('gurgaon_properties_missing_value_imputation.csv')

In [63]:
# Spot-check five random property rows before joining the coordinates.
# No random_state is passed, so the rows shown are not fixed between runs.
data.sample(5)

Unnamed: 0,property_type,society,sector,price,price_per_sqft,bedRoom,bathroom,balcony,floorNum,agePossession,built_up_area,servant room,study room,pooja room,store room,others,furnished_type,luxury_score
2184,flat,supertech hues,sector 68,1.0,8475.0,2,2,3,23.0,New Property,1311.0,0,0,0,0,0,0,49
2681,flat,parsvnath exotica,sector 53,5.0,17271.0,3,4,3+,8.0,Relatively New,2500.0,1,0,1,0,1,1,49
1365,flat,zara aavaas,sector 104,0.65,8519.0,3,2,2,7.0,New Property,848.0,0,0,0,0,0,0,65
1089,flat,puri emerald bay,sector 104,2.49,10163.0,3,3,3+,14.0,Relatively New,2217.0,1,0,0,0,0,1,129
1507,flat,sbtl caladium,sector 109,1.7,6680.0,3,3,3+,9.0,Relatively New,2303.0,1,0,0,0,0,0,159


### Join the latlong data onto the Gurgaon data on `sector`

In [71]:
# Attach latitude/longitude to every property by matching on "sector", then
# drop the raw "lat, lon" string that is no longer needed.
# validate='many_to_one' makes the merge raise if a sector appears more than
# once in latlong, which would otherwise silently duplicate property rows.
# NOTE(review): merge defaults to an inner join, so properties whose sector has
# no match in latlong are dropped — confirm that is intended.
data = data.merge(latlong, on='sector', validate='many_to_one').drop(columns='Coordinates')

In [72]:
# Spot-check five random rows to confirm latitude/longitude were attached.
# No random_state is passed, so the rows shown are not fixed between runs.
data.sample(5)

Unnamed: 0,property_type,society,sector,price,price_per_sqft,bedRoom,bathroom,balcony,floorNum,agePossession,built_up_area,servant room,study room,pooja room,store room,others,furnished_type,luxury_score,latitude,longitude
2619,flat,shree vardhman flora,sector 90,0.7,5178.0,2,2,3,9.0,New Property,1224.0,0,1,0,0,0,0,97,28.4069974,76.9365562
1807,house,independent,sector 26,9.0,33333.0,5,5,2,3.0,Old Property,2700.0,1,1,1,1,0,2,110,28.4779375,77.1032425
2705,flat,dlf the ultima,sector 81,2.3,10994.0,3,4,3+,6.0,New Property,1893.0,1,0,0,0,0,2,167,28.3873647,76.9475835
520,flat,mapsko royale ville,sector 82,1.22,7176.0,3,5,3+,10.0,Relatively New,2000.0,1,0,1,0,0,1,76,28.3931076,76.9588762
2561,flat,dlf the icon,sector 53,5.25,20388.0,4,4,3,9.0,Old Property,2570.0,1,0,1,0,1,0,135,28.4415191,77.096805


In [76]:
# Save the merged table (properties plus latitude/longitude) to disk;
# index=False keeps the row index out of the file.
data.to_csv('gurgaon-data-viz.csv', index = False)