In [1]:
import pandas as pd

In [2]:
# Read the raw transactions and keep only the rows that have a sale price per
# square meter; the chained form avoids mutating the frame in place.
data = pd.read_csv('transactions.csv').dropna(subset=['meter_sale_price'])

In [3]:
import os
import pickle
import time
from collections import defaultdict

import numpy as np
import requests

In [10]:
# Reference point treated as the city center (Burj Khalifa, Dubai), given as
# (latitude, longitude) in decimal degrees -- the order haversine() unpacks.
city_center_coords = (25.1972, 55.2744)

In [14]:
# Load the persisted geocode cache, or start with an empty one when the file
# is missing or unreadable.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only ever load a cache file that this notebook itself produced.
try:
    with open('geocode_cache.pkl', 'rb') as f:
        geocode_cache = pickle.load(f)
except FileNotFoundError:
    # First run: nothing has been geocoded yet.
    geocode_cache = {}
except (EOFError, pickle.UnpicklingError) as exc:
    # A truncated or corrupt file (e.g. from an interrupted write) should not
    # block the run; the entries are simply fetched again.
    print(f"Ignoring unreadable geocode cache: {exc!r}")
    geocode_cache = {}

def save_cache(cache, path='geocode_cache.pkl'):
    """Persist the geocode cache to disk with pickle.

    The data is first written to a sibling temporary file and then moved over
    the target with ``os.replace``, so an interrupted write cannot leave a
    truncated cache file behind.

    Args:
        cache: Picklable object to store (the location -> coordinates dict).
        path: Destination file; defaults to the notebook's cache file.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'wb') as f:
        pickle.dump(cache, f)
    # Swap the fully written file into place in a single step.
    os.replace(tmp_path, path)

# OpenCage API key, read from the environment so the secret never lives in the
# notebook. Set OPENCAGE_API_KEY before starting the kernel. The key that was
# previously hardcoded here has been exposed and should be revoked/rotated.
API_KEY = os.environ.get('OPENCAGE_API_KEY', '')

def get_coordinates_nominatim(location):
    """Geocode a place name in Dubai to ``(lat, lng)``, using the module cache.

    Despite its name, this function queries the OpenCage geocoding API (not
    Nominatim); the name is kept so existing callers continue to work.

    Args:
        location: Place name to look up. Anything that is not a non-empty
            string (e.g. a NaN from a missing DataFrame value) is not geocoded.

    Returns:
        A ``(latitude, longitude)`` tuple of floats, or ``None`` when the
        location is missing or the geocoder has no match.

    Raises:
        RuntimeError: If a lookup is required but no API key is configured.
        requests.RequestException: On network failures, timeouts or HTTP
            error responses (bad key, exhausted quota, ...).
    """
    # Missing values must not be sent to the geocoder as the text "nan".
    if not isinstance(location, str) or not location.strip():
        return None
    if location in geocode_cache:
        return geocode_cache[location]
    if not API_KEY:
        raise RuntimeError("No OpenCage API key configured; cannot geocode uncached locations.")

    response = requests.get(
        "https://api.opencagedata.com/geocode/v1/json",
        # Passing params lets requests URL-encode spaces and special characters.
        params={
            "q": f"{location} Dubai",
            "key": API_KEY,
            # Restrict matches to the UAE so same-named places abroad are not returned.
            "countrycode": "ae",
            "limit": 1,
            "no_annotations": 1,
        },
        timeout=10,
    )
    response.raise_for_status()

    # The JSON body is a dict even when nothing matched, so check the result
    # list itself rather than the truthiness of the whole payload.
    results = response.json().get('results') or []
    if not results:
        return None

    geometry = results[0]['geometry']
    coordinates = (float(geometry['lat']), float(geometry['lng']))
    geocode_cache[location] = coordinates
    return coordinates

def haversine(coord1, coord2, radius=6371):
    """Great-circle distance between two points on a sphere.

    Args:
        coord1: ``(latitude, longitude)`` of the first point, in decimal degrees.
        coord2: ``(latitude, longitude)`` of the second point, in decimal degrees.
        radius: Sphere radius; it determines the unit of the result. The
            default 6371 is the Earth's radius in kilometers (use 3956 for miles).

    Returns:
        The distance along the sphere's surface, in the unit of ``radius``.
    """
    lat1, lon1 = coord1
    lat2, lon2 = coord2
    # Convert decimal degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, (lat1, lon1, lat2, lon2))

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # identical or antipodal points, which would make sqrt/arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    return c * radius


In [15]:

# Geocode every distinct metro station once instead of once per row: the frame
# has far more rows than stations, and a station without a match would
# otherwise be re-requested for each of its rows. Missing station names are
# dropped so they are never sent to the geocoder.
metro_names = data['nearest_metro_en'].dropna().unique()
try:
    coords_by_metro = {name: get_coordinates_nominatim(name) for name in metro_names}
finally:
    # Persist whatever was fetched, even if a lookup failed part-way through.
    save_cache(geocode_cache)

# Distance from the city center per station; stations without coordinates are
# left out so their rows end up with a missing distance.
distance_by_metro = {
    name: haversine(city_center_coords, coords)
    for name, coords in coords_by_metro.items()
    if coords
}

# dict.get yields None for rows whose station is missing or was not geocoded.
data['coordinates'] = data['nearest_metro_en'].map(coords_by_metro.get)
data['distance_to_center'] = data['nearest_metro_en'].map(distance_by_metro.get)

# Save the final DataFrame
data.to_csv('properties_with_distances.csv', index=False)

data

Unnamed: 0,transaction_id,procedure_id,trans_group_id,trans_group_ar,trans_group_en,procedure_name_ar,procedure_name_en,instance_date,property_type_id,property_type_ar,...,procedure_area,actual_worth,meter_sale_price,rent_value,meter_rent_price,no_of_parties_role_1,no_of_parties_role_2,no_of_parties_role_3,coordinates,distance_to_center
0,1-11-2004-2099,11,1,مبايعات,Sales,بيع,Sell,04-10-2004,4,فيلا,...,860.28,4500000.0,5230.86,,,3.0,1.0,0.0,"(25.0657, 55.17128)",17.932402
1,1-11-2011-6898,11,1,مبايعات,Sales,بيع,Sell,21-04-2011,3,وحدة,...,89.93,2008888.0,22338.35,,,1.0,1.0,0.0,"(25.0657, 55.17128)",17.932402
2,1-11-2008-203212,11,1,مبايعات,Sales,بيع,Sell,11-06-2008,3,وحدة,...,67.00,330000.0,4925.37,,,1.0,1.0,0.0,"(25.2295686, 55.3914759)",12.315636
3,1-11-2010-15769,11,1,مبايعات,Sales,بيع,Sell,25-10-2010,3,وحدة,...,100.80,928756.0,9213.85,,,1.0,1.0,0.0,"(25.0799529, 55.1475937)",18.245763
4,1-110-2010-557,110,1,مبايعات,Sales,تسجيل إيجارة تنتهى بالتملك,Lease to Own Registration,24-03-2010,3,وحدة,...,43.00,290000.0,6744.19,,,4.0,2.0,2.0,"(25.2295686, 55.3914759)",12.315636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1182462,1-102-2010-2872,102,1,مبايعات,Sales,بيع - تسجيل مبدئى,Sell - Pre registration,20-05-2010,3,وحدة,...,313.37,4650075.0,14838.93,,,,,,"(25.0657, 55.17128)",17.932402
1182463,1-102-2010-2741,102,1,مبايعات,Sales,بيع - تسجيل مبدئى,Sell - Pre registration,07-10-2010,3,وحدة,...,126.07,1100000.0,8725.31,,,,,,"(25.1180879, 55.1342009)",16.628136
1182464,1-102-2010-2911,102,1,مبايعات,Sales,بيع - تسجيل مبدئى,Sell - Pre registration,15-07-2010,3,وحدة,...,82.68,845500.0,10226.17,,,,,,"(25.0657, 55.17128)",17.932402
1182465,1-102-2008-48417,102,1,مبايعات,Sales,بيع - تسجيل مبدئى,Sell - Pre registration,21-12-2009,3,وحدة,...,75.08,691000.0,9203.52,,,,,,"(-19.848297, -44.0350419)",11830.392519
