<a href="https://colab.research.google.com/github/lamphgg/Airbnb_Amsterdam/blob/main/Numpy_Project_code_PFDI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Downloading the Dataset


In [None]:
%%capture
!pip install numpy pandas streamlit gdown currencyconverter

In [None]:
import numpy as np

# Disable scientific notation when printing arrays so that large values
# (listing ids) and small ones (coordinates) stay readable side by side
np.set_printoptions(suppress=True)

In [None]:
import os
import shutil

import gdown
from numpy import genfromtxt

# Google Drive id of the listings export and the local name it is saved under.
# This file is based on data from: http://insideairbnb.com/get-the-data/
file_id_1 = "13fyESiH1ZEnMV6eabAyhe20t4W6peEWK"
downloaded_file_1 = "WK1_Airbnb_Amsterdam_listings_proj.csv"

# Fetch the file by id; the output path is echoed as the cell result
gdown.download(id=file_id_1, output=downloaded_file_1)

Downloading...
From: https://drive.google.com/uc?id=13fyESiH1ZEnMV6eabAyhe20t4W6peEWK
To: /content/WK1_Airbnb_Amsterdam_listings_proj.csv
100%|██████████| 246k/246k [00:00<00:00, 73.3MB/s]


'WK1_Airbnb_Amsterdam_listings_proj.csv'

## Preprocessing the Dataset


In [None]:
from numpy import genfromtxt

# Read the pipe-delimited file as text: every field stays a string for now
# because the price values still carry currency formatting (e.g. '$88.00')
my_data = genfromtxt(downloaded_file_1, delimiter='|',dtype='unicode')

In [None]:
# Peek at the first four columns of the raw table: each row is one attribute
# (id, price, latitude, longitude), preceded by an index row and a label column
print(my_data[:,:4])

[['' '0' '1' '2']
 ['id' '23726706' '35815036' '31553121']
 ['price' '$88.00' '$105.00' '$152.00']
 ['latitude' '52.34916' '52.42419' '52.43237']
 ['longitude' '4.97879' '4.95689' '4.91821']]


In [None]:
# Drop the first row (running listing index) and the first column (attribute
# labels) so that only the data values remain
matrix = my_data[1:,1:]
matrix[:,:4]

array([['23726706', '35815036', '31553121', '34745823'],
       ['$88.00', '$105.00', '$152.00', '$87.00'],
       ['52.34916', '52.42419', '52.43237', '52.2962'],
       ['4.97879', '4.95689', '4.91821', '5.01231']], dtype='<U18')

In [None]:
# Transpose the matrix (swap rows and columns) so that each row is one listing
# and the columns are: id, price, latitude, longitude
matrix = matrix.T
print(matrix[:5,:])

[['23726706' '$88.00' '52.34916' '4.97879']
 ['35815036' '$105.00' '52.42419' '4.95689']
 ['31553121' '$152.00' '52.43237' '4.91821']
 ['34745823' '$87.00' '52.2962' '5.01231']
 ['44586947' '$160.00' '52.31475' '5.0303']]


In [None]:
# Strip the currency formatting: first the dollar sign, then the
# thousands-separator comma, so the price strings can be parsed as numbers
for unwanted_symbol in ("$", ","):
    matrix = np.char.replace(matrix, unwanted_symbol, "")

In [None]:
# Sanity check: select every entry that still contains a dollar sign
# (np.char.find yields -1 when the substring is absent); expect an empty array
matrix[np.char.find(matrix, "$") >= 0]

array([], dtype='<U18')

In [None]:
# Sanity check: select every entry that still contains a comma
# (np.char.find yields -1 when the substring is absent); expect an empty array
matrix[np.char.find(matrix, ",") >= 0]

array([], dtype='<U18')

In [None]:
# Convert the strings to float64.
# float32 has only a 24-bit significand, so integers above 16,777,216 are not
# all representable and 8-digit listing ids get silently altered
# (e.g. 31553121 -> 31553120). float64 keeps the ids exact.
matrix = matrix.astype(np.float64)
print(matrix[:5,:])

[[23726706.            88.            52.34916        4.97879]
 [35815036.           105.            52.42419        4.95689]
 [31553120.           152.            52.43237        4.91821]
 [34745824.            87.            52.2962         5.01231]
 [44586948.           160.            52.31475        5.0303 ]]


## Convert currency

In [None]:
from currency_converter import CurrencyConverter

# Converter instance used below to obtain the USD -> EUR rate
cc = CurrencyConverter()

# Columns at this point: airbnb_id, price_usd, latitude, longitude
print(matrix[:5,:])

[[23726706.            88.            52.34916        4.97879]
 [35815036.           105.            52.42419        4.95689]
 [31553120.           152.            52.43237        4.91821]
 [34745824.            87.            52.2962         5.01231]
 [44586948.           160.            52.31475        5.0303 ]]


In [None]:
# Show the price column (index 1), which is the one converted below
print('\tSecond column is:\n', matrix[:, 1])

	Second column is:
 [ 88. 105. 152. ... 180. 174.  65.]


In [None]:
# Inspect the converter's `currencies` attribute
# (presumably the supported currency codes - confirm 'USD' and 'EUR' are present)
cc.currencies

#### Converting to EUR

In [None]:
# Value of one US dollar expressed in euros
eur_rate = cc.convert(1, 'USD', 'EUR')

# Scale the price column in place from USD to EUR
matrix[:, 1] *= eur_rate
print(matrix[:, 1])

[ 81.37599  97.09636 140.55853 ... 166.4509  160.90253  60.10727]


In [None]:
# Apply 7% inflation to the price column: scale it in place by (1.00 + 0.07)
matrix[:, 1] *= (1.00 + 0.07)
matrix[:, 1]

array([ 87.07232, 103.89311, 150.39764, ..., 178.10246, 172.16571,
        64.31478], dtype=float32)

In [None]:
# Round the price column to 2 decimals (round-to-nearest, not round-down).
# np.round is used because the np.round_ alias was removed in NumPy 2.0.
matrix[:,1] = np.round(matrix[:,1], decimals=2)
matrix[:,1]

array([ 87.07, 103.89, 150.4 , ..., 178.1 , 172.17,  64.31], dtype=float32)

#### Choose a location: Van Gogh Museum

I took the coordinates from [here](https://www.google.com/search?q=coordinates+of+van+gogh+museum+amsterdam&rlz=1C1ONGR_enUS1042US1042&oq=coordinates+of+van+gogh+museum+amsterdam&aqs=chrome..69i57j0i22i30i625j0i390l3.5845j0j7&sourceid=chrome&ie=UTF-8).




In [None]:
# Coordinates of the Van Gogh Museum in decimal degrees; this is the reference
# point from which the distance to every listing is measured
latitude = 52.3584
longitude = 4.8811

In [None]:
import math

def from_location_to_airbnb_listing_in_meters(lat1: float, lon1: float, lat2, lon2):
    """Great-circle distance in meters between a point and one or many listings.

    Uses the haversine formula on a sphere with a radius of 6,371,000 m.
    Source: https://community.esri.com/t5/coordinate-reference-systems-blog
    /distance-on-a-sphere-the-haversine-formula/ba-p/902128

    The computation uses NumPy ufuncs, so ``lat2`` / ``lon2`` may be scalars,
    lists or arrays (broadcast against ``lat1`` / ``lon1``). The previous
    ``math``-based version only accepted scalars and raised ``TypeError`` when
    given whole columns.

    Args:
        lat1: Latitude of the reference point, in decimal degrees.
        lon1: Longitude of the reference point, in decimal degrees.
        lat2: Latitude(s) of the listing(s), in decimal degrees.
        lon2: Longitude(s) of the listing(s), in decimal degrees.

    Returns:
        The distance rounded to whole meters: a float scalar for scalar input,
        or a float64 array of the broadcast shape for array input.
    """
    R = 6371000  # Radius of Earth in meters

    # Work in float64 regardless of the input dtype: float32 coordinates would
    # otherwise limit the precision of the small differences computed below.
    lat1 = np.asarray(lat1, dtype=np.float64)
    lon1 = np.asarray(lon1, dtype=np.float64)
    lat2 = np.asarray(lat2, dtype=np.float64)
    lon2 = np.asarray(lon2, dtype=np.float64)

    phi_1 = np.radians(lat1)
    phi_2 = np.radians(lat2)

    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)

    a = (
        np.sin(delta_phi / 2.0) ** 2
        + np.cos(phi_1) * np.cos(phi_2) * np.sin(delta_lambda / 2.0) ** 2
    )
    # Rounding error can push `a` marginally outside [0, 1] (e.g. for antipodal
    # points), which would make the square roots below invalid.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    meters = R * c  # Output distance in meters

    return np.round(meters, 0)

In [None]:
# Compute the distance from the reference point to every listing.
# np.vectorize wraps the helper so that it is applied element-wise over the
# latitude (column 2) and longitude (column 3) entries of the dataset.

vectorizing_function = np.vectorize(from_location_to_airbnb_listing_in_meters)
distance = vectorizing_function(latitude, longitude, matrix[:,2], matrix[:,3])

In [None]:
%%timeit -r 4 -n 100

# Benchmark the np.vectorize approach (4 runs of 100 loops each).
# np.vectorize allows a Python function to be used in a (semi-)vectorized way
conv_to_meters = np.vectorize(from_location_to_airbnb_listing_in_meters)

# The element-wise distance computation being timed
conv_to_meters(latitude, longitude, matrix[:, 2], matrix[:, 3])

36.4 ms ± 10.8 ms per loop (mean ± std. dev. of 4 runs, 100 loops each)


## Prep the Dataset for Download!





In [None]:
# Compute the distance from the reference point to every listing.
# The helper is applied element-wise through np.vectorize, so this cell runs
# whether or not the helper itself accepts whole arrays (calling a scalar-only
# helper directly with the coordinate columns raises a TypeError).
meters = np.vectorize(from_location_to_airbnb_listing_in_meters)(
    latitude, longitude, matrix[:, 2], matrix[:, 3]
)

# Turn the 1-D result into a single column to make concatenation possible
meters = meters.reshape(-1, 1)

# Append the distance in meters as a new last column of the matrix
matrix = np.concatenate((matrix, meters), axis=1)

In [None]:
# Append a color column to the matrix (0 for every regular listing)
colors = np.zeros(meters.shape)
matrix = np.concatenate((matrix, colors), axis=1)

# Prepend our favorite location as the first row (id 1, price 0, color 1).
# It must use the same coordinates the distances were measured from;
# otherwise its "0 meters" entry would not match the location it is drawn at.
fav_entry = np.array([1, 0, latitude, longitude, 0, 1]).reshape(1, -1)
matrix = np.concatenate((fav_entry, matrix), axis=0)

# Entries: airbnb_id, price, latitude, longitude,
# meters from favorite point, color
matrix[:5, :]

array([[       1.        ,        0.        ,       52.36      ,
               4.8852    ,        0.        ,        1.        ],
       [23726706.        ,       87.06999969,       52.34915924,
               4.97878981,     6714.        ,        0.        ],
       [35815036.        ,      103.88999939,       52.42419052,
               4.95689011,     8943.        ,        0.        ],
       [31553120.        ,      150.3999939 ,       52.43236923,
               4.91821003,     8602.        ,        0.        ],
       [34745824.        ,       86.08000183,       52.2961998 ,
               5.01231003,    11284.        ,        0.        ]])

In [None]:
# Write the final matrix (favorite entry first, then all listings) to a
# comma-separated file that next week's primer picks up
np.savetxt(
    fname="WK1_Airbnb_Amsterdam_listings_proj_solution.csv",
    X=matrix,
    delimiter=",",
)