In [0]:
import requests
import geopandas as gpd
import geojson
import pandas as pd
from tqdm import tqdm

# Register tqdm with pandas so that `progress_apply` / `progress_map` are
# available on Series/DataFrames and show a progress bar for long-running applies.
tqdm.pandas()

In [0]:
# Raw inputs: the Ames housing table and the Ames real-estate table.
ames_housing = pd.read_csv('data/AmesHousing.csv')
realestate_data = pd.read_csv('data/Ames_Real_Estate_Data.csv')

# Neighbourhood lookup: column names are supplied explicitly (so the first row
# of the file is read as data), and the codes are stripped of surrounding
# whitespace so they can be matched exactly in a join.
neighbourhoods = (
    pd.read_csv('data/neighbourhood_codes.csv', names=['codes', 'Neighborhood_name'])
    .assign(codes=lambda lookup: lookup['codes'].str.strip())
)


In [0]:
# Attach the neighbourhood name to every housing row by matching the housing
# table's 'Neighborhood' code against the lookup's 'codes' column (inner join).
cleaned_data = pd.merge(
    ames_housing,
    neighbourhoods,
    how='inner',
    left_on='Neighborhood',
    right_on='codes',
)

In [0]:
# Sanity check on the join: the non-null counts reported for the code column
# and the name column should agree.
cleaned_data[['Neighborhood', 'Neighborhood_name']].info()

In [0]:
# Bring in the real-estate columns by matching the housing 'PID' against the
# real-estate table's 'MapRefNo' (inner join).
cleaned_data = pd.merge(
    cleaned_data,
    realestate_data,
    how='inner',
    left_on='PID',
    right_on='MapRefNo',
)

In [0]:
# Sanity check on the join: print how many distinct PIDs survived, then show
# the two key columns side by side.
distinct_pids = cleaned_data['PID'].unique()
print(len(distinct_pids))
cleaned_data[['PID', 'MapRefNo']]

In [0]:
from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Use Nominatim as geolocator provider. The RateLimiter spaces calls at least
# one second apart, so geocoding the whole table takes roughly one second per row.
geolocator = Nominatim(user_agent="ames-iowa-nycdsa-1")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)


def geocode_address(address):
    """Geocode one address string with the rate-limited Nominatim client.

    Returns whatever `geocode` returns for a string input, and None for
    anything that is not a string (e.g. the NaN produced when 'Prop_Addr' is
    missing), so that no request is spent on an unusable query.
    """
    if not isinstance(address, str):
        return None
    return geocode(address)


# Get location from provider, one request per row. The geocoder expects a
# single query string, so it is applied element-wise instead of being handed
# the whole Series. `progress_apply` is provided by the earlier `tqdm.pandas()` call.
cleaned_data['Prop_Addr_Full'] = cleaned_data['Prop_Addr'] + ", Ames, IA, USA"
cleaned_data['geolocation'] = cleaned_data['Prop_Addr_Full'].progress_apply(geocode_address)

In [0]:
# Search for the place
neigh = 'Bloomington+Heights'  # NOTE(review): not used by the query below - confirm intent
url = 'https://nominatim.openstreetmap.org/search?q=17+Strada+Pictor+Alexandru+Romano%2C+Bukarest&format=geojson'

# Actually fetch the URL: `geojson.loads` only parses text, it does not perform
# HTTP requests. TLS verification is left enabled, and an identifying
# User-Agent is sent with the request.
response = requests.get(url, headers={'User-Agent': 'ames-iowa-nycdsa-1'}, timeout=30)
response.raise_for_status()

# Parse the GeoJSON response that was just downloaded (no second request)
feature_collection = geojson.loads(response.text)
gdf = gpd.GeoDataFrame.from_features(feature_collection['features'], crs='EPSG:4326')

# Print the GeoDataFrame
gdf.head()

In [0]:
import os

import googlemaps
from datetime import datetime

# Never commit API keys: read the key from the environment instead. A key that
# was previously hardcoded in this notebook should be considered leaked and be
# revoked/rotated in the Google Cloud console.
key = os.environ["GOOGLE_MAPS_API_KEY"]

gmaps = googlemaps.Client(key=key)

# Geocoding an address
geocode_result = gmaps.geocode('1600 Amphitheatre Parkway, Mountain View, CA')

# Look up an address with reverse geocoding
# reverse_geocode_result = gmaps.reverse_geocode((40.714224, -73.961452))

# # Validate an address with address validation
# addressvalidation_result =  gmaps.addressvalidation(['1600 Amphitheatre Pk'],
#                                                     regionCode='US',
#                                                     locality='Mountain View',
#                                                     enableUspsCass=True)

In [0]:
import geocoder

# Try the ArcGIS provider on a single address and show its GeoJSON form.
sample_address = '4025 BERKSHIRE AVE, Ames, IA, USA'
g = geocoder.arcgis(sample_address)
g.geojson

In [0]:
def _arcgis_json(address):
    """Geocode one address with the ArcGIS provider and return the result's `.json`."""
    return geocoder.arcgis(address).json


# Acquiring JSON coordinates: one ArcGIS request per row of 'Prop_Addr_Full'.
cleaned_data['geolocation'] = cleaned_data['Prop_Addr_Full'].apply(_arcgis_json)

In [0]:
def _coordinate(geolocation, key):
    """Return `geolocation[key]`, or NaN when the geocoder result is unusable.

    `geolocation` is one entry of the 'geolocation' column. Anything that is
    not a dict (e.g. None/NaN for a failed lookup) or that lacks `key` yields NaN.
    """
    if not isinstance(geolocation, dict):
        return float('nan')
    value = geolocation.get(key)
    return float('nan') if value is None else value


# Build the columns in one pass each. The previous row-by-row chained
# assignment (`df['lat'][i] = ...`) required 'lat'/'lng' to exist already and
# mixed positional counters with index labels.
geolocations = cleaned_data['geolocation']
cleaned_data['lat'] = geolocations.apply(_coordinate, args=('lat',))
cleaned_data['lng'] = geolocations.apply(_coordinate, args=('lng',))


In [0]:
import numpy as np
import json
from shapely.geometry import shape

def geojsonification(x):
    """Turn the feature nested in a row's geocoder result into a shapely geometry.

    Parameters
    ----------
    x : mapping
        Must support ``x['geolocation']['raw']['feature']``.

    Returns
    -------
    The object built by ``shape`` from the feature after a JSON round-trip
    through ``geojson.loads`` when the feature is a plain ``dict``;
    ``np.nan`` otherwise.
    """
    feature = x['geolocation']['raw']['feature']

    # Guard clause: anything other than a plain dict is reported as missing.
    if type(feature) is not dict:
        return np.nan

    # Serialise and re-parse so that `shape` receives a geojson object.
    serialised = json.dumps(feature)
    return shape(geojson.loads(serialised))

In [0]:
# Build point geometries from the coordinate columns (x = longitude,
# y = latitude), tagged with the EPSG:4326 coordinate reference system.
cleaned_data['geometry'] = gpd.points_from_xy(
    x=cleaned_data['lng'],
    y=cleaned_data['lat'],
    crs="EPSG:4326",
)

In [0]:
# Wrap the table in a GeoDataFrame whose active geometry is the 'geometry' column.
cleaned_data_gdf = gpd.GeoDataFrame(
    data=cleaned_data,
    geometry='geometry',
)

In [0]:
# Show the table without the geometry column.
display(cleaned_data.drop('geometry', axis='columns'))

Databricks data profile. Run in Databricks to view.

In [0]:
# Centroid of all properties merged into a single geometry.
# Centroids computed directly on longitude/latitude (a geographic CRS) are
# flagged by geopandas as likely incorrect, so the dissolved geometry is first
# projected to its estimated UTM zone and the result is converted back to the
# original CRS for display.
dissolved = cleaned_data_gdf.dissolve()
projected_crs = dissolved.estimate_utm_crs()
dissolved.to_crs(projected_crs).centroid.to_crs(dissolved.crs)

In [0]:
# Remove entry with no address
NO_ADDRESS_PID = 533210020
cleaned_data = cleaned_data.loc[cleaned_data['PID'] != NO_ADDRESS_PID]

In [0]:
import os
from itertools import cycle

import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Read the Mapbox token from the environment instead of committing it to the
# notebook. A token that was previously hardcoded here should be rotated.
mapbox_access_token = os.environ["MAPBOX_ACCESS_TOKEN"]

selected_column = 'Neighborhood_name'

# Work on a sorted copy so the source GeoDataFrame is not modified and the
# legend entries appear in alphabetical order.
gdf = cleaned_data_gdf.sort_values(by=selected_column)
# Get all unique values from the selected column
unique_values = gdf[selected_column].unique()

# Generate a list of distinct colors. 'tab20' alone has only 20 entries, so it
# is extended with 'tab20b'; `cycle` then guarantees every category gets a
# color even if there are more categories than palette entries (a plain `zip`
# would silently drop the extra categories and leave their markers uncolored).
colors = list(plt.cm.tab20.colors) + list(plt.cm.tab20b.colors)
color_scale = [f'rgb({int(r * 255)},{int(g * 255)},{int(b * 255)})' for r, g, b in colors]
# Map each unique category to a color
unique_values_colours = dict(zip(unique_values, cycle(color_scale)))

# Map the categories in the GeoDataFrame to the corresponding colors
gdf['color'] = gdf[selected_column].map(unique_values_colours)

fig = go.Figure()

# One trace per category so that each one gets its own legend entry.
for val in unique_values:
    df_color = gdf[gdf[selected_column] == val]

    fig.add_trace(
        go.Scattermapbox(
            name=val,
            lat=df_color['lat'],
            lon=df_color['lng'],
            mode='markers',
            marker=dict(
                size=5,
                opacity=1,
                color=df_color['color'],
            ),
            # hovertext=df_color['tooltip'],
            hovertemplate='<extra></extra>',
        )
    )

fig.update_layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial map centre and zoom level.
        center=dict(
            lat=42.03327,
            lon=-93.64773
        ),
        pitch=0,
        zoom=10
    ),
)

display(fig)

In [0]:
import os

import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Read the Mapbox token from the environment instead of committing it to the
# notebook. A token that was previously hardcoded here should be rotated.
mapbox_access_token = os.environ["MAPBOX_ACCESS_TOKEN"]

selected_column = 'Lot Area'

# Filter into a new frame: an in-place `dropna` on an alias would silently
# remove rows from `cleaned_data` itself and change results on re-run.
gdf = cleaned_data.dropna(subset=[selected_column])
values = gdf[selected_column]

fig = go.Figure()

fig.add_trace(
    go.Scattermapbox(
        # Name the trace after the plotted column rather than relying on a
        # loop variable left over from another cell.
        name=selected_column,
        lat=gdf['lat'],
        lon=gdf['lng'],
        mode='markers',
        marker=dict(
            size=5,
            opacity=1,
            color=values,
            colorscale='viridis',
            # Show the colorbar so the continuous colors can be interpreted.
            showscale=True,
            colorbar=dict(title=selected_column),
        ),
        hovertext=values,
        # Reference the hovertext explicitly: a template containing only
        # '<extra></extra>' suppresses it and leaves the hover box empty.
        hovertemplate=f'{selected_column}: %{{hovertext}}<extra></extra>',
    )
)

fig.update_layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial map centre and zoom level.
        center=dict(
            lat=42.03327,
            lon=-93.64773
        ),
        pitch=0,
        zoom=10
    ),
)

display(fig)

In [0]:
import os

import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Read the Mapbox token from the environment instead of committing it to the
# notebook. A token that was previously hardcoded here should be rotated.
mapbox_access_token = os.environ["MAPBOX_ACCESS_TOKEN"]

selected_column = 'Neighborhood_name'

# Drop rows without a Lot Area so that every marker has a computable color
# (`int()` in `color_scale` cannot convert NaN).
gdf = cleaned_data_gdf.dropna(subset=['Lot Area'])

values = gdf['Lot Area']
max_value = values.max()


def color_scale(value):
    """Map a Lot Area value to an 'rgb(0, 0, B)' string.

    B is `value` scaled linearly against the module-level `max_value` onto the
    0-255 range and truncated to an integer.
    """
    return 'rgb(0, 0, {})'.format(int(255 * value / max_value))


# Create a Scattermapbox object. Coordinates, colors and hover text all come
# from the same frame so their lengths match; the previous version took lat/lon
# from a single-neighbourhood subset left over from another cell's loop.
scatter = go.Scattermapbox(
    lat=gdf['lat'],
    lon=gdf['lng'],
    mode='markers',
    marker=go.scattermapbox.Marker(
        # Scalars apply to every marker; one-element lists only cover the first point.
        size=10,
        color=[color_scale(value) for value in values],  # set color based on a data column
        opacity=0.8,
    ),
    # Hover text: the neighbourhood name of each point.
    text=gdf[selected_column],
)

fig = go.Figure(data=scatter)

fig.update_layout(
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        accesstoken=mapbox_access_token,
        bearing=0,
        # Initial map centre and zoom level.
        center=dict(
            lat=42.03327,
            lon=-93.64773
        ),
        pitch=0,
        zoom=10
    ),
)

display(fig)