In [None]:
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
np.random.seed(0)
mean, variance = 0, 2
# np.random.normal takes a standard deviation, hence the square root of the variance.
std_dev = variance ** 0.5
# x is drawn before y, so the seeded sequence is consumed in the same order every run.
x = np.random.normal(loc=mean, scale=std_dev)
y = np.random.normal(loc=mean, scale=std_dev)
print(f"The x coordinate of a randomly thrown dart is {x:.2f}")
print(f"The y coordinate of a randomly thrown dart is {y:.2f}")

In [None]:
np.random.seed(1)
bulls_eye1 = [0, 0]
bulls_eye2 = [6, 0]
bulls_eyes = [bulls_eye1, bulls_eye2]
# Every dart shares the same spread; np.random.normal takes a standard deviation.
std_dev = variance ** 0.5
x_coordinates, y_coordinates = [], []
for bulls_eye in bulls_eyes:
    center_x, center_y = bulls_eye
    # 5000 darts per bull's-eye. x is always drawn before y so the seeded
    # random sequence is consumed in the same order on every run.
    for _ in range(5000):
        x = np.random.normal(center_x, std_dev)
        y = np.random.normal(center_y, std_dev)
        x_coordinates.append(x)
        y_coordinates.append(y)

plt.scatter(x_coordinates, y_coordinates)

In [None]:
from scipy.spatial.distance import euclidean
def nearest_bulls_eye(dart):
    """Return the index of the bull's-eye closest to `dart`.

    The candidate centers are read from the notebook-level `bulls_eyes`
    variable at call time, and closeness is measured with Euclidean distance.
    The result is whatever `np.argmin` returns for the list of distances.
    """
    distances = []
    for center in bulls_eyes:
        distances.append(euclidean(dart, center))
    return np.argmin(distances)

# Two hand-picked darts, one sitting just above each bull's-eye.
darts = [[0, 1], [6, 1]]
for dart, index in zip(darts, map(nearest_bulls_eye, darts)):
    print(f"The dart at position {dart} is closest to bulls-eye {index}")

In [None]:
def color_by_cluster(darts):
    """Scatter-plot the darts, colored by their nearest bull's-eye.

    `darts` is a sequence of [x, y] positions. Darts nearest to bull's-eye 0
    are drawn in green ('g') and darts nearest to bull's-eye 1 in black ('k').
    The bull's-eye positions come from the notebook-level `bulls_eyes`
    variable, both here and inside `nearest_bulls_eye`.

    Note: each bull's-eye must have at least one nearest dart, because the
    two-way unpack of the transposed array fails on an empty selection.
    """
    palette = ['g', 'k']
    labels = [nearest_bulls_eye(dart) for dart in darts]
    for bs_index in range(len(bulls_eyes)):
        members = [dart for dart, label in zip(darts, labels)
                   if label == bs_index]
        # Transposing the (n, 2) array yields one row of x's and one row of y's.
        xs, ys = np.array(members).T
        plt.scatter(xs, ys, color=palette[bs_index])

# Pair the parallel coordinate lists up into one [x, y] position per dart.
darts = [[x_position, y_position]
         for x_position, y_position in zip(x_coordinates, y_coordinates)]
color_by_cluster(darts)

In [None]:
# Re-seed the bull's-eye estimates with the first two darts, then re-plot the
# darts colored by whichever of these two starting centers is nearest.
bulls_eyes = np.array(darts[:2])
color_by_cluster(darts)

In [None]:
def update_bulls_eyes(darts):
    """Recompute every bull's-eye as the mean position of its nearest darts.

    `darts` is a sequence of [x, y] positions. Each dart is assigned to its
    nearest current bull's-eye (via `nearest_bulls_eye`, which reads the
    notebook-level `bulls_eyes`), and each bull's-eye is then moved to the
    mean x and mean y of the darts assigned to it.

    A bull's-eye that no dart is closest to keeps its current position;
    previously this case crashed while unpacking an empty array.

    Returns a list with one [x, y] entry per bull's-eye, in the same order
    as `bulls_eyes`. The notebook-level `bulls_eyes` itself is not modified.
    """
    updated_bulls_eyes = []
    nearest_bulls_eyes = [nearest_bulls_eye(dart) for dart in darts]
    for bs_index in range(len(bulls_eyes)):
        selected_darts = [dart for dart, nearest in zip(darts, nearest_bulls_eyes)
                          if nearest == bs_index]
        if not selected_darts:
            # There is nothing to average, so leave this center where it is.
            updated_bulls_eyes.append(list(bulls_eyes[bs_index]))
            continue
        # Transposing the (n, 2) array yields one row of x's and one row of y's.
        x_coordinates, y_coordinates = np.array(selected_darts).T
        mean_center = [np.mean(x_coordinates), np.mean(y_coordinates)]
        updated_bulls_eyes.append(mean_center)

    return updated_bulls_eyes

# One refinement step: move each bull's-eye to the mean of its nearest darts,
# then re-plot the darts against the updated centers.
bulls_eyes = update_bulls_eyes(darts)
color_by_cluster(darts)

In [None]:
# Repeat the refinement step ten more times, then plot the resulting grouping.
for i in range(10):
    bulls_eyes = update_bulls_eyes(darts)
    
color_by_cluster(darts)

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Fit a two-cluster K-means model to the darts. The returned array is indexed
# in parallel with `darts` by the plotting cell that follows.
cluster_model = KMeans(n_clusters=2)
assigned_bulls_eyes = cluster_model.fit_predict(darts)

print("Bull's-eye assignments:")
print(assigned_bulls_eyes)

In [None]:
# One color per cluster index; the loop bound is the number of bull's-eyes.
cluster_colors = ['g', 'k']
for bs_index in range(len(bulls_eyes)):
    selected_darts = [dart for dart, assigned in zip(darts, assigned_bulls_eyes)
                      if assigned == bs_index]
    # Transposing the (n, 2) array yields one row of x's and one row of y's.
    x_coordinates, y_coordinates = np.array(selected_darts).T
    plt.scatter(x_coordinates, y_coordinates, color=cluster_colors[bs_index])

In [None]:
# Two far-away darts, classified with the already-fitted model.
new_darts = [[500, 500], [-500, -500]]
new_bulls_eye_assignments = cluster_model.predict(new_darts)
for dart, bulls_eye_index in zip(new_darts, new_bulls_eye_assignments):
    print(f"Dart at {dart} is closest to bull's-eye {bulls_eye_index}")

In [None]:
k_values = range(1, 10)
# Fit one model per candidate K and record the inertia_ of each fit.
inertia_values = [KMeans(n_clusters=k).fit(darts).inertia_ for k in k_values]

plt.plot(k_values, inertia_values)
plt.xlabel('K')
plt.ylabel('Inertia')
plt.show()

In [None]:
new_bulls_eye = [12, 0]
# Same spread as the earlier darts; np.random.normal takes a standard deviation.
std_dev = variance ** 0.5
# Add 5000 darts around the third bull's-eye. This appends to `darts` in
# place, so re-running the cell keeps adding darts.
for _ in range(5000):
    x = np.random.normal(new_bulls_eye[0], std_dev)
    y = np.random.normal(new_bulls_eye[1], std_dev)
    darts.append([x, y])

# Refit one model per candidate K on the enlarged dart set.
inertia_values = [KMeans(n_clusters=k).fit(darts).inertia_ for k in k_values]

plt.plot(k_values, inertia_values)
plt.xlabel('K')
plt.ylabel('Inertia')

In [None]:
from sklearn.datasets import make_circles

In [None]:
x_coordinates, y_coordinates = [], []
# One make_circles call per ring factor; the label array it returns is unused.
for factor in [.3, .6, .99]:
    rock_ring, _ = make_circles(n_samples=800, factor=factor, noise=.03, random_state=1)
    # Column 0 holds the x value and column 1 the y value of every rock.
    x_coordinates.extend(rock_ring[:, 0])
    y_coordinates.extend(rock_ring[:, 1])
plt.scatter(x_coordinates, y_coordinates)

In [None]:
# Pair the parallel coordinate lists up into one [x, y] position per rock.
rocks = [[x, y] for x, y in zip(x_coordinates, y_coordinates)]

In [None]:
# Ask K-means to split the rocks into three clusters and keep the assignments.
rock_clusters = KMeans(n_clusters=3).fit_predict(rocks)

In [None]:
len(rock_clusters)

In [None]:
# Translate each rock's cluster index into a plot color (one color per cluster).
colors = [['g', 'y', 'k'][cluster] for cluster in rock_clusters]

In [None]:
plt.scatter(x_coordinates, y_coordinates, color=colors)

In [None]:
# Density parameters used by the cells below: two rocks are neighbors when they
# lie within `epsilon` of each other, and a rock sits in a dense region once it
# has at least `min_points` neighbors.
epsilon = .1
min_points = 10

In [None]:
# Collect the indices of every rock within `epsilon` of rock 0.
# rocks[1:] is enumerated from 1 so that each stored index is the rock's
# position in the full `rocks` list. Numbering the slice from 0 would shift
# every index down by one and make later rocks[i] lookups pick the wrong rock.
neighbor_indices = [i for i, rock in enumerate(rocks[1:], start=1)
                    if euclidean(rocks[0], rock) <= epsilon]

In [None]:
len(neighbor_indices)

In [None]:
num_neighbors = len(neighbor_indices)
print(f"The rock at index 0 has {num_neighbors} neighbors.")
# The region counts as dense once the neighbor count reaches min_points.
density_message = ("It lies in a dense region."
                   if num_neighbors >= min_points
                   else "It does not lie in a dense region.")
print(density_message)

In [None]:
# The dense region is rock 0 itself plus all of its neighbors.
dense_region_indices = [0] + neighbor_indices

In [None]:
dense_region_cluster = [rocks[i] for i in dense_region_indices]

In [None]:
dense_cluster_size = len(dense_region_cluster)

In [None]:
f"We found a dense cluster containing {dense_cluster_size} rocks"

In [None]:
dense_region_indices = set(dense_region_indices)

In [None]:
# Grow the dense region by one step: for every neighbor of rock 0, find the
# rocks within `epsilon` of it (the scan covers all rocks, so the neighbor
# itself is included), and merge them in when that neighbor is itself dense.
for index in neighbor_indices:
    point = rocks[index]
    neighbors_of_neighbors = [i for i, rock in enumerate(rocks) if euclidean(point, rock) <= epsilon]
    # Only neighbors that meet the density threshold pull in their own neighbors.
    if len(neighbors_of_neighbors) >= min_points:
        dense_region_indices.update(neighbors_of_neighbors)

In [None]:
from sklearn.cluster import DBSCAN

In [None]:
cluster_model = DBSCAN(eps=epsilon, min_samples=min_points)

In [None]:
# Cluster the rocks by density and plot them colored by cluster label.
# NOTE(review): the color list has three entries. A label of -1 (treated as an
# outlier later in this notebook) would silently pick the last color ('k')
# through negative indexing, and a label of 3 or more would raise IndexError.
# Confirm neither occurs for this data.
cluster_model = DBSCAN(eps=epsilon, min_samples=min_points)
rock_clusters = cluster_model.fit_predict(rocks)
colors = [['g', 'y', 'k'][cluster] for cluster in rock_clusters]
plt.scatter(x_coordinates, y_coordinates, color=colors)

In [None]:
len(rock_clusters)

In [None]:
def manhattan_distance(point_a, point_b, miles_per_block=.17):
    """Return the Manhattan (city-block) distance between two points, in miles.

    Parameters
    ----------
    point_a, point_b : array-like
        Coordinates measured in blocks. Lists and tuples are accepted as well
        as NumPy arrays; both points must have the same length.
    miles_per_block : float, optional
        Length of one block in miles. Defaults to .17.

    Returns
    -------
    The summed absolute coordinate differences (the number of blocks walked)
    multiplied by `miles_per_block`.
    """
    # Convert up front so plain Python sequences support element-wise subtraction.
    offsets = np.asarray(point_a) - np.asarray(point_b)
    num_blocks = np.sum(np.absolute(offsets))
    return miles_per_block * num_blocks

In [None]:
x = np.array([34, 5])
y = np.array([57, 8])

In [None]:
distance = manhattan_distance(x, y)

In [None]:
# Display the computed distance (the spelling of "Manhattan" is corrected).
f"Manhattan distance is {distance} miles"

In [None]:
points = [[35, 5], [33, 6], [37, 4], [40, 7], [45, 5]]

In [None]:
# Cluster the points with manhattan_distance as the metric. Since that metric
# reports miles, eps=1 is a one-mile neighborhood; min_samples=3 is the
# density threshold handed to DBSCAN.
clusters = DBSCAN(eps=1, min_samples=3, metric=manhattan_distance).fit_predict(points)

In [None]:
# Report and draw every point: clustered points as green dots, and points
# labeled -1 (outliers) as black crosses.
for i, (point, cluster) in enumerate(zip(points, clusters)):
    x_position, y_position = point
    if cluster != -1:
        print(f"Point at index {i} is in cluster {cluster}")
        plt.scatter(x_position, y_position, color='g')
    else:
        print(f"Point at index {i} is an outlier")
        plt.scatter(x_position, y_position, marker='x', color='k')
plt.grid(visible=True, which='both', alpha=.5)
plt.minorticks_on()

In [None]:
x_coordinates, y_coordinates = np.array(rocks).T

In [None]:
# Tabulate the rocks: one row per rock with its coordinates and cluster label.
df = pd.DataFrame({'X': x_coordinates, 'Y': y_coordinates, 'Cluster': rock_clusters})

In [None]:
df

In [None]:
df_cluster = df[df.Cluster == 0]

In [None]:
plt.scatter(df_cluster['X'], df_cluster['Y'])

In [None]:
# Plot every cluster except cluster 0, announcing what happens to each one.
for cluster_id, df_cluster in df.groupby('Cluster'):
    if cluster_id != 0:
        print(f"Plotting cluster {cluster_id}")
        plt.scatter(df_cluster.X, df_cluster.Y)
    else:
        print(f"Skipping over cluster {cluster_id}")

In [None]:
from math import sin, cos, asin
import numpy as np
def great_circle_distance(x1, y1, x2, y2):
    """Return the great-circle distance between two points on a unit sphere.

    Parameters
    ----------
    x1, y1 : float
        Latitude and longitude of the first point, in radians.
    x2, y2 : float
        Latitude and longitude of the second point, in radians.

    Returns
    -------
    float
        The distance along the surface of a sphere of radius 1. Multiply by
        a sphere's radius to get a distance in that radius's units.
    """
    delta_x, delta_y = x2 - x1, y2 - y1
    haversin = sin(delta_x / 2) ** 2 + np.prod([cos(x1), cos(x2), sin(delta_y / 2) ** 2])
    # The haversine term is at most 1 mathematically, but rounding can push it
    # a hair above 1 for (nearly) antipodal points, which would make asin
    # raise ValueError. Clamp it before taking the root.
    return 2 * asin(min(1.0, haversin) ** .5)

In [None]:
from math import pi

In [None]:
distance = great_circle_distance(0, 0, 0, pi)

In [None]:
f'The distance equals {distance} units'

In [None]:
latitude_north, longitude_north = (90.0, 0)
latitude_south, longitude_south = (-90.0, 0)

In [None]:
to_radians = np.radians([latitude_north,longitude_north, latitude_south, longitude_south])

In [None]:
to_radians

In [None]:
distance = great_circle_distance(*to_radians)

In [None]:
distance

In [None]:
earth_distance = 3956 * distance

In [None]:
f'The distance between poles equals {earth_distance} miles'

In [None]:
def travel_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in miles between two lat/lon points.

    All four arguments are in degrees. They are converted to radians, passed
    to `great_circle_distance`, and the unit-sphere result is scaled by 3956,
    the same miles factor this notebook uses for the pole-to-pole distance.
    """
    lat1_rad, lon1_rad, lat2_rad, lon2_rad = np.radians([lat1, lon1, lat2, lon2])
    unit_sphere_distance = great_circle_distance(lat1_rad, lon1_rad,
                                                 lat2_rad, lon2_rad)
    return unit_sphere_distance * 3956

In [None]:
# Sanity check: the helper must reproduce the pole-to-pole distance computed
# step by step in the preceding cells. Exact equality is used, presumably safe
# because both values go through the same arithmetic; switch to a tolerance
# if either computation changes.
assert travel_distance(90, 0, -90, 0) == earth_distance

In [None]:
from cartopy.crs import PlateCarree

In [None]:
plt.axes(projection=PlateCarree()).coastlines()

In [None]:
# Same coastline map as before, drawn on a larger 12x8 figure.
plt.figure(figsize=(12, 8))
plt.axes(projection=PlateCarree()).coastlines()

In [None]:
# NOTE(review): stock_img presumably draws cartopy's bundled background image
# in place of coastlines. Confirm against the cartopy docs.
plt.figure(figsize=(12, 8))
plt.axes(projection=PlateCarree()).stock_img()

In [None]:
import cartopy
# World map with coastlines plus the OCEAN and LAND features added on top.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=PlateCarree())
ax.coastlines()
ax.add_feature(cartopy.feature.OCEAN)
ax.add_feature(cartopy.feature.LAND)

In [None]:
# Same map as the previous cell, with the BORDERS feature added as well.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=PlateCarree())
ax.coastlines()
ax.add_feature(cartopy.feature.OCEAN)
ax.add_feature(cartopy.feature.LAND)
ax.add_feature(cartopy.feature.BORDERS)

In [None]:
plt.figure(figsize=(12, 8))
# Each tuple is a (latitude, longitude) pair.
coordinates = [
    (39.9526, -75.1652),
    (37.7749, -122.4194),
    (40.4406, -79.9959),
    (38.6807, -108.9769),
    (37.8716, -112.2727),
    (40.7831, -73.9712),
]
# Split the pairs into two parallel arrays.
latitudes, longitudes = np.array(coordinates).T
ax = plt.axes(projection=PlateCarree())
# scatter takes the horizontal values first, so longitudes come before latitudes.
ax.scatter(longitudes, latitudes)
# NOTE(review): set_global presumably widens the view to the whole globe. Confirm.
ax.set_global()
ax.coastlines()

In [None]:
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=PlateCarree())
# NOTE(review): the extent presumably reads [lon_min, lon_max, lat_min, lat_max].
# Confirm against the cartopy set_extent docs.
north_america_extent = [-145, -50, 0, 90]
ax.set_extent(north_america_extent)
ax.scatter(longitudes, latitudes)
def add_map_features():
    """Add coastlines, borders, ocean and land to the notebook-level `ax`.

    `ax` is not a parameter: it is looked up when the function is called, so
    the features go onto whichever axes `ax` refers to at that moment.
    """
    ax.coastlines()
    ax.add_feature(cartopy.feature.BORDERS)
    ax.add_feature(cartopy.feature.OCEAN)
    ax.add_feature(cartopy.feature.LAND)
add_map_features()

In [None]:
from cartopy.crs import LambertConformal

In [None]:
# Plot the same points on a Lambert conformal map limited to `us_extent`.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=LambertConformal())
us_extent = [-120, -75, 20, 50]
ax.set_extent(us_extent)
# NOTE(review): transform=PlateCarree() presumably declares that the points are
# plain longitude/latitude values to be re-projected onto this map. Confirm.
ax.scatter(longitudes, latitudes, transform=PlateCarree(), color='r', s=100)
# add_map_features draws on the notebook-level `ax` assigned just above.
add_map_features()

In [None]:
# Same Lambert conformal map as the previous cell, with the STATES feature added.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=LambertConformal())
us_extent = [-120, -75, 20, 50]
ax.set_extent(us_extent)
ax.scatter(longitudes, latitudes, transform=PlateCarree(), color='r', s=100)
# add_map_features draws on the notebook-level `ax` assigned just above.
add_map_features()
ax.add_feature(cartopy.feature.STATES)

In [None]:
from geonamescache import GeonamesCache
gc = GeonamesCache()

In [None]:
continents = gc.get_continents()

In [None]:
continents

In [None]:
continents.keys()

In [None]:
north_america = continents['NA']

In [None]:
north_america.keys()

In [None]:
# Print the value stored under each of the continent record's three name keys.
for name_key in ['name', 'asciiName', 'toponymName']:
    print(north_america[name_key])

In [None]:
# Convert the record's 'lat' and 'lng' entries to floats for plotting.
latitude, longitude = float(north_america['lat']), float(north_america['lng'])

In [None]:
latitude, longitude

In [None]:
# Mark the continent record's coordinates on the North America map.
plt.figure(figsize=(12, 8))
ax = plt.axes(projection=PlateCarree())
ax.set_extent(north_america_extent)
# Longitude is the horizontal value, so it is passed first.
ax.scatter([longitude], [latitude], s=200)
# add_map_features draws on the notebook-level `ax` assigned just above.
add_map_features()