In [18]:
import numpy as np
import pandas as pd
import numpy
import joblib
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
import requests
import heapq
import folium
from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup

In [2]:
# Load the traffic samples and order the rows by their 'Sample Time' column.
# NOTE(review): read_csv is not asked to parse dates here, so this presumably
# sorts the column as text - confirm that this matches chronological order.
df = pd.read_csv('data.csv').sort_values(['Sample Time'], ascending=True)
# Preview the first rows of the sorted frame.
df.head()

Unnamed: 0,Sample Time,1212480 Lane 1 Flow,1212480 Lane 2 Flow,1212480 Lane 3 Flow,day_of_week,day1,day2,day3,day4,day5,...,minute15,minute20,minute25,minute30,minute35,minute40,minute45,minute50,minute55,total
101348,01-01-2021 00:00,1,6,3,5,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,10
101349,01-01-2021 00:05,0,3,5,5,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,8
101350,01-01-2021 00:10,0,2,2,5,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,4
101351,01-01-2021 00:15,1,5,0,5,0,0,0,0,1,...,1,0,0,0,0,0,0,0,0,6
101352,01-01-2021 00:20,1,9,2,5,0,0,0,0,1,...,0,1,0,0,0,0,0,0,0,12


In [3]:
def get_input(timestamp="04-26-2024 16:35:00"):
    """Build the 43-slot one-hot feature vector for a point in time.

    Parameters
    ----------
    timestamp : str or datetime-like, optional
        Anything ``pd.to_datetime`` accepts. The default is the fixed demo
        time, so calling ``get_input()`` with no argument keeps its result.

    Returns
    -------
    numpy.ndarray
        Float vector of length 43 with three entries set to 1: index
        ``day_of_week - 1`` (Monday is day 1), index ``5 + hour`` and index
        ``31 + minute // 5``, all taken from the time snapped to the nearest
        5-minute mark (seconds are ignored when snapping).
    """
    time = pd.to_datetime(timestamp)
    print("time : ", time)

    # Snap the minute to the nearest multiple of 5 by shifting the whole
    # timestamp. Minutes 58-59 round up to 60; shifting lets that roll over
    # into minute 0 of the next hour (and next day if needed) instead of
    # producing index 31 + 12 = 43, which is outside the 43-slot vector.
    nearest_slot = round(time.minute / 5)
    snapped = time + pd.Timedelta(minutes=5 * nearest_slot - time.minute)

    day_of_week = snapped.weekday() + 1
    print("day of week : ", day_of_week)
    input_array = np.zeros(43)
    print("input array : ", input_array)
    input_array[day_of_week - 1] = 1
    # NOTE(review): this places hour h at index 5 + h, so hours 0 and 1 share
    # indices 5 and 6 with day-of-week 6 and 7. Confirm the offset against the
    # column order the model was trained on before changing it.
    input_array[6 + snapped.hour - 1] = 1
    input_array[31 + snapped.minute // 5] = 1
    print("updated input array : ", input_array)
    return input_array
input_array = get_input()

time :  2024-04-26 16:35:00
day of week :  5
input array :  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
updated input array :  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


In [4]:
def transform_input(input_array):
    """Expand a 43-element feature vector into degree-2 polynomial terms.

    A ``PolynomialFeatures(2)`` transformer is fitted on the training part of
    a 90/10 split of the global ``df`` (label, timestamp, lane-flow and raw
    day/hour/minute columns removed) and then applied to ``input_array``
    reshaped to a single row. The transformed row is printed and returned.
    """
    non_feature_columns = ['Sample Time','total', '1212480 Lane 1 Flow','1212480 Lane 2 Flow','1212480 Lane 3 Flow','day_of_week','hour','minute']
    feature_frame = df.drop(non_feature_columns, axis = 1)
    target = df['total']
    # Only the training features of the split are needed to fit the expander.
    x_train = train_test_split(feature_frame, target, test_size=0.1, random_state=1, shuffle=True)[0]
    expander = PolynomialFeatures(2)
    expander.fit(x_train)
    single_row = input_array.reshape(1, 43)
    expanded = expander.transform(single_row)
    print("transformed : ", expanded)
    return expanded
transformed_array = transform_input(input_array)

transformed :  [[1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 



In [5]:
def make_predictions(transformed_array):
    """Load the model stored in 'rf.pkl' and predict for one 43-feature row.

    The argument is reshaped to ``(1, 43)`` before being passed to the
    model's ``predict``; whatever ``predict`` returns is handed back.
    """
    # joblib.load unpickles the file, so 'rf.pkl' must come from a trusted source.
    with open('rf.pkl', 'rb') as model_file:
        estimator = joblib.load(model_file)
    single_row = transformed_array.reshape(1, 43)
    return estimator.predict(single_row)
# The call below passes the raw one-hot vector (input_array), not the
# polynomial expansion produced by transform_input.
prediction = make_predictions(input_array).round()
print("prediction : ", prediction)

prediction :  [225.]




In [6]:
def newsapi(api_key=None):
    """Fetch article descriptions that mention traffic-relevant incidents.

    Queries the newsapi.org ``/v2/everything`` endpoint for "california"
    between 2024-04-20 and 2024-04-24 and keeps every non-empty description
    containing 'accident', 'protest' or 'lane close' (case-insensitive).
    Each kept description is printed; "No news found" is printed when none
    match.

    Parameters
    ----------
    api_key : str, optional
        NewsAPI key. When omitted it is read from the ``NEWSAPI_KEY``
        environment variable, so the credential never lives in the notebook.

    Returns
    -------
    list of str
        The matching descriptions, in response order.

    Raises
    ------
    RuntimeError
        If no API key is supplied and ``NEWSAPI_KEY`` is not set.
    requests.HTTPError
        If the service answers with an error status.
    """
    import os

    api_key = api_key or os.environ.get('NEWSAPI_KEY')
    if not api_key:
        raise RuntimeError(
            "No NewsAPI key available: set the NEWSAPI_KEY environment "
            "variable or pass api_key to newsapi()."
        )

    # Let requests build and encode the query string ('San Francisco'
    # contains a space).
    # NOTE(review): 'in' does not look like a documented /v2/everything
    # parameter; it is kept so the request matches what was sent before -
    # confirm whether 'searchIn' or a richer 'q' was intended.
    params = {
        'q': 'california',
        'in': 'San Francisco',
        'from': '2024-04-20T00:00:01Z',
        'to': '2024-04-24T23:59:59Z',
        'apiKey': api_key,
    }
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get('https://newsapi.org/v2/everything', params=params, timeout=10)
    # Error payloads carry no 'articles' key; fail with the HTTP status instead.
    response.raise_for_status()

    keywords = ('accident', 'protest', 'lane close')
    articles = []
    for article in response.json().get('articles', []):
        # The description can be missing or null; treat both as empty text.
        description = article.get('description') or ''
        lowered = description.lower()
        if any(keyword in lowered for keyword in keywords):
            print(description)
            articles.append(description)
    if not articles:
        print("No news found")
    return articles
articles = newsapi()

After dozens of pro-Palestinian protesters were arrested at Columbia, Yale and NYU, students at colleges from Massachusetts to Minnesota to California are erecting encampments in solidarity.
A tram accident at Universal Studios Hollywood in Los Angeles injured 15 people Saturday night, authorities and the company said.
The Southern California home Anne Heche destroyed in the car accident that claimed her life has been fully restored -- and it's on the market too ... going for a bundle. The 3-bedroom, 3-bathroom Los Angeles residence looks strikingly different…
La semana pasada Google anunció el despido de 28 empleados que habían participado en una protesta contra el millonario contrato que Google había firmado con el gobierno de Israel por el uso de Google Cloud.
<!-- BREAK 1 -->
Sundar Pichai se ha posicionado a t…
Protests on college campuses related to the Israel-Hamas War have many Jews nervous heading into the holiday.


In [27]:
# webscraping
url = "https://www.sfchronicle.com/in-depth-projects/2024/"
response = requests.get(url)

if response.status_code != 200:
    # Any status other than 200 is reported as a failed fetch.
    print("Failed to retrieve the webpage.")
else:
    # Parse the page and collect the <div> elements carrying the target class string.
    soup = BeautifulSoup(response.content, 'html.parser')
    web_data = soup.find_all('div', class_='f fdc mx20 sm:mx32 xl:mxa sy20 mb40 xl:mw1200px')

    if not web_data:
        print("No data found on the webpage.")
    # Print the stripped text of every matching element (no-op when none matched).
    for data in web_data:
        print(data.text.strip())

Failed to retrieve the webpage.


In [48]:
def dijkstra(graph, start, end):
    """Return the cheapest path from ``start`` to ``end``, or None.

    Parameters
    ----------
    graph : dict
        Adjacency mapping ``{node: {neighbor: weight}}``. A node that only
        appears as a neighbor (no key of its own) is treated as having no
        outgoing edges.
    start, end : hashable
        Node labels. Any hashable label works, including falsy ones such as
        ``0`` or ``''``.

    Returns
    -------
    list or None
        Nodes from ``start`` to ``end`` inclusive; ``[start]`` when both are
        the same node; ``None`` when ``end`` cannot be reached.
    """
    # A node is trivially reachable from itself; no predecessor is ever
    # recorded for the start node, so this case must be answered up front.
    if start == end:
        return [start]

    infinity = float('inf')
    # Best known distance per node; nodes not present count as infinitely far.
    distances = {start: 0}
    # Predecessor of each reached node on its best known path.
    previous = {}
    # Priority queue of (distance, node) pairs still to be expanded.
    pq = [(0, start)]

    while pq:
        current_distance, current_node = heapq.heappop(pq)

        # Stale queue entry: a shorter route to this node was found meanwhile.
        if current_distance > distances.get(current_node, infinity):
            continue

        for neighbor, weight in graph.get(current_node, {}).items():
            distance = current_distance + weight
            if distance < distances.get(neighbor, infinity):
                distances[neighbor] = distance
                previous[neighbor] = current_node
                heapq.heappush(pq, (distance, neighbor))

    if end not in previous:
        return None

    # Walk the predecessor chain back to the start. A private sentinel marks
    # "no predecessor" so that falsy node labels do not cut the walk short.
    no_predecessor = object()
    path = []
    node = end
    while node is not no_predecessor:
        path.append(node)
        node = previous.get(node, no_predecessor)
    return path[::-1]

In [8]:
def get_lat_long(building_name):
    """Geocode a building name within San Francisco using Nominatim.

    ", San Francisco" is appended to ``building_name`` before the lookup.
    Returns ``(latitude, longitude)`` of the match, or ``(None, None)`` after
    printing a failure message when the geocoder finds nothing.
    """
    geocoder = Nominatim(user_agent="geoapiExercises")
    query = building_name + ", San Francisco"
    match = geocoder.geocode(query)

    # Guard clause: no match means there is nothing to unpack.
    if not match:
        print(f"Failed to geocode {building_name}")
        return None, None
    return match.latitude, match.longitude

In [9]:
def haversine(lat1, lon1, lat2, lon2):
    """
    Great-circle distance in kilometers between two latitude/longitude
    points given in degrees, computed with the Haversine formula on a
    sphere of radius 6371 km.
    """
    earth_radius_km = 6371

    # The trigonometric functions below work in radians.
    phi1, lam1, phi2, lam2 = (np.radians(angle) for angle in (lat1, lon1, lat2, lon2))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine of the central angle between the two points.
    hav = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lam/2)**2
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return earth_radius_km * central_angle

In [10]:
def distance(coordinates, latitude, longitude):
    """Return the key of the ``coordinates`` entry nearest to a point.

    ``coordinates`` maps keys to ``[latitude, longitude]`` pairs. Nearness is
    the haversine distance; on a tie the earliest entry wins. ``None`` is
    returned when no entry has a finite distance (for example an empty map).
    """
    nearest_key = None
    shortest_so_far = float('inf')
    for node, position in coordinates.items():
        gap = haversine(latitude, longitude, position[0], position[1])
        if gap < shortest_so_far:
            nearest_key, shortest_so_far = node, gap
    return nearest_key

In [51]:
def get_building(building):
    """Look up the ``[latitude, longitude]`` of a known building.

    Parameters
    ----------
    building : str
        Building name. Surrounding whitespace and letter case are ignored, so
        text typed by a user ("  Hyatt ") resolves like the stored key.

    Returns
    -------
    list of float
        ``[latitude, longitude]`` in degrees.

    Raises
    ------
    KeyError
        If the name is not in the table; the message lists the valid names.
    """
    buildings = {
        'hyatt': [37.79429458516023, -122.3959216670412],
        'federal reserve bank': [37.79347115756132, -122.39606150557233],
        'autodesk': [37.79412348407684, -122.39515481057997],
        'pacific gas': [37.79161711991032, -122.39636233874825],
        'beale street plaza': [37.7917535627935, -122.39675995513431],
        'on24': [37.79132579462402, -122.39624571019917],
        'park tower': [37.79022928575979, -122.39454532915865],
        'wizeline': [37.791384271255936, -122.39533474022168],
        'databricks': [37.791099236856226, -122.39393059431788],
        'ucf': [37.79088164089771, -122.39216356320853],
        'gap hq': [37.7904862055847, -122.39062886805068],
        'mira': [37.79030128363038, -122.39136287056844],
        'rincon apt': [37.79164848037965, -122.3922755468832],
        'lumina': [37.78887712878227, -122.39239572459756],
        'woodlands market': [37.78913605029064, -122.3916839643332],
    }
    # Normalize so stray spaces or capitals in user input still match a key.
    key = building.strip().lower()
    if key not in buildings:
        known = ', '.join(sorted(buildings))
        raise KeyError(f"Unknown building {building!r}; known buildings: {known}")
    return buildings[key]

In [12]:
# Position of every graph node as [latitude, longitude] in degrees.
# distance() scans this table to find the node nearest to a building, and the
# map cell uses the same pairs as polyline vertices.
coordinates = {'I' : [37.792449682053594, -122.39743244631684],
               'E' : [37.79314490019402, -122.39648830880148],
               'A' : [37.7937926826253, -122.39565743493154],
               'J' : [37.7911682686267, -122.39583260411536],
               'F' : [37.79185283904245, -122.39494706864734],
               'B' : [37.79254322386127, -122.3940707015303],
               'K' : [37.7899323179828, -122.39428970500664],
               'G' : [37.79061846867271, -122.39339682036328],
               'C' : [37.791327201916516, -122.3925363386278],
               'L' : [37.788706730073315, -122.3927653318488],
               'H' : [37.78939758646449, -122.39186182842259],
               'D' : [37.790169485406736, -122.3909632087987]}

# Adjacency map consumed by dijkstra(): graph[u][v] is the cost of the
# directed edge u -> v. Every edge starts with the placeholder weight 1; the
# cell that processes traffic_information overwrites these values in place.
graph = {
    'A': {'B': 1, 'E': 1},
    'B': {'A': 1, 'C': 1, 'F': 1},
    'C': {'B': 1, 'D': 1, 'G': 1},
    'D': {'C': 1, 'H': 1},
    'E': {'A': 1, 'F': 1, 'I': 1},
    'F': {'B': 1, 'E': 1, 'G': 1, 'J': 1},
    'G': {'C': 1, 'F': 1, 'H': 1, 'K': 1},
    'H': {'D': 1, 'G': 1, 'L': 1},
    'I': {'E': 1, 'J': 1},
    'J': {'F': 1, 'I': 1, 'K': 1},
    'K': {'G': 1, 'J': 1, 'L': 1},
    'L': {'H': 1, 'K': 1}
}

In [13]:
# Per-edge traffic data, hard-coded here with the same node pairs as graph.
# traffic_information[u][v] = [vehicle, lane, road_blocks] for the directed
# edge u -> v:
#   vehicle     - vehicle count on the edge
#   lane        - lane count; 0 makes the street-update loop skip the edge
#   road_blocks - factor multiplied into the edge cost; 1.0 by default and
#                 set to 0.5 when an article names the edge's street
# An all-zero entry is given cost 99999 when the graph weights are computed.
traffic_information = {
    'A': {'B': [140, 1, 1.0], 'E': [220, 2, 1.0]},
    'B': {'A': [140, 1, 1.0], 'C': [130, 1, 1.0], 'F': [170, 2, 1.0]},
    'C': {'B': [130, 1, 1.0], 'D': [135, 1, 1.0], 'G': [175, 2, 1.0]},
    'D': {'C': [100, 1, 1.0], 'H': [110, 1, 1.0]},
    'E': {'A': [230, 2, 1.0], 'F': [0, 0, 0], 'I': [230, 2, 1.0]},
    'F': {'B': [110, 1, 1.0], 'E': [150, 2, 1.0], 'G': [0, 0, 0], 'J': [125, 1, 1.0]},
    'G': {'C': [100, 1, 1.0], 'F': [235, 3, 1.0], 'H': [105, 1, 1.0], 'K': [145, 2, 1.0]},
    'H': {'D': [95, 1, 1.0], 'G': [190, 2, 1.0], 'L': [95, 1, 1.0]},
    'I': {'E': [240, 2, 1.0], 'J': [180, 3, 1.0]},
    'J': {'F': [130, 1, 1.0], 'I': [0, 0, 0], 'K': [140, 2, 1.0]},
    'K': {'G': [80, 1, 1.0], 'J': [85, 1, 1.0], 'L': [210, 3, 1.0]},
    'L': {'H': [95, 1, 1.0], 'K': [0, 0, 0]}
}

# Node labels lying on each street, listed in order along the street.
# Streets that each hold three nodes:
market_street = ['A', 'E', 'I']
mission_street = ['B', 'F', 'J']
howard_street = ['C', 'G', 'K']
folsom_street = ['D', 'H', 'L']

# Streets that each hold four nodes (one from every three-node street above):
spear_street = ['A', 'B', 'C', 'D']
main_street = ['E', 'F', 'G', 'H']
beale_street = ['I', 'J', 'K', 'L']

# Street name as it may appear in an article -> ordered node labels on that
# street. The insertion order fixes the order of the "updated ..." messages.
STREET_NODES = {
    "market street": market_street,
    "mission street": mission_street,
    "howard street": howard_street,
    "folsom street": folsom_street,
    "main street": main_street,
    "spear street": spear_street,
    "beale street": beale_street,
}


def mark_street_blocked(street_nodes, factor=0.5):
    """Set the road-block factor on every segment of one street.

    ``street_nodes`` is the ordered list of node labels on the street. Each
    pair of consecutive nodes is updated in both directions in
    ``traffic_information``; entries whose lane count is 0 are left untouched.
    """
    for first, second in zip(street_nodes, street_nodes[1:]):
        for source, target in ((first, second), (second, first)):
            # traffic_information is keyed by node label, so the labels
            # themselves (not their list positions) index the table.
            segment = traffic_information[source][target]
            if segment[1] != 0:
                segment[2] = factor


for description in articles:
    # Compare in lower case: street names in news text are usually capitalized.
    text = description.lower()
    for street_name, street_nodes in STREET_NODES.items():
        if street_name in text:
            mark_street_blocked(street_nodes)
            print("updated " + street_name)

In [49]:
# Turn the per-edge traffic data into routing costs stored in graph.
for i in traffic_information.keys():
    for j in traffic_information[i].keys():
        vehicles, lanes, block_factor = traffic_information[i][j]
        # The lane count (not the vehicle count) decides whether the edge is
        # drivable, matching the lane check used when streets are flagged:
        # an open road with zero vehicles is free to use, and a zero lane
        # count must never reach the division below.
        if lanes > 0 and block_factor > 0:
            # Cost is vehicles per effective lane. The factor is 1.0 normally
            # and is lowered to 0.5 for a street named in an incident article,
            # so it has to shrink the usable lanes and raise the cost;
            # multiplying the cost by it would make a blocked street cheaper.
            graph[i][j] = vehicles / (lanes * block_factor)
        else:
            # Large finite cost so the route search avoids this edge.
            graph[i][j] = 99999
print("updated graph : ", graph)

updated graph :  {'A': {'B': 140.0, 'E': 110.0}, 'B': {'A': 140.0, 'C': 130.0, 'F': 85.0}, 'C': {'B': 130.0, 'D': 135.0, 'G': 87.5}, 'D': {'C': 100.0, 'H': 110.0}, 'E': {'A': 115.0, 'F': 99999, 'I': 115.0}, 'F': {'B': 110.0, 'E': 75.0, 'G': 99999, 'J': 125.0}, 'G': {'C': 100.0, 'F': 78.33333333333333, 'H': 105.0, 'K': 72.5}, 'H': {'D': 95.0, 'G': 95.0, 'L': 95.0}, 'I': {'E': 120.0, 'J': 60.0}, 'J': {'F': 130.0, 'I': 99999, 'K': 70.0}, 'K': {'G': 80.0, 'J': 85.0, 'L': 70.0}, 'L': {'H': 95.0, 'K': 99999}}


In [55]:
# Ask for the two buildings; trim stray spaces so the names match the table.
start_place = input("Enter the start location : ").strip().lower()
end_place = input("Enter the end location : ").strip().lower()
print("starting point : ", start_place.upper())
print("ending point : ", end_place.upper())


def _lookup_building(place):
    """Return (latitude, longitude) from get_building, or (None, None) if the name is unknown."""
    try:
        latitude, longitude = get_building(place)
    except KeyError:
        # get_building raises for names missing from its table; report that
        # through the None checks below instead of aborting the cell.
        return None, None
    return latitude, longitude


# get_lat_long(name) could be used here instead to geocode names that are not
# in the built-in table.
latitude_start, longitude_start = _lookup_building(start_place)
if latitude_start is not None and longitude_start is not None:
    print(f"Latitude: {latitude_start}, Longitude: {longitude_start}")
else:
    print("Failed to retrieve latitude and longitude.")

latitude_end, longitude_end = _lookup_building(end_place)
if latitude_end is not None and longitude_end is not None:
    print(f"Latitude: {latitude_end}, Longitude: {longitude_end}")
else:
    print("Failed to retrieve latitude and longitude.")

if None in (latitude_start, longitude_start, latitude_end, longitude_end):
    # Without both positions there is nothing to route or draw.
    print("Cannot plan a route: unknown start or end location.")
else:
    start_coords = [latitude_start, longitude_start]
    end_coords = [latitude_end, longitude_end]

    # Snap each building to its nearest graph node.
    start = distance(coordinates, latitude_start, longitude_start)
    end = distance(coordinates, latitude_end, longitude_end)
    print("start, end : {f1}, {f2}".format(f1=start, f2=end))

    # Both buildings may snap to the same node; that is a valid one-node
    # route, so it is answered here rather than reported as "no path".
    if start == end:
        shortest_path = [start]
    else:
        shortest_path = dijkstra(graph, start, end)

    if shortest_path:
        print("Shortest Path:", shortest_path)
    else:
        print("No path found!")

    # Create a Folium map centered at the midpoint between the two buildings
    map_center = [(start_coords[0] + end_coords[0]) / 2, (start_coords[1] + end_coords[1]) / 2]
    mymap = folium.Map(location=map_center, zoom_start=15)

    # Draw the route only when one exists; dijkstra returns None otherwise,
    # and None has no length to iterate over.
    if shortest_path:
        for here, there in zip(shortest_path, shortest_path[1:]):
            folium.PolyLine(locations=[coordinates[here], coordinates[there]], color='blue', weight=5).add_to(mymap)
        # Connect each building to the node it was snapped to.
        folium.PolyLine(locations=[start_coords, coordinates[start]], color='blue', weight=5).add_to(mymap)
        folium.PolyLine(locations=[end_coords, coordinates[end]], color='blue', weight=5).add_to(mymap)

    # Add markers for start and end points
    folium.Marker(location=start_coords, tooltip=start_place.upper()).add_to(mymap)
    folium.Marker(location=end_coords, tooltip=end_place.upper()).add_to(mymap)

    # Save the map to an HTML file
    mymap.save("output.html")

starting point :  DATABRICKS
ending point :  HYATT
Latitude: 37.791099236856226, Longitude: -122.39393059431788
Latitude: 37.79429458516023, Longitude: -122.3959216670412
start, end : G, A
Shortest Path: ['G', 'F', 'E', 'A']
