In [None]:
import pandas
import os
import folium
import matplotlib.pyplot as plt
import pyarrow.parquet as pq


In [None]:
# Column-name groups: each list names the columns that should receive the
# corresponding dtype. Empty lists are kinds this dataset currently has no
# columns for.
int_columns = []
float_columns = [
    "fsq_latitude_source",
    "fsq_longitude_source",
    "fsq_latitude_destination",
    "fsq_longitude_destination",
    "distance_m",
]
bool_columns = []
datetime_columns = []
category_columns = []
string_columns = [
    "fsq_place_id_source",
    "fsq_place_id_destination",
]

In [None]:
def get_typed_df(df):
    """Cast every column of ``df`` according to the module-level column groups.

    Each column name is looked up, in order, in ``string_columns``,
    ``float_columns``, ``int_columns``, ``bool_columns``, ``datetime_columns``
    and ``category_columns``; the first group containing the name decides the
    conversion. Columns found in no group are cast to the pandas ``"string"``
    dtype.

    The columns are overwritten on the frame that was passed in, and that same
    frame object is returned.
    """
    # (group, converter) pairs, listed in precedence order.
    conversion_rules = (
        (string_columns, lambda values: values.astype("string")),
        (float_columns, lambda values: pandas.to_numeric(values, downcast="float")),
        (int_columns, lambda values: pandas.to_numeric(values, downcast="integer")),
        (bool_columns, lambda values: values.astype("boolean")),
        (datetime_columns, lambda values: pandas.to_datetime(values)),
        (category_columns, lambda values: values.astype("category")),
    )
    # Unlisted columns fall back to the string conversion.
    fallback = conversion_rules[0][1]

    for name in df.columns:
        convert = next(
            (converter for group, converter in conversion_rules if name in group),
            fallback,
        )
        df[name] = convert(df[name])
    return df

In [None]:
def get_pyarrow_table(csv_path,columns=None):
    """Load a CSV as a pyarrow Table, using a Parquet file beside it as a cache.

    If the Parquet file does not exist yet, the CSV is read with pandas, typed
    with ``get_typed_df`` and written to Parquet first. The result is then
    always read back from the Parquet file, so both the cached and the
    first-run path return the same type and both honour ``columns``.

    Parameters
    ----------
    csv_path : str
        Path of the source CSV file.
    columns : list of str, optional
        Column names to load; ``None`` loads every column.

    Returns
    -------
    pyarrow.Table
    """
    # Swap only the file extension: str.replace would also rewrite a ".csv"
    # that appears in a directory name, and would leave the path unchanged
    # when the file does not end in ".csv".
    parquet_path = os.path.splitext(csv_path)[0] + ".parquet"
    if not os.path.exists(parquet_path):
        df = get_typed_df(pandas.read_csv(csv_path))
        df.to_parquet(parquet_path)
    return pq.read_table(parquet_path, memory_map=True, columns=columns)


In [None]:
# graph_df = pandas.read_csv("../../data/fsq_graph_10_visualization.csv", memory_map=True)
# graph_df = get_typed_df(graph_df)


In [None]:
def get_small_graph(graph_table, min_lat=33.79, max_lat=33.81, min_lon=-84.34, max_lon=-84.31):
    """Return the rows whose source point lies strictly inside a lat/lon box.

    Parameters
    ----------
    graph_table : object with a ``to_pandas()`` method
        Converted to a DataFrame that must contain the columns
        ``fsq_latitude_source`` and ``fsq_longitude_source``.
    min_lat, max_lat, min_lon, max_lon : float, optional
        Exclusive bounds of the box. The defaults are the window that was
        previously hard-coded in this function.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels. Rows with a
        missing source coordinate never match.
    """
    # Author's scratch coordinates, kept for reference:
    #   33.734413222285156, -84.40355678314575
    #   33.7664799038100
    #   commented-out alternative window: lat 33.75..33.85, lon -84.50..-84.25
    #   33.79130830315423, -84.33371698363095 to 33.80415898992885, -84.31803670061207
    graph_df = graph_table.to_pandas()
    source_lat = graph_df["fsq_latitude_source"]
    source_lon = graph_df["fsq_longitude_source"]
    # One combined mask instead of four successive filtered copies.
    inside_box = (
        (source_lat > min_lat)
        & (source_lat < max_lat)
        & (source_lon > min_lon)
        & (source_lon < max_lon)
    )
    return graph_df[inside_box]

In [None]:
# Load every column the later cells read: the source id and coordinates for
# filtering and printing, plus the destination coordinates and distance_m that
# the map-drawing and preview cells access.
graph_table = get_pyarrow_table(
    "../../data/fsq_graph_10_visualization.csv",
    columns=[
        "fsq_place_id_source",
        "fsq_latitude_source",
        "fsq_longitude_source",
        "fsq_latitude_destination",
        "fsq_longitude_destination",
        "distance_m",
    ],
)

In [None]:
def get_scaled_values(series, new_min, new_max):
    """Min-max rescale ``series`` linearly onto ``[new_min, new_max]``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to rescale.
    new_min, new_max : number
        Values that the series minimum and maximum are mapped to.

    Returns
    -------
    pandas.Series
        Rescaled values with the same index as ``series``. When every value in
        ``series`` is identical, all of them map to ``new_min`` rather than
        NaN. Missing values stay missing.
    """
    min_val = series.min()
    max_val = series.max()
    value_range = max_val - min_val
    if value_range == 0:
        # Constant input: the general formula would divide 0 by 0 and turn
        # every element into NaN. Multiplying by 0.0 keeps the index, a float
        # dtype and any missing values while pinning the rest to new_min.
        return (series - min_val) * 0.0 + new_min
    scaled_series = (series - min_val) / value_range * (new_max - new_min) + new_min
    return scaled_series
# Restrict the loaded graph to the bounding box defined in get_small_graph.
small_graph = get_small_graph(graph_table)
# Min-max scaling of distance_m onto [0, 1] is currently disabled.
# NOTE(review): create_html_map derives line opacity from 1 - distance_m, which
# presumably expects this scaling — confirm before relying on the line opacity.
# small_graph["distance_m"] = get_scaled_values(small_graph["distance_m"], 0, 1)



In [None]:
small_graph
# Print one "place id, latitude, longitude" line per row, with the coordinates
# rounded to 7 decimal places.
source_fields = zip(
    small_graph["fsq_place_id_source"],
    small_graph["fsq_latitude_source"],
    small_graph["fsq_longitude_source"],
)
for place_id, latitude, longitude in source_fields:
    print(f"{place_id}, {round(latitude,7)}, {round(longitude,7)}")

In [None]:
def _add_point_marker(fmap, location):
    """Add one filled blue circle marker at ``location`` ([lat, lon]) to ``fmap``."""
    folium.CircleMarker(
        location=location,
        radius=4,
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=1,
    ).add_to(fmap)


def _edge_opacity(distance):
    """Turn a distance into a line opacity of ``1 - distance`` clamped to [0, 1].

    A missing distance yields a fully opaque line.
    """
    if pandas.isna(distance):
        return 1.0
    return min(1.0, max(0.0, 1.0 - float(distance)))


def create_html_map(df, save_path="df_visualization.html"):
    """Draw the graph rows in ``df`` on a folium map and save it as HTML.

    Every row contributes a blue marker at its source point. When ``df`` also
    has ``fsq_latitude_destination`` and ``fsq_longitude_destination``, a red
    line to the destination and a blue destination marker are drawn as well.
    When ``distance_m`` is present the line opacity is ``1 - distance_m``
    clamped to [0, 1]; otherwise lines are fully opaque.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``fsq_latitude_source`` and ``fsq_longitude_source``.
    save_path : str, optional
        Where the HTML file is written.

    Raises
    ------
    ValueError
        If ``df`` has no rows, because the map centre is the mean source
        coordinate and would be undefined.
    """
    if df.empty:
        raise ValueError("create_html_map requires at least one row to centre the map")

    # Edge columns are optional so a frame holding only source points still renders.
    has_destination = {"fsq_latitude_destination", "fsq_longitude_destination"}.issubset(df.columns)
    has_distance = "distance_m" in df.columns

    m = folium.Map(location=[df["fsq_latitude_source"].mean(), df["fsq_longitude_source"].mean()], zoom_start=10)
    for _, row in df.iterrows():
        source = [float(row["fsq_latitude_source"]), float(row["fsq_longitude_source"])]
        if not has_destination:
            _add_point_marker(m, source)
            continue

        destination = [float(row["fsq_latitude_destination"]), float(row["fsq_longitude_destination"])]
        # The line is added before the markers, as in the original drawing order.
        folium.PolyLine(
            locations=[source, destination],
            color="red",
            weight=2,
            opacity=_edge_opacity(row["distance_m"]) if has_distance else 1.0,
        ).add_to(m)
        _add_point_marker(m, source)
        _add_point_marker(m, destination)
    m.save(save_path)
# Render the bounding-box subset to an HTML file under ../../visualization/.
create_html_map(small_graph, "../../visualization/small_graph.html")


In [None]:
# Preview the coordinates of the first five rows of the graph table.
temp_table = graph_table[:5]
columns = temp_table.column_names
# list.index raises ValueError when graph_table was loaded without one of
# these columns.
fsq_longitude_source_index = columns.index("fsq_longitude_source")
fsq_latitude_source_index = columns.index("fsq_latitude_source")
fsq_longitude_destination_index = columns.index("fsq_longitude_destination")
fsq_latitude_destination_index = columns.index("fsq_latitude_destination")
print(temp_table)
# Remember the row count now: once temp_table becomes a list of columns below,
# len(temp_table) is the number of columns, not the number of rows.
preview_row_count = temp_table.num_rows
# Stringify every cell in column-major layout: temp_table[column][row].
temp_table = [[str(cell) for cell in column] for column in temp_table.columns]

print(temp_table[1][0])
for i in range(preview_row_count):
    longitude_source = temp_table[fsq_longitude_source_index][i]
    latitude_source = temp_table[fsq_latitude_source_index][i]
    longitude_destination = temp_table[fsq_longitude_destination_index][i]
    latitude_destination = temp_table[fsq_latitude_destination_index][i]
    print(longitude_source, latitude_source, longitude_destination, latitude_destination)


In [None]:
def create_png_map(table, save_path="table_visualization.png"):
    """Plot every source-to-destination pair in ``table`` as a red line and save it.

    ``table`` is indexed by column name and each column is converted with
    ``to_numpy(zero_copy_only=False)``. Lines are drawn on axes fixed to
    x in [-180, 180] and y in [-90, 90], with the axes hidden, and the figure
    is written to ``save_path`` at 600 dpi with a transparent background.
    """
    def column_values(name):
        # Materialise one column as a numpy array, allowing a copy.
        return table[name].to_numpy(zero_copy_only=False)

    source_lon = column_values("fsq_longitude_source")
    source_lat = column_values("fsq_latitude_source")
    destination_lon = column_values("fsq_longitude_destination")
    destination_lat = column_values("fsq_latitude_destination")

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_xlim(-180, 180)
    ax.set_ylim(-90, 90)

    # One line segment per table row.
    segments = zip(source_lon, destination_lon, source_lat, destination_lat)
    for x_start, x_end, y_start, y_end in segments:
        ax.plot([x_start, x_end], [y_start, y_end],
                color="red", linewidth=1, alpha=0.7)

    ax.set_axis_off()
    fig.savefig(save_path, bbox_inches="tight", pad_inches=0.1, dpi=600, transparent=True)
    plt.close(fig)
# create_png_map(graph_table, "../../visualization/graph_table.png")