# Frequent Item Mining

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import geopandas as gpd
from tqdm import tqdm
import pandas as pd
import folium
import json
import ast



pd.set_option('display.max_columns', None)
pd.set_option('mode.chained_assignment', None)

In [2]:
res = pd.read_pickle("data/computed/matches_clean_EWT_delay.pkl")
tt = res[["route_short_name","mode"]].drop_duplicates()
del res

In [3]:
data = pd.read_pickle("data/computed/lines_growth.pkl")
data = data.merge(
    tt,
    "left",
    "route_short_name"
)

In [4]:
data = data.groupby(["route_short_name", "date_normalized", "direction_id", "mode", "lab_growth"], as_index=False).apply(lambda x: pd.Series({"stops" : [x.stop_name.to_list()]}))

## Delay creators stops

In [5]:
gen = [t for x in data[(data.lab_growth == 1)]["stops"].to_list() for t in x ]
metro_del = [t for x in data[(data.lab_growth == 1) & (data["mode"] == "M")]["stops"].to_list() for t in x ]
tram_del = [t for x in data[(data.lab_growth == 1) & (data["mode"] == "T")]["stops"].to_list() for t in x ]
bus_del = [t for x in data[(data.lab_growth == 1) & (data["mode"] == "B")]["stops"].to_list() for t in x ]

In [6]:
config = [{"data" : metro_del, "type" : "M", "sup": 0.45}, {"data" : tram_del, "type" : "T", "sup": 0.05}, {"data" : bus_del, "type" : "B", "sup": 0.03}]


config_gen = [{"data" : gen, "type" : "M", "sup": 0.03}]

In [7]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

for _ in config:
    print("=============")
    print(_["type"])
    print("transaction encoder")
    te = TransactionEncoder()
    print("fitting the te")
    te_ary = te.fit(_["data"]).transform(_["data"])
    df_late = pd.DataFrame(te_ary, columns=te.columns_)
    print("Computing FP")
    fp_late = fpgrowth(df_late, min_support=_["sup"], use_colnames=True)
    _["stops_patterns"] = np.unique(np.array([t for k in fp_late.itemsets.apply(lambda x: list(x)) for t in k]))



M
transaction encoder
fitting the te
Computing FP
T
transaction encoder
fitting the te
Computing FP
B
transaction encoder
fitting the te
Computing FP


In [8]:
config[0]["stops_patterns"].shape, config[1]["stops_patterns"].shape, config[2]["stops_patterns"].shape

((4,), (79,), (70,))

In [9]:
config_gen[0]["stops_patterns"].shape

KeyError: 'stops_patterns'

In [10]:
plot = []

for item in config:
    plot.extend([{"mode" : item["type"], "stop_name" : x} for x in item["stops_patterns"]])

delay_stops = pd.DataFrame(plot)


### Plot stops

In [11]:
stops = pd.read_csv("data/timetables/gtfs3Sept/stops.txt")
coord = stops[["stop_name", "stop_lat", "stop_lon"]].drop_duplicates("stop_name")

In [12]:
delay_stops = delay_stops.merge(
    coord,
    "left",
    "stop_name"
)

In [13]:
delay_stops

Unnamed: 0,mode,stop_name,stop_lat,stop_lon
0,M,ARTS-LOI,50.845476,4.369121
1,M,BEEKKANT,50.854237,4.323407
2,M,DELACROIX,50.845805,4.323060
3,M,GARE DE L'OUEST,50.848351,4.322094
4,T,ALBERT,50.821090,4.343888
...,...,...,...,...
148,B,VANDENHOVEN,50.882600,4.407090
149,B,VICTOR HUGO,50.847707,4.397407
150,B,WIELS,50.824159,4.325394
151,B,WILLEBROEK,50.863306,4.353795


In [14]:
px.set_mapbox_access_token("pk.eyJ1IjoibWpkYW91ZGkiLCJhIjoiY2xibm54OThyMGdyOTNvcnhqeTYyZmRuYiJ9.rfxe3z8triwA5yvV1XZA-A")
fig = px.scatter_mapbox(delay_stops, lat="stop_lat", lon="stop_lon", hover_name="stop_name",
                        color="mode", zoom=10)
fig.update_layout(mapbox_style="light")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



## Delay catcher

In [15]:
gen = [t for x in data[(data.lab_growth == -1)]["stops"].to_list() for t in x ]
metro_cat = [t for x in data[(data.lab_growth == -1) & (data["mode"] == "M")]["stops"].to_list() for t in x ]
tram_cat = [t for x in data[(data.lab_growth == -1) & (data["mode"] == "T")]["stops"].to_list() for t in x ]
bus_cat = [t for x in data[(data.lab_growth == -1) & (data["mode"] == "B")]["stops"].to_list() for t in x ]

In [16]:
config_cat = [{"data" : metro_cat, "type" : "M", "sup": 0.45}, {"data" : tram_cat, "type" : "T", "sup": 0.05}, {"data" : bus_cat, "type" : "B", "sup": 0.03}]


config_gen_cat = [{"data" : gen, "type" : "M", "sup": 0.03}]

In [17]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth

for _ in config_cat:
    print("=============")
    print(_["type"])
    print("transaction encoder")
    te = TransactionEncoder()
    print("fitting the te")
    te_ary = te.fit(_["data"]).transform(_["data"])
    df_late = pd.DataFrame(te_ary, columns=te.columns_)
    print("Computing FP")
    fp_late = fpgrowth(df_late, min_support=_["sup"], use_colnames=True)
    _["stops_patterns"] = np.unique(np.array([t for k in fp_late.itemsets.apply(lambda x: list(x)) for t in k]))

M
transaction encoder
fitting the te
Computing FP
T
transaction encoder
fitting the te
Computing FP
B
transaction encoder
fitting the te
Computing FP


In [18]:
config_cat[0]["stops_patterns"].shape, config_cat[1]["stops_patterns"].shape, config_cat[2]["stops_patterns"].shape

((1,), (46,), (19,))

In [19]:
plot = []

for item in config_cat:
    plot.extend([{"mode" : item["type"], "stop_name" : x} for x in item["stops_patterns"]])

delay_stops_cat = pd.DataFrame(plot)
delay_stops_cat = delay_stops_cat.merge(
    coord,
    "left",
    "stop_name"
)

In [20]:
delay_stops_cat

Unnamed: 0,mode,stop_name,stop_lat,stop_lon
0,M,ARTS-LOI,50.845476,4.369121
1,T,ALPHONSE XIII,50.791040,4.368950
2,T,ARSENAL,50.826393,4.396727
3,T,ARTS ET METIERS,50.847997,4.337780
4,T,AZUR,50.866659,4.287128
...,...,...,...,...
61,B,SCIENCE,50.839842,4.369371
62,B,TOUR ET TAXIS,50.864968,4.350239
63,B,TRONE,50.841633,4.364862
64,B,WILLEBROEK,50.863306,4.353795


In [21]:
px.set_mapbox_access_token("pk.eyJ1IjoibWpkYW91ZGkiLCJhIjoiY2xibm54OThyMGdyOTNvcnhqeTYyZmRuYiJ9.rfxe3z8triwA5yvV1XZA-A")
fig = px.scatter_mapbox(delay_stops_cat, lat="stop_lat", lon="stop_lon", hover_name="stop_name",
                        color="mode", zoom=10)
fig.update_layout(mapbox_style="light")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [22]:
delay_stops_cat["cat"] = "catcher"
delay_stops["cat"] = "delayer"

In [25]:
pd.concat([delay_stops, delay_stops_cat]).to_pickle("data/computed/freq_mining.pkl")

## Buses

In [28]:
Buses = pd.concat([delay_stops[delay_stops["mode"] == "B"], delay_stops_cat[delay_stops_cat["mode"] == "B"]])
Buses = Buses[~Buses.stop_name.isin(Buses[Buses.duplicated(subset="stop_name")].stop_name)]

In [43]:
px.set_mapbox_access_token("pk.eyJ1IjoibWpkYW91ZGkiLCJhIjoiY2xibm54OThyMGdyOTNvcnhqeTYyZmRuYiJ9.rfxe3z8triwA5yvV1XZA-A")
fig = px.scatter_mapbox(Buses, lat="stop_lat", lon="stop_lon", hover_name="stop_name",
                        color="cat", zoom=10)
fig.update_layout(mapbox_style="light")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



## Tram

In [42]:
tram = pd.concat([delay_stops[delay_stops["mode"] == "T"], delay_stops_cat[delay_stops_cat["mode"] == "T"]])
tram = tram[~tram.stop_name.isin(tram[tram.duplicated(subset="stop_name")].stop_name)]

px.set_mapbox_access_token("pk.eyJ1IjoibWpkYW91ZGkiLCJhIjoiY2xibm54OThyMGdyOTNvcnhqeTYyZmRuYiJ9.rfxe3z8triwA5yvV1XZA-A")
fig = px.scatter_mapbox(tram, lat="stop_lat", lon="stop_lon", hover_name="stop_name",
                        color="cat", zoom=10)
fig.update_layout(mapbox_style="light")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



## Metro

In [41]:
metro = pd.concat([delay_stops[delay_stops["mode"] == "M"], delay_stops_cat[delay_stops_cat["mode"] == "M"]])
metro = metro[~metro.stop_name.isin(metro[metro.duplicated(subset="stop_name")].stop_name)]

px.set_mapbox_access_token("pk.eyJ1IjoibWpkYW91ZGkiLCJhIjoiY2xibm54OThyMGdyOTNvcnhqeTYyZmRuYiJ9.rfxe3z8triwA5yvV1XZA-A")
fig = px.scatter_mapbox(metro, lat="stop_lat", lon="stop_lon", hover_name="stop_name",
                        color="cat", zoom=10)
fig.update_layout(mapbox_style="light")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.

