In [1]:
import pandas as pd

df = pd.read_csv("features.csv", parse_dates=["timestamp"])
df = df.sort_values("timestamp").reset_index(drop=True)

In [2]:
# threshold might shift between cars; tune it later using plots
df["occupied_now"] = (df["g1_min"] < 300).astype(int)

In [3]:
import numpy as np

def make_stability_labels(df, horizon_seconds=900):
    df = df.copy()
    df["ts"] = (df["timestamp"] - df["timestamp"].iloc[0]).dt.total_seconds()

    labels = []

    for i in range(len(df)):
        t_now = df.loc[i, "ts"]
        t_future = t_now + horizon_seconds

        mask = (df["ts"] > t_now) & (df["ts"] <= t_future)

        if mask.sum() == 0:
            labels.append(0)
            continue

        occ_now = df.loc[i, "occupied_now"]
        occ_future = df.loc[mask, "occupied_now"]

        will_change = int((occ_future != occ_now).any())
        labels.append(will_change)

    df["will_change"] = labels
    return df

df2 = make_stability_labels(df)


In [4]:
print(df2["will_change"].value_counts())


will_change
0    530
1    470
Name: count, dtype: int64


In [5]:
feature_cols = [
    "g0_min","g1_min","g2_min",
    "g0_mean","g1_mean","g2_mean",
    "mag_norm","mag_norm_diff",
    "tof_min_all","tof_mean_all","tof_mean_all_diff"
]

X = df2[feature_cols].fillna(0)
y = df2["will_change"]


In [6]:
# train logistic regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

clf = LogisticRegression(max_iter=500)
clf.fit(X_train_s, y_train)

preds = clf.predict_proba(X_test_s)[:,1]
print("ROC AUC:", roc_auc_score(y_test, preds))
print(classification_report(y_test, preds>0.5))


ROC AUC: 0.9983921213948347
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       107
           1       0.97      0.98      0.97        93

    accuracy                           0.97       200
   macro avg       0.97      0.98      0.97       200
weighted avg       0.98      0.97      0.98       200



In [7]:
# #decision tree
# from sklearn.tree import DecisionTreeClassifier

# tree = DecisionTreeClassifier(max_depth=5)
# tree.fit(X_train, y_train)

# preds_tree = tree.predict_proba(X_test)[:,1]
# print("ROC AUC:", roc_auc_score(y_test, preds_tree))


In [8]:
import joblib

joblib.dump(clf, "stability_model.pkl")
joblib.dump(scaler, "stability_scaler.pkl")


['stability_scaler.pkl']

In [9]:
from google.colab import files

files.download("stability_model.pkl")
files.download("stability_scaler.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>