In [1]:
!pip install --quiet mdpdf xlwt

You should consider upgrading via the 'pip install --upgrade pip' command.


In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from tqdm import tqdm_notebook as tqdm

import pickle
import json

from glob import glob
from pathlib import Path

from pprint import pprint

from minio import Minio
from minio.error import ResponseError, NoSuchKey

from io import BytesIO

from IPython.utils.io import capture_output

In [3]:
# Object-key prefix ("directory") under which the per-event pickles are looked up in the bucket.
INPUTDIRNAME = "events1min"

In [4]:
# Per-event feature table. The CSV's unnamed first column becomes the index; it is parsed
# below as an event key whose last path component starts with "<vehicleid>_".
df = pd.read_csv("features1min.csv", index_col="Unnamed: 0")

In [5]:
# Derive the integer vehicle id from each event key: keep what follows the last "/",
# then take the part in front of the first "_".
df["vehicleid"] = [int(key.split("/")[-1].split("_")[0]) for key in df.index]

In [6]:
# Number of events available per vehicle, most frequent first.
df["vehicleid"].value_counts()

6039    7701
4024    6337
4026    5077
6067    4622
4005    3313
6015    3029
4018    2589
6052    2467
4616     759
7016     488
0        317
4908     124
4202      62
1311      23
1367      18
Name: vehicleid, dtype: int64

In [7]:
# Distinct vehicle ids (in order of first appearance); drives the per-vehicle loops below.
vehicles = df["vehicleid"].unique()

In [8]:
# MinIO bucket that get_pickle_from_minio reads the event pickles from.
INPUT_BUCKET_NAME = "odometryclassification"

In [9]:
# Read the connection settings (MinIO endpoint and credentials) from the local JSON file.
config = json.loads(Path("config.json").read_text())

In [10]:
# Minio() takes a bare "host[:port]" endpoint plus a separate TLS flag, while the config
# stores a full URL. Split the scheme off (instead of only stripping "http://") and derive
# `secure` from it, so an "https://" endpoint is reached over TLS rather than being passed
# through verbatim with secure=False. Plain "http://" and scheme-less endpoints behave as before.
minio_config = config["minio_config"]
endpoint_url = minio_config["endpoint_url"]
minioClient = Minio(endpoint_url.split("://", 1)[-1].rstrip("/"),
                    access_key=minio_config["aws_access_key_id"],
                    secret_key=minio_config["aws_secret_access_key"],
                    secure=endpoint_url.lower().startswith("https://"))

In [11]:
def get_pickle_from_minio(filename):
    """Download a pickled object from the input bucket and return its ``"data"`` entry.

    Parameters
    ----------
    filename : str
        Object key inside ``INPUT_BUCKET_NAME``.

    Returns
    -------
    object or None
        The value stored under the ``"data"`` key of the unpickled payload,
        or ``None`` when the bucket has no object with that key.
    """
    try:
        response = minioClient.get_object(INPUT_BUCKET_NAME, filename)
    except NoSuchKey:
        return None

    try:
        raw = response.read()
    finally:
        # Always hand the HTTP connection back to the pool; otherwise every call
        # in a batch loop keeps one connection checked out.
        response.close()
        response.release_conn()

    # NOTE(security): unpickling executes arbitrary code embedded in the payload.
    # Only point this at buckets whose content is trusted.
    return pickle.loads(raw)["data"]

In [12]:
def plot_profiles(df, metadata=None, outname=None, title=None):
    """Plot the three speed profiles of one event on a single figure.

    Parameters
    ----------
    df : mapping of column name -> sequence (e.g. a DataFrame)
        Must provide ``"axle1RawSpeed"``, ``"axle2RawSpeed"`` and ``"trainSpeed"``.
    metadata : mapping, optional
        When given, ``"VehicleID"``, ``"Relative Error (%)"`` and ``"Comment"``
        are read from it to build a default title.
    outname : str, optional
        When given, the figure is saved to this path (dpi=100) and closed;
        otherwise it is shown with ``plt.show()``.
    title : str, optional
        Explicit title; takes precedence over the one derived from ``metadata``.
    """
    # Explicit figure/axes handles: nothing below depends on which figure is "current".
    fig, ax = plt.subplots(figsize=(15, 5))
    try:
        if metadata is not None:
            vid = metadata['VehicleID']
            rel = metadata['Relative Error (%)']
            com = metadata["Comment"]
            ax.set_title(f"Speed profiles - VehicleID {vid}, Relative Error = {rel}, reason: {com}")
        for column in ("axle1RawSpeed", "axle2RawSpeed", "trainSpeed"):
            ax.plot(df[column], label=column)
        ax.legend()
        if title is not None:
            ax.set_title(title)
        if outname is not None:
            fig.savefig(outname, dpi=100)
    except Exception:
        # Do not leave a half-built figure open when a column/key is missing or saving
        # fails; in a batch loop such figures would pile up in memory.
        plt.close(fig)
        raise

    if outname is not None:
        plt.close(fig)
    else:
        plt.show()

In [13]:
# Scored events to hand over for review; indexed by event key (unnamed first CSV column).
# The "score" column is what the cells below sort on.
data_to_submit = pd.read_csv("data_to_submit.csv", index_col="Unnamed: 0")

In [14]:
# Derive the integer vehicle id from each event key: keep what follows the last "/",
# then take the part in front of the first "_".
data_to_submit["vehicleid"] = [
    int(key.split("/")[-1].split("_")[0]) for key in data_to_submit.index
]

In [15]:
# Quick look at the first rows to sanity-check the load and the derived vehicleid column.
data_to_submit.head()

Unnamed: 0,axle1_minus_train_speed_min,axle1_minus_train_speed_max,axle1_minus_train_speed_mean,axle1_minus_train_speed_std,axle1_minus_train_speed_median,axle1_acc_min,axle1_acc_max,axle1_acc_mean,axle1_acc_std,axle1_acc_median,...,axle1_minus_axle2_integral_pos,axle1_minus_axle2_integral_neg,axle1_minus_train_integral,axle1_minus_train_integral_pos,axle1_minus_train_integral_neg,axle2_minus_train_integral,axle2_minus_train_integral_pos,axle2_minus_train_integral_neg,score,vehicleid
nominal/4005_2019-08-01--02-48-30546000,-10.0,2.5,-0.060667,1.909659,0.3,-1.8,2.0,0.008848,0.206917,0.0,...,166.66,-1.7,-3.6036,2451.46,-23.9,-3.6036,17.05,-25.654,-0.761039,4005
nominal/4005_2020-03-12--05-50-34470000,-5.0,1.1,0.0125,1.092329,0.3,-0.7,0.5,-0.025876,0.195043,0.0,...,8.14,-40.56,0.75,17.74,-16.99,0.75,13.9,-19.73,-0.660193,4005
nominal/4005_2020-03-30--04-34-49551000,-5.0,1.2,-0.147421,1.265055,0.2,-0.5,0.5,-0.025,0.173085,0.0,...,6.15,-21.67,-8.86,18.8,-82.98,-8.86,16.18,-409.08,-0.649543,4005
nominal/4005_2020-02-01--15-16-30916000,-4.7,0.8,0.106,1.018099,0.3,-1.6,0.7,-0.028047,0.195727,0.0,...,50.484,-1.1,6.36,20.31,-13.95,6.36,15.51,-16.45,-0.648923,4005
nominal/4005_2020-03-30--04-19-49551000,-2.4,4.1,0.467055,0.698173,0.5,-1.0,1.0,0.028833,0.218475,0.0,...,11.19,-5.13,28.07,32.24,-8.3817,28.07,93.72,-4.84,-0.636632,4005


In [16]:
def _format_event_timestamp(event_key):
    """Turn an event key ``.../<vehicleid>_<Y-m-d>--<H-M-S>`` into ``"Y/m/d H:M:S"``."""
    stamp = event_key.split("/")[-1].split("_")[-1]
    date_part = stamp.split("--")[0].replace("-", "/")
    time_part = stamp.split("--")[-1].replace("-", ":")
    return date_part + " " + time_part


# Build the feedback sheet: per vehicle, the events with the lowest scores come first.
# Vehicle 4018 contributes all of its events, every other vehicle only its first 20.
feedbacks = []
for vehicleid in vehicles:
    belongs_to_vehicle = data_to_submit["vehicleid"] == vehicleid
    ranked = data_to_submit.loc[belongs_to_vehicle, ["score", "vehicleid"]].copy()
    ranked["timestamp"] = [_format_event_timestamp(key) for key in ranked.index]
    ranked = ranked.sort_values("score")
    feedbacks.append(ranked if vehicleid == 4018 else ranked.iloc[:20])

# Two empty columns are added for the expert to fill in, then the sheet is written out.
feedbacks_df = pd.concat(feedbacks).assign(**{"expert label": np.nan, "comment": np.nan})
feedbacks_df = feedbacks_df[["timestamp", "vehicleid", "score", "expert label", "comment"]]

feedbacks_df.to_excel("feedback_sheet.xls", index=False)

In [17]:
# For every vehicle: render the speed profiles of its lowest-scoring events to img/<event>.png
# and write a Markdown report summary<vehicleid>.md that embeds those images.
# Vehicle 4018 gets all of its events, every other vehicle only its first 20.

# savefig does not create missing directories, so make sure the target exists up front.
Path("img").mkdir(exist_ok=True)

for vehicleid in vehicles:
    print(vehicleid)

    md_parts = [
        f"# Events to analyze for VehicleID {vehicleid}\n",
        f"![Anomaly score for VehicleID {vehicleid}](anomaly_score_{vehicleid}.png \"Logo Title Text 1\")\n\n",
    ]

    data_to_submit_v = data_to_submit[data_to_submit["vehicleid"] == vehicleid].sort_values("score")
    if vehicleid != 4018:
        data_to_submit_v = data_to_submit_v.iloc[:20]

    for ind, row in data_to_submit_v.iterrows():
        event_name = ind.split("/")[-1]
        dt = event_name.split("_")[-1]
        date = "/".join(dt.split("--")[0].split("-"))
        time = ":".join(dt.split("--")[-1].split("-"))

        # Deliberately NOT named `df`: that would overwrite the features table loaded at the
        # top of the notebook and break re-running the earlier cells.
        profile_df = get_pickle_from_minio(f"{INPUTDIRNAME}/{ind}.pkl")
        if profile_df is None:
            # The helper returns None for keys missing from the bucket; skip the event
            # instead of crashing inside plot_profiles.
            print(f"  skipping {ind}: no pickle found in the bucket")
            continue

        outname = f"img/{event_name}.png"
        title = f"VehicleID {vehicleid} - {date}, {time}, anomaly score = {row.score}"
        plot_profiles(profile_df, outname=outname, title=title)

        # The image alt text is the same string as the plot title.
        md_parts.append(f"![{title}]({outname} \"Logo Title Text 1\")\n")

    mdfile = "".join(md_parts)
    with open(f"summary{vehicleid}.md", "w") as f:
        f.write(mdfile)

4005



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


4018
4026
4616
0
1311
1367
4024
4202
4908
6015
6039
6052
6067
7016


In [18]:
from mdpdf.converter import convertMarkdown2Pdf

# Convert every Markdown summary in the working directory into a PDF with the same base name.
for md_name in glob("summary*.md"):
    print(md_name)
    pdf_name = md_name.replace(".md", ".pdf")
    # Keep the converter's own console output out of the notebook.
    with capture_output() as captured:
        convertMarkdown2Pdf(Path(md_name), pdf_name)


summary4024.md
summary4202.md
summary6052.md
summary1367.md
summary4005.md
summary0.md
summary1311.md
summary6067.md
summary4026.md
summary7016.md
summary4018.md
summary6039.md
summary6015.md
summary4616.md
summary4908.md
