In [None]:
# -*- coding: utf-8 -*-
import glob, os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Input file names

In [None]:
# Collect every 2018 datalog CSV in the input directory.
# glob.glob() returns matches in arbitrary, OS-dependent order, so sort them:
# the files are concatenated in this order later, and a stable order keeps the
# combined table reproducible across runs and machines.
# (`recursive=True` was dropped: it only affects patterns containing "**".)
input_files = sorted(glob.glob("../input/car-ecu-datalogs/2018*.csv"))
input_files

## Read all data

In [None]:
def _read_log_header(path):
    """Scan the free-text header of one datalog file.

    The header is read line by line until (and including) the first line that
    starts with ``'Log : '``.  Every ``'Type : '`` line contributes one column
    name of the form ``"<channel>[<type>]"``, where ``<channel>`` is taken from
    the most recent ``'Channel : '`` line.

    Parameters
    ----------
    path : str
        Path of the datalog CSV file.

    Returns
    -------
    (list of str, int)
        The column names (always starting with ``'time'``) and the number of
        header lines to skip before the data rows begin.
    """
    columns = ['time']
    header_lines = 0
    channel = ""
    with open(path) as f:
        # Iterate lazily: only the header is needed here, so there is no
        # reason to load the (much longer) data section into memory.
        for line in f:
            header_lines += 1
            if line.startswith('Channel : '):
                channel = line.replace('Channel : ', '').replace('\n', '')
            elif line.startswith('Type : '):
                columns.append(channel + "[" + line.replace('Type : ', '').replace('\n', '') + "]")
            elif line.startswith('Log : '):
                break
    return columns, header_lines


def _load_log(path, columns, header_lines):
    """Load one datalog file and add the derived per-file columns.

    Parameters
    ----------
    path : str
        Path of the datalog CSV; its base name must look like
        ``"<date>-<route>...csv"`` with ``<date>`` formatted as ``%Y%m%d``.
    columns : list of str
        Column names for the data rows (see ``_read_log_header``).
    header_lines : int
        Number of leading lines to skip.

    Returns
    -------
    pandas.DataFrame
        The data rows plus ``date``, ``route``, ``#time_diff`` (seconds since
        the previous row), ``#time_seq`` (seconds since the first row) and
        ``#road_seq``.
    """
    df = pd.read_csv(path, index_col=False, skiprows=header_lines, names=columns)

    # File names encode the metadata: "<date>-<route>[-...].csv".
    name_parts = os.path.splitext(os.path.basename(path))[0].split('-')
    route = name_parts[1]
    df['date'] = name_parts[0]
    df['route'] = route

    # The log only stores the time of day; combine it with the date from the
    # file name to get a full timestamp.
    df['time'] = pd.to_datetime(df['date'] + " " + df['time'], format="%Y%m%d %H:%M:%S.%f")
    df['#time_diff'] = df['time'].diff(1).dt.total_seconds()
    # diff() leaves NaN on the first row; treat it as 0 elapsed seconds so the
    # first sample gets #time_seq == 0 instead of NaN (which silently dropped
    # it from every time-series plot).
    df['#time_seq'] = df['#time_diff'].fillna(0.0).cumsum()
    df['#road_seq'] = df['#time_seq']
    if route == "mimos2home":
        # Mirror the sequence for this route so it counts down from the end
        # of the log -- presumably the same road driven in the opposite
        # direction; TODO confirm.
        df['#road_seq'] = df['#road_seq'].max() - df['#road_seq']
    return df


# Load every file into its own frame and concatenate once at the end;
# calling pd.concat inside the loop re-copies all previous rows each time.
frames = []
for i, path in enumerate(input_files):
    csv_columns, header_number = _read_log_header(path)
    print("{} header={}  file={}".format(i+1, header_number, path))
    df = _load_log(path, csv_columns, header_number)
    frames.append(df)

# Each file keeps its own 0..n-1 row index, so index labels repeat across
# files. With no input files the result stays None, as before.
all_data = pd.concat(frames) if frames else None
all_data

In [None]:
# Print a summary of the combined table (columns, dtypes, non-null counts)
# as a quick sanity check that every log file was parsed into the same schema.
all_data.info()

## Scatter plot

In [None]:
# Engine speed against load for every logged sample, coloured by the
# TargetAFR[AFR] column. Uses an explicit figure/axes pair instead of the
# implicit pyplot state so the cell does not depend on a "current" figure.
fig, ax = plt.subplots()
points = ax.scatter(
    all_data['Load[Pressure]'],
    all_data['RPM[EngineSpeed]'],
    c=all_data['TargetAFR[AFR]'],
    cmap='Blues',
    s=5,
)
# Label the colour scale; without it the figure does not say what the
# colour encodes.
fig.colorbar(points, ax=ax, label="TargetAFR[AFR]")
ax.set_title("engine load and RPM")
ax.set_xlabel("Load[Pressure]")
ax.set_ylabel("RPM[EngineSpeed]")
ax.grid(True)

In [None]:
# Load over elapsed time, coloured by the TargetAFR[AFR] column.
# '#time_seq' is computed separately for each log file, so all logs are
# overlaid on the same time axis. Uses an explicit figure/axes pair instead
# of the implicit pyplot state.
fig, ax = plt.subplots()
points = ax.scatter(
    all_data['#time_seq'],
    all_data['Load[Pressure]'],
    c=all_data['TargetAFR[AFR]'],
    cmap='Blues',
    s=5,
)
# Label the colour scale; without it the figure does not say what the
# colour encodes.
fig.colorbar(points, ax=ax, label="TargetAFR[AFR]")
ax.set_title("engine load time series")
ax.set_xlabel("time_seq[sec]")
ax.set_ylabel("Load[Pressure]")
ax.grid(True)

In [None]:
# Engine speed over elapsed time, coloured by the TargetAFR[AFR] column.
# '#time_seq' is computed separately for each log file, so all logs are
# overlaid on the same time axis. Uses an explicit figure/axes pair instead
# of the implicit pyplot state.
fig, ax = plt.subplots()
points = ax.scatter(
    all_data['#time_seq'],
    all_data['RPM[EngineSpeed]'],
    c=all_data['TargetAFR[AFR]'],
    cmap='Blues',
    s=5,
)
# Label the colour scale; without it the figure does not say what the
# colour encodes.
fig.colorbar(points, ax=ax, label="TargetAFR[AFR]")
ax.set_title("engine RPM time series")
ax.set_xlabel("time_seq[sec]")
ax.set_ylabel("RPM[EngineSpeed]")
ax.grid(True)