# Configuration

In [None]:
%matplotlib inline

import sklearn.cluster as skcl;
import pandas as pd;
import matplotlib.pyplot as plt
import feature_normalization as fn
from bisect import bisect


## Read Data

In [None]:
# Load the tab-separated feature table and keep only the rows whose
# `longest_stop_s` value is non-negative.
features = pd.read_csv("features.txt", sep="\t").query('longest_stop_s >= 0')
print(features.head(3))

## Plot data

In [None]:
feature_names = features.columns.values

# Scatter every unordered pair of feature columns against each other.
# Column 0 is skipped because it holds the agent id.
plotted_names = feature_names[1:]
for position, x_name in enumerate(plotted_names):
    for y_name in plotted_names[position + 1:]:
        plt.scatter(features[x_name], features[y_name])
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        plt.show()

## Normalize features


In [None]:
# Delegate normalisation to the project helper module, then preview the
# first rows of the result.
norm_features = fn.normalize_features(features)
print(norm_features.head())


## Train clustering algorithm

### K Means

In [None]:
# Fix the seed: KMeans initialises its centroids randomly, so without it the
# cluster ids (and therefore the colours in the plots below) change on every
# "Restart & Run All".
kmeans = skcl.KMeans(n_clusters=8, random_state=0)

# NOTE(review): `norm_features` is whatever fn.normalize_features returns;
# verify that it does not still contain the agent id column, which should not
# take part in the clustering.
kmeans.fit(norm_features)
pred = kmeans.predict(norm_features)

# Scatter every pair of features, coloured by the assigned cluster.
# start at 1 because 0 is agent id
for i in range(1, len(feature_names)):
    for j in range(i + 1, len(feature_names)):
        feature1 = feature_names[i]
        feature2 = feature_names[j]
        plt.scatter(features[feature1], features[feature2], c=pred)
        plt.xlabel(feature1)
        plt.ylabel(feature2)
        plt.show()

## Meaningful clustering

Clustering based on meaningful boundaries.

First, define some charging-time thresholds. According to https://www.clippercreek.com/wp-content/uploads/2016/04/TIME-TO-CHARGE-20170706_FINAL-LOW-RES.jpg, full charging times range from 2 to 70 hours, depending on the vehicle and the charging station.

In [None]:
# Charging-time class boundaries: powers of two in hours (2, 4, 8, 16), capped
# at 24 h. They are stored in seconds because they are bisected against the
# `longest_stop_s` column, in the same way the range thresholds are stored in
# metres for `longest_trip_m`.
SECONDS_PER_HOUR = 3600
charge_time_thresholds = [ SECONDS_PER_HOUR * 2 ** i for i in range(1,10) if (2 ** i <= 24) ]
print( charge_time_thresholds )

Then define the driving-range thresholds.
- Range of the Nissan Leaf (most common EV): 135 km https://en.wikipedia.org/wiki/Nissan_Leaf
- Tesla 85D: 270 miles = 434km https://www.tesla.com/fr_CH/blog/driving-range-model-s-family?redirect=no

Range depends on lots of factors, so we just use a few thresholds starting at 50km up to 400km

In [None]:
# Range class boundaries, written in kilometres and stored in metres:
# 50 km doubled three times.
range_thresholds = [km * 1000 for km in (50, 100, 200, 400)]
print(range_thresholds)

Now generate one label per combination of range class and charge-time class, and compute the label of every agent.

In [None]:
# Bucket every row by its longest trip and its longest stop: `bisect` returns
# how many thresholds are <= the value, which serves as the class index.
# Each threshold list must be in the same unit as the column it buckets.
# The two class indices are then folded into a single label
# (range class in the units, charge-time class in the hundreds).
pred_meaning = features.assign(
    range_class=[bisect(range_thresholds, trip) for trip in features.longest_trip_m],
    charge_time_class=[bisect(charge_time_thresholds, stop) for stop in features.longest_stop_s],
).assign(
    label=lambda frame: frame.range_class + 100 * frame.charge_time_class
)

In [None]:
# Scatter every unordered pair of feature columns, coloured by the
# threshold-based label. Column 0 is skipped because it holds the agent id.
labelled_names = feature_names[1:]
for position, x_name in enumerate(labelled_names):
    for y_name in labelled_names[position + 1:]:
        plt.scatter(features[x_name], features[y_name], c=pred_meaning.label)
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        plt.show()