In [2]:
import pandas as pd
import numpy as np
import warnings
import datetime

warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("bookinghistory.csv", low_memory=False)

In [4]:
df.dropna(subset=['battery_level_at_start','battery_level_at_end'], inplace=True)
df.reset_index(drop=True, inplace=True)

### Sort dataframe by date & delta battery column

In [5]:
df['start_date_time']= pd.to_datetime(df['booking_start_date__date'].astype(str), infer_datetime_format=True)
df['end_date_time']= pd.to_datetime(df['booking_end_date__date'].astype(str), infer_datetime_format=True)

In [6]:
df = df.sort_values(by="start_date_time")

In [7]:
df

Unnamed: 0,id,bookind_id,plate_number,driver_login,driver,start_adress,start_location,end_adress,end_location,mileage_start,...,reason_for_no_pfr,bonus_malus,car_model,account,booking_id,car_history_match,city,otoqi_reference,start_date_time,end_date_time
27926,7be46c2b-95c8-11ec-8b92-000d3a89d198,,FZ-433-RF,ClientLogin,Client,"28 Rue Gambetta, 92100 Boulogne-Billancourt, F...","48.84569168, 2.24077797","98 Rue St Charles, 75015 Paris, France","48.84556198, 2.28458261",3524.0,...,Not yet analysed,0.0,Spring,Zity,RZTY8785693,,Paris,,2022-02-25 00:00:34,2022-02-25 00:19:20
4036,12333b60-95c7-11ec-8b92-000d3a89d198,,FB-654-TV,frederic-josso@hotmail.fr,Frederic Pierre Jean Josso,"62bis Ave Parmentier, 75011 Paris, France","48.86286926, 2.37632394","62bis Ave Parmentier, 75011 Paris, France","48.86286545, 2.3763237",17391.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY8785697,,Paris,,2022-02-25 00:01:31,2022-02-25 00:05:25
39679,af641031-95cc-11ec-8b92-000d3a89d198,,FZ-271-RF,ClientLogin,Client,"22 Av. Pierre Grenier, 92100 Boulogne-Billanco...","48.82782745, 2.24987483","32 Rue de la Montagne Ste Geneviève, 75005 Par...","48.84811783, 2.34846044",5339.0,...,Not yet analysed,0.0,Spring,Zity,RZTY8785699,,Paris,,2022-02-25 00:02:13,2022-02-25 00:47:39
27925,7be46c29-95c8-11ec-8b92-000d3a89d198,,FB-444-YK,demba.sacko20@gmail.com,Demba Sacko,"47 Rue Olivier Métra, 75020 Paris, France","48.87382126, 2.3932271","47 Rue Olivier Métra, 75020 Paris, France","48.87381744, 2.3932271",19021.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY8785708,,Paris,,2022-02-25 00:04:11,2022-02-25 00:16:33
50124,ddf55b50-95c9-11ec-8b92-000d3a89d198,,FA-592-MN,ClientLogin,Client,"19 Av. Georges Lafenestre, 75014 Paris, France","48.82393265, 2.308918","16 Av. Marc Sangnier, 75014 Paris, France","48.8261528, 2.30654097",18257.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY8785712,,Paris,,2022-02-25 00:05:08,2022-02-25 00:23:18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10944,30483e74-c5ae-11ec-8b94-000d3a89d198,,FA-130-MP,mohamadou.niakate@gmail.com,Mohamadou Niakate,"26 Rue Lauriston, 75116 Paris, France","48.87106323, 2.29174709","6 Rue Christine de Pisan, 75017 Paris, France","48.88921356, 2.30908895",10185.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY9087111,,Paris,,2022-04-26 23:51:53,2022-04-27 00:11:32
32807,91ac110c-c5af-11ec-8b94-000d3a89d198,,FA-584-MN,ClientLogin,Client,"55 Rue Meslay, 75003 Paris, France","48.86825562, 2.35733485","99 Rue Pelleport, 75020 Paris, France","48.87021637, 2.39963436",22337.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY9087115,,Paris,,2022-04-26 23:52:24,2022-04-27 00:23:29
21168,5cf2c272-c5b2-11ec-8b94-000d3a89d198,,FC-284-FM,ClientLogin,Client,"9 Av. de la Prte de Choisy, 75013 Paris, France","48.81817245, 2.366431","56 Rue de Nantes, 75019 Paris, France","48.89306641, 2.380229",20993.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY9087120,,Paris,,2022-04-26 23:54:36,2022-04-27 00:34:22
10943,30483e73-c5ae-11ec-8b94-000d3a89d198,,FA-396-MP,ClientLogin,Client,"148 Bd de la Villette, 75019 Paris, France","48.87966919, 2.37061691","3 Rue Hippolyte Lebas, 75009 Paris, France","48.87690353, 2.34131694",18297.0,...,Not yet analysed,0.0,ZOE,Zity,RZTY9087124,,Paris,,2022-04-26 23:55:49,2022-04-27 00:11:21


In [8]:
df["charging"] = (df['battery_level_at_end']-df['battery_level_at_start']>1)

In [9]:
df['charging'].value_counts()

False    56924
True       822
Name: charging, dtype: int64

### Remove bookings with no change of battery

In [10]:
df['battery_level_at_end'] = pd.to_numeric(df['battery_level_at_end']).astype(int)
df['battery_level_at_start'] = pd.to_numeric(df['battery_level_at_start']).astype(int)

In [11]:
df['delta'] = df['battery_level_at_end']-df['battery_level_at_start']

In [12]:
df['battery_changed'] = (df['battery_level_at_end']-df['battery_level_at_start']!=0)

In [13]:
df = df[df['battery_changed']==True]

In [14]:
# df.drop(['battery_changed'], axis=1, inplace = True)

### Utils

In [15]:
def getSessions(dataframe):
    """
        Returns charging sessions for input dataframe with only one plate number
    """
    dataframe.reset_index(drop=True, inplace=True)
    plate = dataframe.loc[0, 'plate_number']
    model = dataframe.loc[0, 'car_model']
    dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
    dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
    dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
    dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
    dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
    for index in range(len(dataframe)):
        if dataframe.loc[index, 'battery_drop']/dataframe.loc[index, 'delta_hours'] > 1:
            dataframe.loc[index, 'battery_level_at_start'] = dataframe.loc[index, 'last_end_battery']
            dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
            dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
            dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
            dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
            dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
            dataframe['delta'] = dataframe['battery_level_at_end']-dataframe['battery_level_at_start']
        if (dataframe.loc[index, 'battery_level_at_end'] - dataframe.loc[index, 'battery_level_at_start'])/dataframe.loc[index, 'duration']>5:
            dataframe.loc[index, 'battery_level_at_end'] = dataframe.loc[index, 'battery_level_at_start']
            dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
            dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
            dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
            dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
            dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
            dataframe['delta'] = dataframe['battery_level_at_end']-dataframe['battery_level_at_start']

    
    dataframe = dataframe[dataframe['delta']!=0]
    dataframe.reset_index(drop=True, inplace=True)
    dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
    dataframe['was_charging']=(dataframe['battery_level_at_start']-dataframe['last_end_battery']>0)
    dataframe["charging"] = (dataframe['battery_level_at_end']-dataframe['battery_level_at_start']>1)
    charging_sessions = []
    index = 0
    current_start = 0
    start = ""
    start_battery = 0
    current_last = 0
    open_booking = False
    while index < len(dataframe) :
        if open_booking: # car was charging
            if not dataframe.loc[index, 'charging']: # car is not charging anymore --> close session
                open_booking = False
                if dataframe.loc[index, 'was_charging']:
                    charging_sessions.append((plate, model, dataframe.loc[current_start, start+'date_time'], dataframe.loc[index, 'start_date_time'], start_battery, dataframe.loc[index, 'battery_level_at_start'], dataframe.loc[index-1, 'start_adress'], dataframe.loc[index-1, 'driver']))
                else :
                    charging_sessions.append((plate, model, dataframe.loc[current_start, start+'date_time'], dataframe.loc[index-1, 'end_date_time'], start_battery, dataframe.loc[current_last, 'battery_level_at_start'], dataframe.loc[index-1, 'start_adress'], dataframe.loc[index-1, 'driver']))
                
            else : # car is still charging
                current_last = index
        else : # car wasn't charging
            if dataframe.loc[index, 'was_charging']: # the car was charged since last booking
                open_booking = True
                current_start = index-1
                start = 'end_'
                current_last = index
                start_battery = dataframe.loc[current_start, 'battery_level_at_end']
                if not dataframe.loc[index, 'charging']: # car is not charging anymore --> close session
                    open_booking = False
                    charging_sessions.append((plate, model, dataframe.loc[current_start, start+'date_time'], dataframe.loc[index, 'start_date_time'], start_battery, dataframe.loc[index, 'battery_level_at_start'], dataframe.loc[current_start, 'end_adress'], dataframe.loc[index-1, 'driver']))
            else :
                if dataframe.loc[index, 'charging']:
                    open_booking = True
                    current_start = index
                    start = "start_"
                    current_last = index
                    start_battery = dataframe.loc[index, 'battery_level_at_start']
                
        index += 1
    return charging_sessions

In [16]:
plates = df.plate_number.unique()
plate = plates[0]

In [17]:
dataframe = df[df['plate_number']==plate]

In [18]:
dataframe.reset_index(drop=True, inplace=True)
plate = dataframe.loc[0, 'plate_number']
model = dataframe.loc[0, 'car_model']
dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
for index in range(len(dataframe)):
    if (dataframe.loc[index, 'battery_drop']/dataframe.loc[index, 'delta_hours'] > 1):
        dataframe.loc[index, 'battery_level_at_start'] = dataframe.loc[index, 'last_end_battery']
        dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
        dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
        dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
        dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
        dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
        dataframe['delta'] = dataframe['battery_level_at_end']-dataframe['battery_level_at_start']
    if (dataframe.loc[index, 'battery_level_at_end'] - dataframe.loc[index, 'battery_level_at_start'])/dataframe.loc[index, 'duration']>5:
        dataframe.loc[index, 'battery_level_at_end'] = dataframe.loc[index, 'battery_level_at_start']
        dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
        dataframe['last_end_date']=dataframe['end_date_time'].shift(1)
        dataframe['delta_hours'] = (dataframe['start_date_time']-dataframe['last_end_date']).astype('timedelta64[h]')
        dataframe['duration'] = (dataframe['end_date_time']-dataframe['start_date_time']).astype('timedelta64[h]')
        dataframe['battery_drop'] = dataframe['last_end_battery']-dataframe['battery_level_at_start']
        dataframe['delta'] = dataframe['battery_level_at_end']-dataframe['battery_level_at_start']

        
dataframe = dataframe[dataframe['delta']!=0]
dataframe.reset_index(drop=True, inplace=True)
dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
dataframe['was_charging']=(dataframe['battery_level_at_start']-dataframe['last_end_battery']>0)
dataframe["charging"] = (dataframe['battery_level_at_end']-dataframe['battery_level_at_start']>1)

In [19]:
dataframe.reset_index(drop=True, inplace=True)
model = dataframe.loc[0, 'car_model']
max_index = len(dataframe)
dataframe['last_end_battery']=dataframe['battery_level_at_end'].shift(1)
dataframe['was_charging']=(dataframe['battery_level_at_start']-dataframe['last_end_battery']>1)
test_df = dataframe.loc[:, ['last_end_battery', 'battery_level_at_start', 'battery_level_at_end', 'start_date_time', 'end_date_time', 'was_charging', 'charging', 'battery_changed']]
# test_df.head(50)

In [20]:
test_df = dataframe.loc[:, ['last_end_battery', 'battery_level_at_start', 'battery_level_at_end', 'start_date_time', 'end_date_time', 'was_charging', 'charging', 'battery_changed', 'delta', 'start_adress', 'end_adress']]
# test_df

In [21]:
sessions = []
for plate in plates :
    dataframe = df[df['plate_number']==plate]
    sessions += getSessions(dataframe)
    
sessions_df = pd.DataFrame(sessions, columns = ['plate_number', 'model', 'start_date', 'end_date', 'start_battery', 'end_battery', 'adress', 'driver'])

In [22]:
sessions_df

Unnamed: 0,plate_number,model,start_date,end_date,start_battery,end_battery,adress,driver
0,FZ-433-RF,Spring,2022-02-25 18:08:28,2022-02-26 09:20:14,28.0,29.0,"51 Rue Dombasle, 75015 Paris, France",Client
1,FZ-433-RF,Spring,2022-02-26 14:27:35,2022-02-27 23:06:19,20.0,100.0,"10 All. Vauban, 92130 Issy-les-Moulineaux, France",Client
2,FZ-433-RF,Spring,2022-03-08 04:02:03,2022-03-08 16:25:14,5.0,6.0,"10 Rue Jean Richepin, 75116 Paris, France",Client
3,FZ-433-RF,Spring,2022-03-08 17:27:42,2022-03-08 19:44:47,5.0,100.0,"54 Rue du Moulin des Prés, 75013 Paris, France",Michael Maurice Fernand Genet
4,FZ-433-RF,Spring,2022-03-13 04:20:55,2022-03-14 09:12:09,6.0,100.0,"25 Rue Geoffroy-Saint-Hilaire Hall 19, 75005 P...",Sseire Sylla
...,...,...,...,...,...,...,...,...
6822,GD-526-TQ,Spring,2022-04-21 20:19:06,2022-04-22 09:23:33,39.0,40.0,"64 Rue de lAncienne Mairie, 92100 Boulogne-Bil...",Client
6823,GD-229-TR,Spring,2022-04-21 13:03:10,2022-04-21 19:29:13,43.0,44.0,"17 Rue Marcel Allégot, 92190 Meudon, France",Client
6824,GD-229-TR,Spring,2022-04-22 01:33:11,2022-04-22 18:36:54,17.0,20.0,"18 Av. Junot, 75018 Paris, France",Client
6825,GD-229-TR,Spring,2022-04-22 22:34:52,2022-04-23 00:03:34,11.0,98.0,"15 Rue Censier, 75005 Paris, France",Jean Randriamahazomanana


In [23]:
sessions_df['delta'] = (sessions_df['end_battery']-sessions_df['start_battery'])
sessions_df = sessions_df[sessions_df['delta'] > 3]
sessions_df['month'] = sessions_df['start_date'].dt.month
month_dict = {2:"Février", 3: "Mars", 4:"Avril"}
sessions_df['month'] = sessions_df['month'].map(month_dict)
sessions_df['date0'] = pd.to_datetime([datetime.date(year=2022, month=1, day=1) for x in sessions_df.iterrows()])
sessions_df['day']=(sessions_df['start_date']-sessions_df['date0']).dt.days+1
sessions_df['created_id']=sessions_df['plate_number']+sessions_df['driver']+sessions_df['day'].astype(str)

In [24]:
sessions_df

Unnamed: 0,plate_number,model,start_date,end_date,start_battery,end_battery,adress,driver,delta,month,date0,day,created_id
1,FZ-433-RF,Spring,2022-02-26 14:27:35,2022-02-27 23:06:19,20.0,100.0,"10 All. Vauban, 92130 Issy-les-Moulineaux, France",Client,80.0,Février,2022-01-01,57,FZ-433-RFClient57
3,FZ-433-RF,Spring,2022-03-08 17:27:42,2022-03-08 19:44:47,5.0,100.0,"54 Rue du Moulin des Prés, 75013 Paris, France",Michael Maurice Fernand Genet,95.0,Mars,2022-01-01,67,FZ-433-RFMichael Maurice Fernand Genet 67
4,FZ-433-RF,Spring,2022-03-13 04:20:55,2022-03-14 09:12:09,6.0,100.0,"25 Rue Geoffroy-Saint-Hilaire Hall 19, 75005 P...",Sseire Sylla,94.0,Mars,2022-01-01,72,FZ-433-RFSseire Sylla 72
5,FZ-433-RF,Spring,2022-03-20 07:55:47,2022-03-20 10:18:59,5.0,100.0,"175B Rue de Tolbiac, 75013 Paris, France",Ivan Mvibudulu Nganga,95.0,Mars,2022-01-01,79,FZ-433-RFIvan Mvibudulu Nganga79
6,FZ-433-RF,Spring,2022-03-22 18:31:12,2022-03-24 14:53:19,15.0,100.0,"13 Rue Heinrich, 92100 Boulogne-Billancourt, F...",Alan Norez,85.0,Mars,2022-01-01,81,FZ-433-RFAlan Norez81
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6817,GD-698-TQ,Spring,2022-04-16 22:50:53,2022-04-17 19:12:12,13.0,100.0,"21 Rue de lÉcole de Médecine, 75006 Paris, France",Alexandre Makunga,87.0,Avril,2022-01-01,106,GD-698-TQAlexandre Makunga106
6818,GD-698-TQ,Spring,2022-04-23 21:26:52,2022-04-24 18:24:14,8.0,100.0,"47 Rue Olivier Métra, 75020 Paris, France",Moke N Kazi,92.0,Avril,2022-01-01,113,GD-698-TQMoke N Kazi113
6821,GD-998-TQ,Spring,2022-04-18 14:26:59,2022-04-20 20:40:57,10.0,100.0,"54 Rue du Moulin des Prés, 75013 Paris, France",Hakim Rahmouni,90.0,Avril,2022-01-01,108,GD-998-TQHakim Rahmouni108
6825,GD-229-TR,Spring,2022-04-22 22:34:52,2022-04-23 00:03:34,11.0,98.0,"15 Rue Censier, 75005 Paris, France",Jean Randriamahazomanana,87.0,Avril,2022-01-01,112,GD-229-TRJean Randriamahazomanana 112


In [25]:
sessions_df['driver'].value_counts()

Client                                  901
Van Huy NGUYEN                          377
Moke N Kazi                             284
Madil Berkani                           185
Mohamadou Niakate                       164
Ivan Mvibudulu Nganga                   153
Demba  Sacko                            147
Solal Cuinet                            129
Mathias Riguet                          115
Alexandre Makunga                       107
Jean Randriamahazomanana                102
Jonathan Fleret                          88
Lassana Cissako                          85
Djegui Toure                             77
Daniel Da Silva Meneses Campos           67
Valentin Gummert                         64
Sseire Sylla                             62
Michael Maurice Fernand Genet            62
Ayi paul Atayi                           58
Mansour  torkhani                        47
Nicolas DESMEDT                          43
Mourad Mabrouk                           42
Herve BWASI-LUBAYA              

In [33]:
previous_df = pd.read_csv("Last version elec - result_charge_start1.csv")
previous_df = previous_df[['plate_number', 'car_model', 'battery_level_at_start', 'battery_level_at_end', 'battery_volume_change', 'day', 'created_id']]

In [34]:
previous_df

Unnamed: 0,plate_number,car_model,battery_level_at_start,battery_level_at_end,battery_volume_change,day,created_id
0,GB-970-JX,Spring,0,0,100,93,GB-970-JXSseireSylla93
1,GB-970-JX,Spring,7,7,93,99,GB-970-JXDjeguiToure99
2,GB-970-JX,Spring,50,49,51,103,GB-970-JXJosephPagal103
3,GB-970-JX,Spring,17,12,72,108,GB-970-JXAyipaulAtayi108
4,GB-970-JX,Spring,11,11,89,115,GB-970-JXLassanaCissako115
...,...,...,...,...,...,...,...
4045,FZ-326-RF,Spring,14,14,42,108,FZ-326-RFMadilBerkani108
4046,FZ-326-RF,Spring,12,13,87,119,FZ-326-RFSseireSylla119
4047,FZ-326-RF,Spring,14,34,57,132,FZ-326-RFDenisFavrel132
4048,FZ-326-RF,Spring,14,13,87,141,FZ-326-RFMokeNKazi141


In [35]:
sessions_df['model'].value_counts()

Spring    2317
ZOE       1352
Name: model, dtype: int64

In [36]:
previous_df['car_model'] = previous_df['car_model'].replace({'Zoe':'ZOE'})
previous_df['car_model'].value_counts()

Spring    2594
ZOE       1433
Name: car_model, dtype: int64

In [38]:
print(previous_df[previous_df['car_model']=='ZOE']['battery_volume_change'].sum())
print(previous_df[previous_df['car_model']=='Spring']['battery_volume_change'].sum())
print(sessions_df[sessions_df['model']=='ZOE']['delta'].sum())
print(sessions_df[sessions_df['model']=='Spring']['delta'].sum())

115702
220361
110903.0
201815.0


In [None]:
merged_df = pd.merge(sessions_df, previous_df, on='created_id', how='outer')


In [None]:
merged_df

In [652]:
sessions_df.to_csv("charging_sessions.csv")