In [53]:
import pandas as pd
import netCDF4 as nc
import matsim

## INPUT

In [54]:
# Set raw netcdf file string
netcdf = 'C:/Users/david/Documents/00_VSP/01_Mosaik-2/netcdfConverterTest/equil_run/10.position-emissions.nc'

In [55]:
# Set file events file string
events = "C:/Users/david/Documents/00_VSP/01_Mosaik-2/netcdfConverterTest/equil_run/10.events.xml.gz"

## ------------------------------

## NetCdf Import

**Netcdf file location**

In [56]:
netcdf

'C:/Users/david/Documents/00_VSP/01_Mosaik-2/netcdfConverterTest/equil_run/10.position-emissions.nc'

**Some general analysis**

In [57]:
ds = nc.Dataset(netcdf)
print(ds)
for dim in ds.dimensions.values():
    print(dim)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_CLASSIC data model, file format NETCDF3):
    dimensions(sizes): time(12954), agents(100)
    variables(dimensions): float64 time(time), int32 number_of_vehicles(time), int32 vehicle_id(time, agents), float64 x(time, agents), float64 y(time, agents), float64 CO(time, agents), float64 CO2(time, agents), float64 NOx(time, agents), float64 NO2(time, agents), float64 PM10(time, agents), float64 NO(time, agents)
    groups: 
<class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 12954
<class 'netCDF4._netCDF4.Dimension'>: name = 'agents', size = 100


**Use conversion via pandas to process netcdf**

In [58]:
# Create dataframe for each variable and save each variable dataframe to data list
data = dict()
for var in ds.variables:
    df = pd.DataFrame(ds[var][:])
    data[var] = df

In [59]:
# Process time column (which is the only one dimensional)
# => x dimension
df_time = data['time'].copy()
df_time.rename(columns={0:'time'}, inplace=True)

data.pop('time')
data.pop('number_of_vehicles')

# Find vehicle columns
# => y dimension
columns = data['vehicle_id'].columns

In [61]:
df_time.shape[0]

12954

In [60]:
columns

RangeIndex(start=0, stop=100, step=1)

In [25]:
# Process all other columns (two-dimensional)
dfs = list()

for c in columns:
    df = df_time.copy()
    for key in data.keys():
        df[key] = data[key][c]
        dfs.append(df)

In [26]:
df_from_netcdf = pd.concat(dfs, ignore_index = True)

**Final head and tail view**

In [27]:
df_from_netcdf.head(3)

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
0,21511.0,1,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409
1,21512.0,1,-14945.24,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
2,21513.0,1,-14917.46,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807


In [28]:
df_from_netcdf.tail(3)

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
11658597,48276.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11658598,48277.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11658599,48278.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
df_from_netcdf.shape[0]

2723112

## Events Import

**Events file location**

In [None]:
events

**Use matsim module to read-in position emission events**

In [29]:
events = matsim.event_reader(events, types='positionEmission')

In [30]:
lst = list()
for event in events:
    lst.append(event)
df_from_events = pd.DataFrame(lst)
df_from_events.head(3)

Unnamed: 0,time,type,person,x,y,state,linkId,vehicleId,CO,CO2_TOTAL,...,BC_exhaust,Benzene,PN,Pb,CH4,PM2_5_non_exhaust,PM_non_exhaust,BC_non_exhaust,N2O,NH3
0,21510.0,positionEmission,3,-15000.0,-3.75,PERSON_DRIVING_CAR,1,3_car,0.990418673,5.043848991,...,0.000289713,0.018397009,233400000000.0,0.0,0.01499084,,,,,
1,21511.0,positionEmission,3,-14973.02,-3.75,PERSON_DRIVING_CAR,6,3_car,0.0963123972440384,6.046308323643902,...,7.245646275999882e-05,2.243923901999964e-05,104511616599.9983,0.0,0.0002938585246599953,0.0002428199460399961,0.0008093999730199869,2.428199999999961e-05,7.342477495999881e-05,0.00062368389845999
2,21512.0,positionEmission,3,-14945.240000000002,-3.75,PERSON_DRIVING_CAR,6,3_car,0.0991682133224358,6.225591001883739,...,7.460491235999687e-05,2.310459821999903e-05,107610552599.9955,0.0,0.0003025718982599873,0.0002500199444399895,0.0008333999722199651,2.5001999999998952e-05,7.560193655999684e-05,0.0006421771200599731


**Do some data cleaning and transformation**

In [31]:
# Some header renaming
to_replace = {
    'CO2_TOTAL':'CO2',
    'PM2_5':'PM10',
    'person':'vehicle_id'
}
df_from_events.rename(columns = to_replace, inplace=True)

In [32]:
# Narrow down to relevant columns
df_from_events = df_from_events[['time', 'vehicle_id', 'x', 'y', 'CO', 'CO2', 'NOx', 'NO2', 'PM10']]

In [33]:
# Convert to numeric
df_from_events = df_from_events.apply(pd.to_numeric)

In [34]:
# NO calculation as in position emission module
df_from_events['NO'] = df_from_events['NOx'] - df_from_events['NO2']

**Final head and tail view**

In [35]:
df_from_events.head(3)

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
0,21510.0,3,-15000.0,-3.75,0.990419,5.043849,0.108088,0.006123,0.00059,0.101964
1,21511.0,3,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409
2,21512.0,3,-14945.24,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807


In [36]:
df_from_events.tail(3)

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
302965,48276.0,5,-15041.67,-3.75,0.049584,3.112796,0.010238,0.003335,8.7e-05,0.006903
302966,48277.0,5,-15027.78,-3.75,0.049584,3.112796,0.010238,0.003335,8.7e-05,0.006903
302967,48278.0,5,-15013.89,-3.75,0.049584,3.112796,0.010238,0.003335,8.7e-05,0.006903


In [46]:
df_from_events.shape[0]

302968

## Make comparable

**Throw-out empty rows in df_from_netcdf**

In [37]:
df_from_netcdf.shape[0]

11658600

In [38]:
df_from_netcdf = df_from_netcdf[df_from_netcdf['CO'] !=0]
df_from_netcdf.shape[0]

2723112

In [39]:
df_from_netcdf.head(3)

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
0,21511.0,1,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409
1,21512.0,1,-14945.24,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
2,21513.0,1,-14917.46,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807


## Compare

In [40]:
df_compare = pd.merge(df_from_netcdf, df_from_events, how='outer', on=['time', 'vehicle_id', 'x', 'y'])
df_compare.head(3)

Unnamed: 0,time,vehicle_id,x,y,CO_x,CO2_x,NOx_x,NO2_x,PM10_x,NO_x,CO_y,CO2_y,NOx_y,NO2_y,PM10_y,NO_y
0,21511.0,1,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409,,,,,,
1,21511.0,1,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409,,,,,,
2,21511.0,1,-14973.02,-3.75,0.096312,6.046308,0.019887,0.006477,0.00017,0.013409,,,,,,


In [45]:
df_compare.loc[2000:2010]

Unnamed: 0,time,vehicle_id,x,y,CO_x,CO2_x,NOx_x,NO2_x,PM10_x,NO_x,CO_y,CO2_y,NOx_y,NO2_y,PM10_y,NO_y
2000,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2001,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2002,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2003,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2004,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2005,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2006,21733.0,1,-8805.86,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2007,21734.0,1,-8778.08,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2008,21734.0,1,-8778.08,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,
2009,21734.0,1,-8778.08,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807,,,,,,


In [68]:
df_from_netcdf[(df_from_netcdf['vehicle_id'] == 1) & (df_from_netcdf['time'] == 21600.0)].sort_values(by='time')

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
89,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
13043,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
25997,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
38951,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
51905,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
64859,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
77813,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
90767,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
103721,21600.0,1,-12500.6,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807


In [65]:
df_from_events[df_from_events['vehicle_id'] == 1].sort_values(by='time')

Unnamed: 0,time,vehicle_id,x,y,CO,CO2,NOx,NO2,PM10,NO
278,21600.0,1,-15500.000000,-360.00,3.687783,14.634802,0.187261,0.010155,0.001124,0.177106
513,21610.0,1,-14994.601309,-3.75,0.048940,3.072368,0.010105,0.003291,0.000086,0.006814
551,21611.0,1,-14966.821309,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
589,21612.0,1,-14939.041309,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
627,21613.0,1,-14911.261309,-3.75,0.099168,6.225591,0.020476,0.006669,0.000175,0.013807
...,...,...,...,...,...,...,...,...,...,...
300622,38157.0,1,-15069.450000,-3.75,0.049584,3.112796,0.010238,0.003335,0.000087,0.006903
300623,38158.0,1,-15055.560000,-3.75,0.049584,3.112796,0.010238,0.003335,0.000087,0.006903
300624,38159.0,1,-15041.670000,-3.75,0.049584,3.112796,0.010238,0.003335,0.000087,0.006903
300625,38160.0,1,-15027.780000,-3.75,0.049584,3.112796,0.010238,0.003335,0.000087,0.006903
