In [2]:
import pandas as pd

# Load the measurements from Aggregated_original.csv. The file is read
# without a header row, with ';' as field separator and ',' as decimal mark;
# its first column becomes the row index.
df = pd.read_csv(
    'Aggregated_original.csv',
    sep=';',
    decimal=',',
    header=None,
    index_col=0,
)

# The column labels are kept in a separate file, column_labels.csv,
# which is read with the same separator settings and no header row.
column_labels = pd.read_csv(
    'column_labels.csv',
    sep=';',
    decimal=',',
    header=None,
)

# Use the first row of the labels file as the data frame's column names.
# The index column is not part of df.columns, so it needs no label here.
df.columns = column_labels.iloc[0].tolist()

# Clear the name of the index
df.index.name = None

In [3]:
# Select the row at integer position 204 (zero-based, i.e. the 205th row)
row_204 = df.iloc[204]

# Show the whole row, then only the entries of that row that are NaN
print("Row 204 data:", row_204, sep="\n")
print("\nMissing values in row 204:", row_204.loc[row_204.isna()], sep="\n")

Row 204 data:
120022     NaN
120023     NaN
120051     NaN
120081     NaN
120111     NaN
          ... 
711753     0.0
711754     0.0
711801     5.0
711811    12.0
711812     2.0
Name: 03/11/2019 03:00, Length: 123, dtype: float64

Missing values in row 204:
120022   NaN
120023   NaN
120051   NaN
120081   NaN
120111   NaN
120112   NaN
120113   NaN
134021   NaN
134031   NaN
134041   NaN
134051   NaN
134081   NaN
134111   NaN
134112   NaN
405021   NaN
405022   NaN
405031   NaN
405032   NaN
405041   NaN
405051   NaN
405071   NaN
405081   NaN
405082   NaN
405091   NaN
405101   NaN
405111   NaN
405641   NaN
405651   NaN
405661   NaN
405681   NaN
405711   NaN
405712   NaN
405721   NaN
Name: 03/11/2019 03:00, dtype: float64


In [6]:
# Disabled alternative: fill missing values with 0
# df = df.fillna(0)

# Fill missing values by linear interpolation down each column.
# With method='linear' pandas ignores the index and treats the rows as
# equally spaced. `df` is overwritten, so the original NaNs are no longer
# available after this cell has run.
# NOTE(review): with the default limit_direction, NaNs located before a
# column's first valid value are left unfilled -- confirm none exist.
df = df.interpolate(method='linear')

In [7]:
# Select the row at integer position 204 (zero-based, i.e. the 205th row)
# again, now from the interpolated frame
row_204 = df.iloc[204]

# Show the whole row, then only the entries of that row that are still NaN
print("Row 204 data:", row_204, sep="\n")
print("\nMissing values in row 204:", row_204.loc[row_204.isna()], sep="\n")

Row 204 data:
120022     0.0
120023     0.5
120051     7.0
120081     1.5
120111     8.5
          ... 
711753     0.0
711754     0.0
711801     5.0
711811    12.0
711812     2.0
Name: 03/11/2019 03:00, Length: 123, dtype: float64

Missing values in row 204:
Series([], Name: 03/11/2019 03:00, dtype: float64)


In [15]:
# Inspect the dtype of the row index before converting it
index_dtype = df.index.dtype
print("The dtype of the index column is:", index_dtype)


The dtype of the index column is: object


In [16]:
# Parse the index labels, given as "day/month/year hour:minute" strings,
# into datetimes. The resulting index dtype is datetime64[ns]
# (nanosecond resolution), not datetime64[D].
df.index = pd.to_datetime(df.index, format="%d/%m/%Y %H:%M")
# Check the new dtype of the index column
index_dtype = df.index.dtype
print("The dtype of the index column is now:", index_dtype)


The dtype of the index column is now: datetime64[ns]


In [17]:
# Get the index label (timestamp) at integer position 204 (zero-based).
# Note: despite its name, row_204 now holds that timestamp, not the row's data.
row_204 = df.index[204]

# Print the timestamp to check the datetime conversion
print("Row 204 data:")
print(row_204)

Row 204 data:
2019-11-03 03:00:00


In [27]:
# Save the dataframe into an HDF5 file in the folder two levels up.
# The path is relative, so it is resolved against the current working directory.
save_path = '../../'
# mode='w' creates a new file, replacing any existing DH.h5;
# the frame is stored under the key 'df'.
df.to_hdf(save_path + 'DH.h5', key='df', mode='w')