In [None]:
# Data_Preprocessing.ipynb

# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# ---------------------------------------------------------------------------
# Preprocessing pipeline: load -> index by time -> daily mean -> fill gaps ->
# min-max scale -> save. The input is expected to provide a 'timestamp'
# column and a 'value' column, since both are referenced by name below.
# ---------------------------------------------------------------------------

# Read the raw series and preview it before anything is transformed.
df = pd.read_csv('data/time_series_data.csv')
print("Original Dataset:", df.head(), sep="\n")

# Parse the timestamps so the frame can be resampled on a datetime index.
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Report how many entries are missing in each column (after parsing, before
# the timestamp column is moved into the index).
print("\nMissing Values:", df.isna().sum(), sep="\n")

# Use the parsed timestamps as the index.
df = df.set_index('timestamp')

# Average every column per calendar day ('D'; adjust the rule as needed),
# then fill any gaps in 'value' using interpolate() with its default
# settings. Only the 'value' column is gap-filled.
df_resampled = (
    df.resample('D')
    .mean()
    .assign(value=lambda daily: daily['value'].interpolate())
)

# Preview the resampled, gap-filled data.
print("\nPreprocessed Dataset:", df_resampled.head(), sep="\n")

# Rescale 'value' with a MinMaxScaler (default settings) and keep the result
# next to the unscaled column. The fitted scaler stays available as `scaler`.
scaler = MinMaxScaler()
value_frame = df_resampled[['value']]
df_resampled['value_scaled'] = scaler.fit_transform(value_frame)

# Preview the final frame, which now includes 'value_scaled'.
print("\nFinal Preprocessed and Scaled Dataset:", df_resampled.head(), sep="\n")

# Write the result, including the timestamp index, to a new CSV file.
df_resampled.to_csv('data/preprocessed_time_series_data.csv')
