In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO

def parse_serial_data(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    
    # Skip the header
    data = lines[1:]
    
    parsed_data = []
    current_entry = {"Timestamp": "", "Value": "", "Type": ""}
    
    for line in data:
        parts = line.strip().split(';')
        if len(parts) == 3:
            # New entry
            if current_entry["Timestamp"]:
                parsed_data.append(current_entry)
            current_entry = {"Timestamp": parts[0], "Value": parts[1], "Type": parts[2]}
        elif len(parts) == 1 and current_entry["Timestamp"]:
            # Continuation of previous entry
            current_entry["Value"] += parts[0]
    
    # Add the last entry
    if current_entry["Timestamp"]:
        parsed_data.append(current_entry)
    
    return pd.DataFrame(parsed_data)

# Load and parse the data
df = parse_serial_data('serial_monitor_export.csv')

# Convert Timestamp to datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='%I:%M:%S %p.%f')

# Extract distance measurements
df['Distance'] = df['Value'].str.extract('Distance: (\d+)').astype(float)

# Create binary columns for LED states
for led in range(1, 6):
    df[f'LED{led}'] = (df['Value'].str.contains(f'LED{led}.*: ON')).astype(int)

# Remove rows without distance measurements
df_distance = df.dropna(subset=['Distance'])

# Plot distance over time
plt.figure(figsize=(12, 6))
plt.plot(df_distance['Timestamp'], df_distance['Distance'])
plt.title('Distance Measurements Over Time')
plt.xlabel('Time')
plt.ylabel('Distance (cm)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('distance_over_time.png')
plt.close()

# Calculate summary statistics
print("Summary Statistics for Distance:")
print(df_distance['Distance'].describe())

# Analyze LED states
led_columns = [f'LED{i}' for i in range(1, 6)]
led_data = df[df['Value'].str.contains('LED', na=False)][['Timestamp'] + led_columns]
led_data = led_data.set_index('Timestamp').resample('1S').first().fillna(method='ffill')

# Plot LED states over time
plt.figure(figsize=(12, 6))
for led in led_columns:
    plt.plot(led_data.index, led_data[led], label=led)
plt.title('LED States Over Time')
plt.xlabel('Time')
plt.ylabel('State (0=OFF, 1=ON)')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('led_states_over_time.png')
plt.close()

# Correlation between distance and LED states
corr_data = pd.merge(df_distance[['Timestamp', 'Distance']], led_data, on='Timestamp', how='left')
correlation_matrix = corr_data[['Distance'] + led_columns].corr()
print("\nCorrelation between Distance and LED states:")
print(correlation_matrix['Distance'])


# Additional analysis: Distance distribution
plt.figure(figsize=(10, 6))
sns.histplot(df_distance['Distance'], kde=True)
plt.title('Distribution of Distance Measurements')
plt.xlabel('Distance (cm)')
plt.ylabel('Frequency')
plt.savefig('distance_distribution.png')
plt.close()

print("\nAnalysis complete. Check the generated PNG files for visualizations.")

# Time series analysis
from statsmodels.tsa.seasonal import seasonal_decompose

# Ensure the timestamp is the index and sort
df_distance = df_distance.set_index('Timestamp').sort_index()

# Perform time series decomposition
decomposition = seasonal_decompose(df_distance['Distance'], model='additive', period=10)

# Plot the decomposition
plt.figure(figsize=(12, 10))
plt.subplot(411)
plt.plot(decomposition.observed)
plt.title('Observed')
plt.subplot(412)
plt.plot(decomposition.trend)
plt.title('Trend')
plt.subplot(413)
plt.plot(decomposition.seasonal)
plt.title('Seasonal')
plt.subplot(414)
plt.plot(decomposition.resid)
plt.title('Residual')
plt.tight_layout()
plt.savefig('time_series_decomposition.png')
plt.close()

print("Time series decomposition complete. Check 'time_series_decomposition.png' for visualization.")

Summary Statistics for Distance:
count    288.000000
mean      16.916667
std       19.499486
min        0.000000
25%        0.000000
50%       13.000000
75%       31.000000
max      123.000000
Name: Distance, dtype: float64

Correlation between Distance and LED states:
Distance    1.0
LED1        NaN
LED2        NaN
LED3        NaN
LED4        NaN
LED5        NaN
Name: Distance, dtype: float64


  led_data = led_data.set_index('Timestamp').resample('1S').first().fillna(method='ffill')
  led_data = led_data.set_index('Timestamp').resample('1S').first().fillna(method='ffill')



Analysis complete. Check the generated PNG files for visualizations.
Time series decomposition complete. Check 'time_series_decomposition.png' for visualization.
