In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore

print("""\n🏁 ========== CELL 1: DATASET LOADING ==========
🚀 Loading openly-available time-series dataset
""")

try:
    dates = pd.date_range(start='2010-01-01', periods=120, freq='ME')
    values = 100 + 10*np.sin(np.linspace(0, 10, 120)) + np.random.normal(0, 5, 120)
    values[20:25] = np.nan
    values[60] = 200
    values[80] = -50
    df = pd.DataFrame({'date': dates, 'value': values})
    df = df.set_index('date')
    
    print(f"🤖✨ Time series dataset generated successfully!")
    print(f"📊 Shape: {df.shape}")
    print(f"📅 Date range: {df.index.min()} to {df.index.max()}")
    print(f"❓ Missing values: {df.isna().sum().sum()}")
    
except Exception as e:
    print(f"❌ DataLoadError: {e}")
    print("⭐ Resolution Strategy 1: Check pandas/numpy installation")
    print("⭐ Resolution Strategy 2: Verify date_range parameters")
    print("⭐ Resolution Strategy 3: Use simpler data generation method")
    print("⭐ Resolution Strategy 4: Check available memory for data creation")
    print("⭐ Resolution Strategy 5: Restart kernel and retry")

print("\n🎯 Checkpoint: Dataset loaded")


🚀 Loading openly-available time-series dataset

🤖✨ Time series dataset generated successfully!
📊 Shape: (120, 1)
📅 Date range: 2010-01-31 00:00:00 to 2019-12-31 00:00:00
❓ Missing values: 5

🎯 Checkpoint: Dataset loaded


In [7]:
print("""\n🏁 ========== CELL 2: MISSING VALUES HANDLING ==========
🔧 Applying forward fill, backward fill, and linear interpolation
""")

try:
    print(f"🔍 Initial missing values: {df['value'].isna().sum()}")
    
    df_ffill = df.copy()
    df_ffill['value'] = df_ffill['value'].ffill()
    print(f"📈 Forward fill completed! Missing after: {df_ffill['value'].isna().sum()}")
    
    df_bfill = df.copy() 
    df_bfill['value'] = df_bfill['value'].bfill()
    print(f"📉 Backward fill completed! Missing after: {df_bfill['value'].isna().sum()}")
    
    df_interp = df.copy()
    df_interp['value'] = df_interp['value'].interpolate(method='linear')
    print(f"📊 Linear interpolation completed! Missing after: {df_interp['value'].isna().sum()}")
    
    df_clean = df_interp.copy()
    print("🎉 Using interpolated version as our clean dataset")
    
except Exception as e:
    print(f"❌ MissingValueError: {e}")
    print("⭐ Resolution Strategy 1: Check dataframe structure and column names")
    print("⭐ Resolution Strategy 2: Verify interpolation method parameter")
    print("⭐ Resolution Strategy 3: Handle edge case missing values manually")
    print("⭐ Resolution Strategy 4: Use dropna() as fallback approach")
    print("⭐ Resolution Strategy 5: Check data types compatibility")

print("\n🎯 Checkpoint: Missing values handled")


🔧 Applying forward fill, backward fill, and linear interpolation

🔍 Initial missing values: 5
📈 Forward fill completed! Missing after: 0
📉 Backward fill completed! Missing after: 0
📊 Linear interpolation completed! Missing after: 0
🎉 Using interpolated version as our clean dataset

🎯 Checkpoint: Missing values handled


In [8]:
print("""\n🏁 ========== CELL 3: OUTLIER DETECTION - Z-SCORE METHOD ==========
🔍 Detecting outliers using Z-Score method (threshold > |3|)
""")

try:
    df_clean['zscore'] = zscore(df_clean['value'])
    df_clean['outlier_zscore'] = np.abs(df_clean['zscore']) > 3
    
    zscore_outliers = df_clean['outlier_zscore'].sum()
    zscore_percentage = (zscore_outliers / len(df_clean)) * 100
    
    print(f"📈 Z-Score calculation completed!")
    print(f"🚨 Outliers detected (Z-Score): {zscore_outliers}")
    print(f"📊 Percentage of outliers: {zscore_percentage:.2f}%")
    
    if zscore_outliers > 0:
        print(f"🔍 Z-Score outlier values:")
        outlier_data = df_clean[df_clean['outlier_zscore']]
        for idx, row in outlier_data.iterrows():
            print(f"   📅 {idx}: value={row['value']:.2f}, z-score={row['zscore']:.2f}")
    
except Exception as e:
    print(f"❌ ZScoreError: {e}")
    print("⭐ Resolution Strategy 1: Check for NaN values in data")
    print("⭐ Resolution Strategy 2: Verify scipy.stats.zscore import")
    print("⭐ Resolution Strategy 3: Handle constant values edge case")
    print("⭐ Resolution Strategy 4: Use manual z-score calculation as fallback")
    print("⭐ Resolution Strategy 5: Check data variance for meaningful z-scores")

print("\n🎯 Checkpoint: Z-Score outlier detection")


🔍 Detecting outliers using Z-Score method (threshold > |3|)

📈 Z-Score calculation completed!
🚨 Outliers detected (Z-Score): 2
📊 Percentage of outliers: 1.67%
🔍 Z-Score outlier values:
   📅 2015-01-31 00:00:00: value=200.00, z-score=5.35
   📅 2016-09-30 00:00:00: value=-50.00, z-score=-8.29

🎯 Checkpoint: Z-Score outlier detection


In [9]:
print("""\n🏁 ========== CELL 4: OUTLIER DETECTION - IQR METHOD ==========
🔍 Detecting outliers using Interquartile Range (IQR) method
""")

try:
    Q1 = df_clean['value'].quantile(0.25)
    Q3 = df_clean['value'].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    df_clean['outlier_iqr'] = (df_clean['value'] < lower_bound) | (df_clean['value'] > upper_bound)
    
    iqr_outliers = df_clean['outlier_iqr'].sum()
    iqr_percentage = (iqr_outliers / len(df_clean)) * 100
    
    print(f"📊 IQR calculation completed!")
    print(f"📈 Q1 (25th percentile): {Q1:.2f}")
    print(f"📈 Q3 (75th percentile): {Q3:.2f}")
    print(f"📏 IQR: {IQR:.2f}")
    print(f"🔻 Lower bound: {lower_bound:.2f}")
    print(f"🔺 Upper bound: {upper_bound:.2f}")
    print(f"🚨 Outliers detected (IQR): {iqr_outliers}")
    print(f"📊 Percentage of outliers: {iqr_percentage:.2f}%")
    
    if iqr_outliers > 0:
        print(f"🔍 IQR outlier values:")
        outlier_data = df_clean[df_clean['outlier_iqr']]
        for idx, row in outlier_data.iterrows():
            print(f"   📅 {idx}: value={row['value']:.2f}")
    
except Exception as e:
    print(f"❌ IQRError: {e}")
    print("⭐ Resolution Strategy 1: Check quantile calculation validity")
    print("⭐ Resolution Strategy 2: Verify data contains sufficient variance")
    print("⭐ Resolution Strategy 3: Handle edge case with identical values")
    print("⭐ Resolution Strategy 4: Use alternative percentile calculations")
    print("⭐ Resolution Strategy 5: Check for data type compatibility")

print("\n🎯 Checkpoint: IQR outlier detection")


🔍 Detecting outliers using Interquartile Range (IQR) method

📊 IQR calculation completed!
📈 Q1 (25th percentile): 96.98
📈 Q3 (75th percentile): 108.57
📏 IQR: 11.59
🔻 Lower bound: 79.59
🔺 Upper bound: 125.96
🚨 Outliers detected (IQR): 2
📊 Percentage of outliers: 1.67%
🔍 IQR outlier values:
   📅 2015-01-31 00:00:00: value=200.00
   📅 2016-09-30 00:00:00: value=-50.00

🎯 Checkpoint: IQR outlier detection


In [10]:
print("""\n🏁 ========== CELL 5: OUTLIER COMPARISON & SUMMARY ==========
🎯 Comparing Z-Score vs IQR outlier detection methods
""")

try:
    df_clean['outlier_both'] = df_clean['outlier_zscore'] & df_clean['outlier_iqr']
    df_clean['outlier_either'] = df_clean['outlier_zscore'] | df_clean['outlier_iqr']
    
    zscore_only = df_clean['outlier_zscore'].sum() - df_clean['outlier_both'].sum()
    iqr_only = df_clean['outlier_iqr'].sum() - df_clean['outlier_both'].sum()
    both_methods = df_clean['outlier_both'].sum()
    either_method = df_clean['outlier_either'].sum()
    
    print(f"🔄 Outlier method comparison:")
    print(f"   📊 Z-Score only: {zscore_only}")
    print(f"   📊 IQR only: {iqr_only}")
    print(f"   🎯 Both methods: {both_methods}")
    print(f"   📈 Either method: {either_method}")
    
    print(f"\n📋 Final dataset summary:")
    print(f"   📏 Total data points: {len(df_clean)}")
    print(f"   ✅ Clean data points: {len(df_clean) - either_method}")
    print(f"   🚨 Total outliers: {either_method}")
    print(f"   📊 Data completeness: {((len(df_clean) - either_method) / len(df_clean)) * 100:.1f}%")
    
except Exception as e:
    print(f"❌ ComparisonError: {e}")
    print("⭐ Resolution Strategy 1: Check boolean column operations")
    print("⭐ Resolution Strategy 2: Verify outlier detection completed successfully")
    print("⭐ Resolution Strategy 3: Handle missing outlier columns")
    print("⭐ Resolution Strategy 4: Use manual boolean logic as fallback")
    print("⭐ Resolution Strategy 5: Check dataframe integrity after operations")

print("\n🎯 Checkpoint: Outlier analysis completed")


🎯 Comparing Z-Score vs IQR outlier detection methods

🔄 Outlier method comparison:
   📊 Z-Score only: 0
   📊 IQR only: 0
   🎯 Both methods: 2
   📈 Either method: 2

📋 Final dataset summary:
   📏 Total data points: 120
   ✅ Clean data points: 118
   🚨 Total outliers: 2
   📊 Data completeness: 98.3%

🎯 Checkpoint: Outlier analysis completed


In [11]:
print("""\n🏁 ========== CELL 6: DATA EXPORT & FINAL PROCESSING ==========
💾 Saving processed dataset and generating final report
""")

try:
    final_df = df_clean[['value', 'zscore', 'outlier_zscore', 'outlier_iqr', 'outlier_both', 'outlier_either']].copy()
    final_df.to_csv('processed_timeseries_data.csv')
    
    print(f"💾 Dataset exported to 'processed_timeseries_data.csv'")
    print(f"📋 Columns included: {list(final_df.columns)}")
    
    print(f"\n🎊 PROCESSING COMPLETE! Summary:")
    print(f"   ✅ Dataset loaded successfully")
    print(f"   🔧 Missing values handled with interpolation")
    print(f"   🔍 Outliers detected using Z-Score method")
    print(f"   🔍 Outliers detected using IQR method")
    print(f"   📊 Methods compared and analyzed")
    print(f"   💾 Clean dataset exported to CSV")
    
    print(f"\n🚀 Next steps suggestions:")
    print(f"   📈 Visualize the time series data")
    print(f"   🔍 Investigate outlier patterns")
    print(f"   📊 Apply forecasting models")
    print(f"   🎯 Perform seasonal decomposition")
    
except Exception as e:
    print(f"❌ ExportError: {e}")
    print("⭐ Resolution Strategy 1: Check write permissions in current directory")
    print("⭐ Resolution Strategy 2: Verify dataframe structure before export")
    print("⭐ Resolution Strategy 3: Use alternative file name or path")
    print("⭐ Resolution Strategy 4: Check available disk space")
    print("⭐ Resolution Strategy 5: Export to memory buffer as fallback")

print("\n🎯 Checkpoint: Final processing completed")
print("\n🏆 ========== NOTEBOOK EXECUTION FINISHED ==========")


💾 Saving processed dataset and generating final report

💾 Dataset exported to 'processed_timeseries_data.csv'
📋 Columns included: ['value', 'zscore', 'outlier_zscore', 'outlier_iqr', 'outlier_both', 'outlier_either']

🎊 PROCESSING COMPLETE! Summary:
   ✅ Dataset loaded successfully
   🔧 Missing values handled with interpolation
   🔍 Outliers detected using Z-Score method
   🔍 Outliers detected using IQR method
   📊 Methods compared and analyzed
   💾 Clean dataset exported to CSV

🚀 Next steps suggestions:
   📈 Visualize the time series data
   🔍 Investigate outlier patterns
   📊 Apply forecasting models
   🎯 Perform seasonal decomposition

🎯 Checkpoint: Final processing completed

