# Task 1.4: Assumptions, Limitations, and Communication Strategy

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every warning category for the rest of the kernel session so that
# library notices do not clutter the cell outputs.
# NOTE(review): this blanket filter also hides warnings that can signal real
# problems (e.g. pandas copy/assignment warnings) — consider narrowing it.
warnings.filterwarnings('ignore')

# Global plotting defaults for the notebook. The order matters: the matplotlib
# style sheet is applied first, then the seaborn palette and the figure size
# are set on top of it.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Default figure size as (width, height).
plt.rcParams['figure.figsize'] = (12, 8)

In [11]:
# Load data for reference
# The location is relative to the current working directory of the kernel.
raw_prices_path = '../../data/raw/BrentOilPrices.csv'
df = pd.read_csv(raw_prices_path)

def parse_date(date_str):
    """Parse one raw date value into a pandas Timestamp.

    Two textual layouts are recognised:

    * ``'%d-%b-%y'`` (e.g. ``20-May-87``) when the text contains a hyphen;
    * ``'%b %d, %Y'`` (e.g. ``Apr 22, 2020``) otherwise.

    Parameters
    ----------
    date_str : str
        Raw date text. Leading/trailing whitespace is ignored.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        The parsed timestamp, or ``pd.NaT`` when the value is not a string
        or does not match the expected layout.
    """
    # Non-string cells (None, float NaN from blank fields) cannot be parsed;
    # report them as missing explicitly instead of relying on an exception.
    if not isinstance(date_str, str):
        return pd.NaT

    # Stray whitespace around an otherwise valid date should not discard the row.
    text = date_str.strip()
    date_format = '%d-%b-%y' if '-' in text else '%b %d, %Y'

    try:
        return pd.to_datetime(text, format=date_format)
    except (ValueError, TypeError):
        # Only parsing failures are treated as "missing"; anything else
        # (KeyboardInterrupt, SystemExit, ...) must propagate.
        return pd.NaT

# Convert the raw date text, drop rows whose Date is missing after parsing,
# and put the series in chronological order with a fresh 0..n-1 index.
parsed_dates = df['Date'].apply(parse_date)
df['Date'] = parsed_dates
df = df.dropna(subset=['Date'])
df = df.sort_values('Date')
df = df.reset_index(drop=True)

# Headline facts about the cleaned series.
first_day, last_day = df['Date'].min(), df['Date'].max()
lowest_price, highest_price = df['Price'].min(), df['Price'].max()
print(f"Data period: {first_day.strftime('%Y-%m-%d')} to {last_day.strftime('%Y-%m-%d')}")
print(f"Total observations: {len(df)}")
print(f"Price range: ${lowest_price:.2f} to ${highest_price:.2f}")

Data period: 1987-05-20 to 2022-11-14
Total observations: 9011
Price range: $9.10 to $143.95


In [12]:
# Data quality assessment

# 1) Missing values per column.
missing_values = df.isnull().sum()
print(f"Missing values - Date: {missing_values['Date']}, Price: {missing_values['Price']}")

# 2) Outliers by the 1.5 * IQR rule: anything beyond the lower/upper fence.
Q1 = df['Price'].quantile(0.25)
Q3 = df['Price'].quantile(0.75)
IQR = Q3 - Q1
outliers = df[(df['Price'] < Q1 - 1.5*IQR) | (df['Price'] > Q3 + 1.5*IQR)]
print(f"Outliers: {len(outliers)} ({len(outliers)/len(df)*100:.2f}%)")

# 3) Completeness relative to every calendar day in the covered span.
#    This counts weekends as expected observations, so for a price series
#    quoted on trading days it understates how complete the data is.
date_range = (df['Date'].max() - df['Date'].min()).days
expected_observations = date_range + 1
actual_observations = len(df)
completeness = actual_observations / expected_observations * 100
print(f"Data completeness: {completeness:.2f}%")

# 4) Completeness relative to Monday-Friday business days only, which is the
#    fairer yardstick. Public holidays are still counted as expected days, so
#    this remains a slight underestimate.
expected_business_days = len(pd.bdate_range(df['Date'].min(), df['Date'].max()))
business_day_completeness = actual_observations / expected_business_days * 100
print(f"Data completeness (business days): {business_day_completeness:.2f}%")

Missing values - Date: 0, Price: 0
Outliers: 0 (0.00%)
Data completeness: 69.51%


In [13]:
# Correlation vs causation analysis
# Each named episode maps to an inclusive (start, end) date window.
events_data = {
    'Asian Crisis (1998)': ('1998-06-01', '1998-12-31'),
    'Financial Crisis (2008)': ('2008-06-01', '2009-03-31'),
    'COVID-19 (2020)': ('2020-03-01', '2020-05-31'),
}

print("Correlation Analysis:")
for period_name, (start_date, end_date) in events_data.items():
    # Rows whose date falls inside the window, both endpoints included.
    in_window = df['Date'].between(start_date, end_date)
    period_data = df[in_window]

    # Windows without any observations are skipped silently.
    if len(period_data) == 0:
        continue

    lowest = period_data['Price'].min()
    highest = period_data['Price'].max()
    print(f"{period_name}: Price range ${lowest:.2f} - ${highest:.2f}")

Correlation Analysis:
Asian Crisis (1998): Price range $9.10 - $14.84
Financial Crisis (2008): Price range $33.73 - $143.95
COVID-19 (2020): Price range $9.12 - $52.52


In [14]:
# Stakeholder communication matrix
# For every audience: what they need from the analysis, how results reach
# them, and how often they are updated.
stakeholders = {
    'Investors': {
        'needs': ['Risk assessment', 'Portfolio optimization', 'Timing decisions'],
        'channels': ['Interactive dashboard', 'Executive reports', 'Webinars'],
        'frequency': 'Real-time/Weekly',
    },
    'Policymakers': {
        'needs': ['Economic stability', 'Energy security', 'Policy effectiveness'],
        'channels': ['Policy briefs', 'Government reports', 'Expert consultations'],
        'frequency': 'Monthly/Quarterly',
    },
    'Energy Companies': {
        'needs': ['Supply chain planning', 'Cost management', 'Operational decisions'],
        'channels': ['Corporate dashboards', 'Strategic reports', 'Consulting services'],
        'frequency': 'Weekly/Monthly',
    },
    'Analysts': {
        'needs': ['Deep analysis', 'Methodology details', 'Statistical validation'],
        'channels': ['Technical reports', 'Academic papers', 'Conference presentations'],
        'frequency': 'As needed',
    },
}

print("Stakeholder Communication Matrix:")
for stakeholder, details in stakeholders.items():
    # Build the four-line entry (blank separator line + name, then the three
    # attributes) and emit it in a single call.
    entry_lines = [
        f"\n{stakeholder}:",
        f"  Needs: {', '.join(details['needs'])}",
        f"  Channels: {', '.join(details['channels'])}",
        f"  Frequency: {details['frequency']}",
    ]
    print("\n".join(entry_lines))

Stakeholder Communication Matrix:

Investors:
  Needs: Risk assessment, Portfolio optimization, Timing decisions
  Channels: Interactive dashboard, Executive reports, Webinars
  Frequency: Real-time/Weekly

Policymakers:
  Needs: Economic stability, Energy security, Policy effectiveness
  Channels: Policy briefs, Government reports, Expert consultations
  Frequency: Monthly/Quarterly

Energy Companies:
  Needs: Supply chain planning, Cost management, Operational decisions
  Channels: Corporate dashboards, Strategic reports, Consulting services
  Frequency: Weekly/Monthly

Analysts:
  Needs: Deep analysis, Methodology details, Statistical validation
  Channels: Technical reports, Academic papers, Conference presentations
  Frequency: As needed


In [15]:
# Risk mitigation strategies
# The report text is kept as data (header first, then the numbered items) and
# printed line by line.
mitigation_lines = (
    "Risk Mitigation Strategies:",
    "1. Data Quality: Multiple data source validation, robust cleaning procedures",
    "2. Modeling: Model validation, sensitivity analysis, cross-validation",
    "3. Interpretation: Clear correlation vs. causation disclaimers",
    "4. Communication: Tailored messaging, regular updates, expert review",
)

# The leading newline on the header yields the blank line between sections.
limitation_lines = (
    "\nKey Limitations:",
    "1. Cannot prove causation, only correlation",
    "2. Limited to single price series",
    "3. Model complexity and computational costs",
    "4. Future applicability uncertain",
    "5. Many unobserved variables",
)

for report_line in mitigation_lines + limitation_lines:
    print(report_line)

Risk Mitigation Strategies:
1. Data Quality: Multiple data source validation, robust cleaning procedures
2. Modeling: Model validation, sensitivity analysis, cross-validation
3. Interpretation: Clear correlation vs. causation disclaimers
4. Communication: Tailored messaging, regular updates, expert review

Key Limitations:
1. Cannot prove causation, only correlation
2. Limited to single price series
3. Model complexity and computational costs
4. Future applicability uncertain
5. Many unobserved variables
