# EDA: Berkshire Hathaway Portfolio
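This notebook explores the distribution of holding market values, the portfolio's sector allocation, and the relationship between a holding's market value and its beta, using a small illustrative sample of holdings. It requires `pandas`, `matplotlib`, and `seaborn` to be installed in the kernel's environment.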

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 'df' stands in for the full, merged holdings dataset that earlier steps are
# assumed to produce; a six-row sample is built here for demonstration.
holdings = [
    # (Sector, Market_Value, Beta)
    ('Tech', 150, 1.2),
    ('Financials', 80, 1.4),
    ('Financials', 70, 1.1),
    ('Staples', 60, 0.6),
    ('Energy', 50, 1.3),
    ('Tech', 5, 1.5),
]
# Transpose the rows into one list per column.
sectors, market_values, betas = (list(column) for column in zip(*holdings))
data = {'Sector': sectors, 'Market_Value': market_values, 'Beta': betas}
df = pd.DataFrame(data)

# --- 1. Statistical Summary ---
# For a mixed-type frame, describe() reports on the numeric columns only
# (Market_Value and Beta here).
summary_stats = df.describe()
print(summary_stats)

# --- 2. Distributional Plots ---
# Histogram of holding market values (15 bins) with a KDE curve overlaid.
hist_fig, hist_ax = plt.subplots(figsize=(10, 5))
sns.histplot(df['Market_Value'], bins=15, kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of Holding Market Values')
plt.show()

# --- 3. Bivariate Visuals ---
# One point per holding: market value on the x-axis, beta on the y-axis.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Market_Value', y='Beta', ax=scatter_ax)
scatter_ax.set_title('Market Value vs. Beta')
plt.show()

# --- 4. Categorical Analysis ---
# Number of holdings per sector. Kept for reference; it is NOT what the pie
# chart shows.
sector_counts = df['Sector'].value_counts()

# "Allocation" is a share of portfolio value, so each sector is weighted by the
# summed Market_Value of its holdings rather than by how many holdings it has.
# Counting rows would give a very small position the same slice as the largest.
sector_allocation = (
    df.groupby('Sector')['Market_Value']
    .sum()
    .sort_values(ascending=False)  # largest slice first
)

plt.figure(figsize=(12, 7))
sector_allocation.plot(kind='pie', autopct='%1.1f%%')
plt.title('Portfolio Allocation by Sector')
# pandas labels the pie's y-axis with the Series name; blank it out.
plt.ylabel('')
plt.show()

# --- 5. Final Insights ---
# Hand-written takeaways: the text below is fixed and is not derived from `df`
# at run time, so it must be revisited whenever the underlying data changes.
final_insights = """
**Top 3 Insights:**
1. The portfolio is highly concentrated in its top holdings.
2. The Financials and Technology sectors represent a significant portion of the portfolio.
3. There is no obvious linear relationship between a holding's size and its beta.

**Assumptions & Risks:**
- The sector data is accurate. A misclassification could skew the analysis.
- The beta values are stable, but in reality, they can change over time.
"""
print(final_insights)

ModuleNotFoundError: No module named 'seaborn'