# 🧠 Startup EDA - Continued
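Exploratory data analysis (EDA), continued: column types, categorical distributions, numeric correlations, funding by status, and a binary target derived from `status`.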

In [None]:
# Load essential libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

# Read the startup dataset into a DataFrame.
# The path is relative to the notebook's working directory; adjust it if the CSV lives elsewhere.
df = pd.read_csv(filepath_or_buffer='../data/startup_data.csv')

## 1. Identify column types

In [None]:
# Split the columns by dtype so later cells can handle each group separately.
# 'object' is the dtype pandas commonly uses for text read from a CSV
# (NOTE(review): confirm against the pandas version in use).
cat_cols = df.select_dtypes(include='object').columns.tolist()
# 'number' selects every numeric dtype rather than only int64/float64, so
# columns stored as int32, float32, unsigned or nullable numeric types are
# not silently left out of the numeric analysis below.
num_cols = df.select_dtypes(include='number').columns.tolist()
print("Categorical columns:", cat_cols)
print("Numerical columns:", num_cols)

## 2. Visualize categorical features

In [None]:
# Draw one bar chart per categorical column, restricted to its 10 most
# frequent values so high-cardinality columns stay readable.
for cat_col in cat_cols:
    plt.figure(figsize=(8, 4))
    # value_counts() orders labels by descending frequency, so the first
    # 10 index entries are the 10 most common values.
    top_values = df[cat_col].value_counts().index[:10]
    sns.countplot(x=cat_col, data=df, order=top_values)
    plt.xticks(rotation=45)
    plt.title(f'Distribution of {cat_col}')
    plt.show()

## 3. Correlation heatmap

In [None]:
# Annotated heatmap of the pairwise correlations between the numeric columns.
plt.figure(figsize=(12, 8))
corr_matrix = df[num_cols].corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

## 4. Funding vs. Status

In [None]:
# Box plot of 'funding_total_usd' grouped by 'status'.
# seaborn returns the Axes it drew on, so the title is set on it directly.
ax = sns.boxplot(x='status', y='funding_total_usd', data=df)
ax.set_title('Funding Distribution by Startup Status')
plt.show()

## 5. Map status to binary target

In [None]:
# Encode the outcome as a binary target: 'acquired' -> 1, 'closed' -> 0.
# A status value with no entry in the mapping becomes NaN in the new column.
status_to_target = {'acquired': 1, 'closed': 0}
df['status_binary'] = df['status'].map(status_to_target)
# Class balance as proportions; value_counts() leaves NaN rows out by default,
# so the shares are computed over the mapped rows only.
df['status_binary'].value_counts(normalize=True)

## 6. Summary
- Majority of startups are labeled 'closed'
- Funding values vary widely
- Some columns contain missing values
- Consider merging rare categories (e.g., into a single "Other" bucket) before modeling