# Exploration Notebook

This notebook performs exploratory data analysis (EDA) for the disaster resource and volunteer allocation project. It covers data loading, preprocessing, and visualization.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to every figure created in this notebook.
# `set_theme` is the documented, preferred interface; `sns.set` is only a
# legacy alias for it.
sns.set_theme(style='whitegrid')

In [2]:
# Read the processed disaster dataset from disk into `df`, then preview
# the first five rows as this cell's output.
data_path = '../data/processed/disaster_data.csv'
df = pd.read_csv(filepath_or_buffer=data_path)
df.head(n=5)

In [3]:
# Print the structural summary of the frame (info() writes to stdout) ...
df.info()
# ... then render the descriptive statistics as the cell's output.
summary_stats = df.describe()
summary_stats

In [4]:
# Count nulls per column and display only the columns that have any.
missing_values = df.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [5]:
# Bar chart of how many rows fall into each value of the `severity` column,
# drawn on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='severity', data=df, ax=ax)
ax.set_title('Distribution of Severity Levels')
ax.set_xlabel('Severity')
ax.set_ylabel('Count')
plt.show()

In [6]:
# Correlation heatmap
# Pairwise correlation is only defined for numeric columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# when the frame contains e.g. string columns, so restrict to numeric dtypes
# before correlating. select_dtypes works on older pandas versions as well.
plt.figure(figsize=(12, 8))
correlation = df.select_dtypes(include='number').corr()
# annot/fmt print each coefficient in its cell, rounded to two decimals.
sns.heatmap(correlation, annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

In [7]:
# Write `df` to the processed-data folder (without the index column) for
# use in further analysis, then report where it went.
# NOTE(review): no cleaning step is applied to `df` in the cells visible
# here, so the file written matches the loaded data — confirm this is intended.
cleaned_data_path = '../data/processed/cleaned_disaster_data.csv'
df.to_csv(path_or_buf=cleaned_data_path, index=False)
print(f'Cleaned data saved to {cleaned_data_path}')