# Exploratory Data Analysis for Spectral Soil Data

This notebook performs exploratory data analysis on the spectral soil dataset.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the spectra-plus-target workbook into a DataFrame, then show the
# first rows followed by the (rows, columns) shape, one per line.
data = pd.read_excel('../data/spectra_with_target_T1.xls')
print(data.head(), data.shape, sep='\n')

In [None]:
# Descriptive statistics as computed by DataFrame.describe()
summary_stats = data.describe()
print(summary_stats)

# Number of missing entries in each column
missing_per_column = data.isna().sum()
print(missing_per_column)

In [None]:
# Histogram of the 'target' column with a kernel-density overlay,
# drawn on an explicitly created 8x6 figure.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data['target'], kde=True, ax=ax)
ax.set_title('Distribution of Target Soil Property')
plt.show()

In [None]:
# Plot sample spectral curves
# Every column other than 'target' is treated as a spectral band.
spectral_cols = [name for name in data.columns if name != 'target']

# Draw at most the first five rows (fewer if the dataset is shorter).
n_curves = min(5, len(data))
fig, ax = plt.subplots(figsize=(12, 6))
for i in range(n_curves):
    sample = data.iloc[i]
    ax.plot(spectral_cols, sample[spectral_cols], label=f'Sample {i+1}')
ax.set_xlabel('Wavelength')
ax.set_ylabel('Reflectance')
ax.set_title('Sample Spectral Curves')
ax.legend()
plt.show()

In [None]:
# Correlation analysis
# Keep only numeric/boolean columns before correlating. Since pandas 2.0,
# DataFrame.corr() defaults to numeric_only=False and raises if the frame
# contains a non-numeric column, whereas older versions dropped such
# columns silently. Selecting by dtype up front behaves the same on both
# and changes nothing when every column is already numeric.
numeric_data = data.select_dtypes(include=['number', 'bool'])
corr_matrix = numeric_data.corr()
plt.figure(figsize=(10, 8))
# annot=False: no per-cell value labels are drawn on the heatmap.
sns.heatmap(corr_matrix, cmap='coolwarm', annot=False)
plt.title('Correlation Matrix')
plt.show()

# Correlation with target
# Take the 'target' column of the matrix and drop its self-correlation
# entry so only the other columns remain.
# NOTE(review): assumes 'target' is a numeric column -- confirm.
target_corr = corr_matrix['target'].drop('target')
plt.figure(figsize=(12, 6))
target_corr.plot()
plt.title('Correlation of Spectral Bands with Target')
plt.show()