-
Notifications
You must be signed in to change notification settings - Fork 0
Statistical Analysis
Temp edited this page Sep 23, 2025
·
1 revision
Last Updated: September 23, 2025 1:48 PM EST
The SQLite MCP Server v2.6.0 includes a comprehensive statistical analysis library with 8 specialized functions for data analysis and business intelligence.
π See Performance Optimization Guide for complete statistical analysis workflows and advanced use cases.
| Tool | Description |
|---|---|
descriptive_statistics |
Calculate comprehensive descriptive statistics for numeric columns |
correlation_analysis |
Calculate correlation coefficients between numeric columns |
percentile_analysis |
Calculate percentiles and quartiles for numeric columns |
distribution_analysis |
Analyze distribution (skewness, kurtosis, normality) |
moving_averages |
Calculate moving averages and trend analysis for time series |
outlier_detection |
Detect outliers using IQR method and Z-score analysis |
regression_analysis |
Perform linear regression analysis between variables |
hypothesis_testing |
Perform statistical hypothesis tests (t-test, chi-square) |
descriptive_statistics({
table_name: "sales_data",
column_name: "revenue",
where_clause: "year = 2024" // optional
})Returns comprehensive statistics including:
- Mean, median, mode
- Standard deviation, variance
- Range (min, max)
- Coefficient of variation
- Count of non-null values
// Analyze multiple columns at once
descriptive_statistics({
table_name: "employee_data",
columns: ["salary", "years_experience", "performance_score"],
group_by: "department"
})// Compare statistics across different time periods
descriptive_statistics({
table_name: "monthly_sales",
column_name: "revenue",
where_clause: "month IN ('Dec', 'Jan', 'Feb')",
label: "Winter Sales"
})percentile_analysis({
table_name: "sales_data",
column_name: "revenue",
percentiles: [25, 50, 75, 90, 95, 99] // optional
})Calculates:
- Quartiles (Q1, Q2, Q3)
- Custom percentiles
- Interquartile range (IQR)
- Distribution boundaries
// Segment customers by purchase amounts
percentile_analysis({
table_name: "customer_purchases",
column_name: "total_spent",
percentiles: [20, 40, 60, 80], // Quintiles
return_segments: true
})// Identify top performers
percentile_analysis({
table_name: "sales_reps",
column_name: "monthly_sales",
percentiles: [90, 95, 99],
where_clause: "active = 1"
})moving_averages({
table_name: "daily_sales",
value_column: "revenue",
time_column: "date",
window_sizes: [7, 30, 90] // optional
})Generates:
- Simple moving averages
- Trend analysis
- Seasonal patterns
- Smoothed data series
// Analyze stock price trends
moving_averages({
table_name: "stock_prices",
value_column: "close_price",
time_column: "date",
window_sizes: [5, 20, 50, 200], // Common trading periods
include_trends: true
})// Analyze website traffic trends
moving_averages({
table_name: "daily_visitors",
value_column: "unique_visitors",
time_column: "date",
window_sizes: [7, 28], // Weekly and monthly trends
seasonal_adjustment: true
})correlation_analysis({
table_name: "marketing_data",
columns: ["ad_spend", "revenue", "conversion_rate"],
method: "pearson" // pearson, spearman, kendall
})// Find relationships in customer data
correlation_analysis({
table_name: "customers",
columns: ["age", "income", "purchase_frequency", "lifetime_value"],
method: "spearman",
significance_test: true
})// Analyze product metrics relationships
correlation_analysis({
table_name: "products",
columns: ["price", "rating", "sales_volume", "profit_margin"],
method: "pearson",
create_matrix: true
})distribution_analysis({
table_name: "customer_orders",
column_name: "order_value",
tests: ["normality", "skewness", "kurtosis"]
})Analyzes:
- Normality tests (Shapiro-Wilk, Kolmogorov-Smirnov)
- Skewness (left/right tail bias)
- Kurtosis (tail heaviness)
- Distribution type identification
// Analyze manufacturing data distribution
distribution_analysis({
table_name: "production_metrics",
column_name: "defect_rate",
tests: ["normality", "outliers"],
confidence_level: 0.95
})outlier_detection({
table_name: "sales_data",
column_name: "revenue",
methods: ["iqr", "zscore"],
thresholds: {"iqr": 1.5, "zscore": 3}
})// Detect unusual transaction patterns
outlier_detection({
table_name: "transactions",
columns: ["amount", "frequency", "location_changes"],
method: "isolation_forest",
contamination: 0.05
})// Identify performance anomalies
outlier_detection({
table_name: "server_metrics",
columns: ["cpu_usage", "memory_usage", "response_time"],
method: "zscore",
threshold: 2.5,
time_window: "last_24_hours"
})regression_analysis({
table_name: "marketing_campaigns",
dependent_variable: "revenue",
independent_variables: ["ad_spend", "campaign_duration", "target_audience_size"],
model_type: "linear"
})Returns:
- Regression coefficients
- R-squared value
- P-values and significance
- Prediction intervals
- Model diagnostics
// Predict future sales based on historical data
regression_analysis({
table_name: "monthly_sales",
dependent_variable: "sales",
independent_variables: ["month_number", "marketing_spend", "seasonal_factor"],
model_type: "multiple_linear",
include_predictions: true
})// Analyze price-demand relationship
regression_analysis({
table_name: "product_sales",
dependent_variable: "units_sold",
independent_variables: ["price", "competitor_price", "promotion_active"],
model_type: "polynomial",
degree: 2
})hypothesis_testing({
table_name: "ab_test_results",
groups: ["control", "treatment"],
metric: "conversion_rate",
test_type: "t_test",
confidence_level: 0.95
})// Compare satisfaction across regions
hypothesis_testing({
table_name: "customer_surveys",
groups: ["north", "south", "east", "west"],
metric: "satisfaction_score",
test_type: "anova",
post_hoc: "tukey"
})// Test if production meets quality standards
hypothesis_testing({
table_name: "quality_measurements",
metric: "defect_rate",
test_type: "one_sample_t",
null_hypothesis: 0.02, // 2% defect rate standard
alternative: "less"
})// 1. Get overview of sales performance
descriptive_statistics({
table_name: "monthly_sales",
column_name: "revenue"
})
// 2. Understand distribution
percentile_analysis({
table_name: "monthly_sales",
column_name: "revenue",
percentiles: [10, 25, 50, 75, 90]
})
// 3. Analyze trends over time
moving_averages({
table_name: "monthly_sales",
value_column: "revenue",
time_column: "month",
window_sizes: [3, 6, 12]
})
// 4. Find correlations with marketing spend
correlation_analysis({
table_name: "monthly_data",
columns: ["revenue", "marketing_spend", "customer_acquisition"],
method: "pearson"
})
// 5. Detect unusual months
outlier_detection({
table_name: "monthly_sales",
column_name: "revenue",
method: "iqr"
})// 1. Customer value distribution
descriptive_statistics({
table_name: "customers",
column_name: "lifetime_value",
group_by: "acquisition_channel"
})
// 2. Segment customers by value
percentile_analysis({
table_name: "customers",
column_name: "lifetime_value",
percentiles: [25, 50, 75, 90, 95]
})
// 3. Analyze behavior correlations
correlation_analysis({
table_name: "customers",
columns: ["age", "income", "purchase_frequency", "avg_order_value"],
method: "spearman"
})
// 4. Compare segments
hypothesis_testing({
table_name: "customers",
groups: ["premium", "standard", "basic"],
metric: "satisfaction_score",
test_type: "anova"
})// Always check data distribution before analysis
distribution_analysis({
table_name: "sales_data",
column_name: "revenue",
tests: ["normality", "completeness"]
})
// Remove outliers if necessary
outlier_detection({
table_name: "sales_data",
column_name: "revenue",
method: "iqr",
action: "flag" // Don't auto-remove, just flag
})// Use Spearman for non-linear relationships
correlation_analysis({
table_name: "customer_data",
columns: ["satisfaction", "loyalty_score"],
method: "spearman" // Better for ordinal data
})
// Use appropriate sample sizes for hypothesis testing
hypothesis_testing({
table_name: "experiment_results",
metric: "conversion_rate",
test_type: "t_test",
min_sample_size: 100 // Ensure statistical power
})// Cross-validate regression models
regression_analysis({
table_name: "training_data",
dependent_variable: "sales",
independent_variables: ["price", "promotion"],
cross_validation: true,
folds: 5
})
// Test assumptions
distribution_analysis({
table_name: "regression_residuals",
column_name: "residual",
tests: ["normality", "homoscedasticity"]
})// Always document significant findings
append_insight({
insight: "Customer lifetime value shows strong positive correlation (r=0.78, p<0.001) with initial purchase amount. Customers spending >$100 on first purchase have 3x higher LTV."
})- Core-Database-Tools - Basic database operations
- Advanced-Text-Processing - Text analysis tools
- Best-Practices - Usage patterns and recommendations
- Troubleshooting - Common statistical analysis issues
π Analytics Tip: Statistical analysis is most powerful when combined with domain knowledge. Always interpret results in business context and validate findings with stakeholders.