# LLM Data Analysis Assistant - Complete Demo

This notebook demonstrates the full workflow:
1. Load and analyze datasets
2. Ask questions in natural language
3. Generate automatic insights
4. Create visualizations
5. Export analysis report

In [None]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The project modules live in ../src; extend the path before importing them
sys.path.append('../src')

from data_assistant import LLMDataAssistant
from visualizations import VisualizationGenerator
%matplotlib inline

# Set plot style.
# The 'seaborn-v0_8-*' style names only exist in matplotlib >= 3.6; earlier
# releases expose the same style sheet as 'seaborn-darkgrid'. Use whichever
# name this installation provides instead of failing with OSError. If neither
# is available the default matplotlib style is kept.
for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

## Step 1: Create Sample Dataset

In [None]:
# Generate a synthetic year of daily sales records.
# The global NumPy RNG is seeded so every run produces the same frame.
np.random.seed(42)
dates = pd.date_range('2024-01-01', periods=365)
n_days = len(dates)
day_index = np.arange(n_days)  # 0 .. n_days-1, drives the linear upward trends

data = pd.DataFrame({
    'date': dates,
    'sales': np.random.poisson(100, n_days) + day_index * 0.3,
    'revenue': np.random.normal(5000, 1000, n_days) + day_index * 5,
    # Cast to float up front: NaNs are written into this column below, and
    # relying on implicit int -> float upcasting during assignment is
    # discouraged by recent pandas releases (NOTE(review): confirm against the
    # pinned pandas version).
    'customers': np.random.randint(50, 150, n_days).astype(float),
    'region': np.random.choice(['North', 'South', 'East', 'West'], n_days),
    'product_category': np.random.choice(['Electronics', 'Clothing', 'Food'], n_days)
})

# Add some missing values. replace=False guarantees distinct row positions, so
# exactly 30 revenue values and 15 customer values end up missing (sampling
# with replacement could pick the same row twice and blank fewer rows).
data.loc[np.random.choice(n_days, 30, replace=False), 'revenue'] = np.nan
data.loc[np.random.choice(n_days, 15, replace=False), 'customers'] = np.nan

# Save to CSV, creating the target directory first so the cell also works on a
# checkout where ../data does not exist yet.
csv_path = Path('../data/sales_data.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(csv_path, index=False)

print(f"✅ Created dataset with {len(data)} rows")
data.head()

## Step 2: Initialize Assistant

In [None]:
# Instantiate the assistant, then point it at the CSV file produced in Step 1
sales_csv = '../data/sales_data.csv'
assistant = LLMDataAssistant()
assistant.load_data(sales_csv)

## Step 3: Ask Questions

In [None]:
# Question 1: which columns does the dataset contain?
question = "What columns are in the dataset?"
result = assistant.ask(question)
print(f"Q: {question}")
answer = result['answer']
print(f"A: {answer}\n")

In [None]:
# Question 2: how many rows does the dataset have?
question = "How many rows are there?"
result = assistant.ask(question)
print(f"Q: {question}")
answer = result['answer']
print(f"A: {answer}\n")

In [None]:
# Question 3: does the dataset contain missing values?
question = "Are there any missing values?"
result = assistant.ask(question)
print(f"Q: {question}")
answer = result['answer']
print(f"A: {answer}\n")

In [None]:
# Question 4: Correlations
question = "Show me correlations between numeric columns"
result = assistant.ask(question)
# Print the query exactly as it was sent so the transcript matches the request
print(f"Q: {question}")
print(f"A: {result['answer']}\n")

# 'data' is optional in the response. When present and non-empty it is indexed
# as a sequence of records with 'col1', 'col2' and 'correlation' keys; only the
# first three are shown (presumably the strongest pairs -- TODO confirm the
# ordering against the assistant's implementation).
top_correlations = result.get('data')
if top_correlations:
    print("Top correlations:")
    for corr in top_correlations[:3]:
        print(f"  {corr['col1']} <-> {corr['col2']}: {corr['correlation']:.3f}")

## Step 4: Generate Automatic Insights

In [None]:
# Ask the assistant for its automatically generated insights
insights = assistant.generate_insights()

# The header's light-bulb emoji had been stored as mis-decoded UTF-8 bytes,
# which printed as garbage; the literal now holds the intended character.
print("💡 Automatic Insights:")
print("=" * 50)
# Each insight is printed on its own indented line
for insight in insights:
    print(f"  {insight}")

## Step 5: Create Visualizations

In [None]:
# Initialize visualization generator from the assistant's `df` attribute
# (presumably the DataFrame loaded in Step 2 -- verify against LLMDataAssistant)
viz = VisualizationGenerator(assistant.df)

# Generate all visualizations; the result is used below as a mapping keyed by
# plot name
plots = viz.generate_all()

# The check-mark emoji had been stored as mis-decoded UTF-8 and printed as
# garbage; the literal now holds the intended character.
print(f"✅ Generated {len(plots)} visualizations:")
# Only the names are listed, so iterate the keys instead of unpacking paths
# that are never used.
for name in plots:
    print(f"  - {name}")

In [None]:
# Render one of the saved figures inline in the notebook
from IPython.display import Image, display

heatmap_png = '../assets/correlation_heatmap.png'

print("Correlation Heatmap:")
display(Image(filename=heatmap_png, width=600))

In [None]:
# Create comprehensive summary report; the returned value is used as the file
# name of the image to display below
report_path = viz.create_summary_report()
# The check-mark emoji had been stored as mis-decoded UTF-8 and printed as
# garbage; the literal now holds the intended character.
print(f"✅ Summary report created: {report_path}")

# `Image` and `display` come from the IPython.display import in the preceding cell
display(Image(filename=report_path, width=800))

## Step 6: Interactive Analysis

In [None]:
# Edit the question below to explore the dataset with your own query
custom_query = "What should I visualize?"

result = assistant.ask(custom_query)
print(f"Q: {custom_query}")
answer = result['answer']
print(f"A: {answer}")

## Step 7: Statistical Summary

In [None]:
# Request a statistical summary and print the textual answer
result = assistant.ask("Give me a statistical summary")
summary_text = result['answer']
print(summary_text)

## Conclusion

This notebook demonstrated:
- ✅ Loading and analyzing datasets
- ✅ Natural language querying
- ✅ Automatic insight generation
- ✅ Comprehensive visualizations
- ✅ Statistical analysis

The assistant can handle various data analysis tasks through simple natural language queries!

## Next Steps

Try analyzing your own datasets:
```python
assistant = LLMDataAssistant()
assistant.load_data('your_data.csv')
result = assistant.ask('Your question here')
```